Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -62,10 +62,11 @@ tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm +client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm # Temporary while transitioning to new routine # runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -59,64 +59,73 @@ (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) - (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f)))) - (if server-dat - (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! - (car server-dat) - (cadr server-dat)))) + (let ((host-info (hash-table-ref/default *runremote* run-id #f))) + (if host-info + (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over host-info and connection result! + (car host-info) + (cadr host-info)))) (if start-res ;; sucessful login? start-res - (if (eq? remaining-tries 4) + (if (member remaining-tries '(3 4 6)) (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id - (car server-dat) - (cadr server-dat) - " client:setup (server-dat=#t)") + (car host-info) + (cadr host-info) + " client:setup (host-info=#t)") (thread-sleep! 5) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin + (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + ;; YUK: rename server-dat here (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (if server-dat (let ((start-res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res start-res - (if (eq? remaining-tries 2) + (if (member remaining-tries '(2 5)) (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) - " client:setup (server-dat = #f)") + " client:setup (server-dat = #t)") (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (begin ;; no server registered - (thread-sleep! 2) - (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 2) - (begin - (server:try-running run-id) - (thread-sleep! 10) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1))) - (begin - (thread-sleep! 10) - (client:setup run-id remaining-tries: remainint-tries)))))))))) + (if (eq? remaining-tries 2) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (client:setup run-id remaining-tries: 10)) + (begin + (thread-sleep! 2) + (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) + (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (server:try-running run-id))) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -11,18 +11,19 @@ (use trace) (define (debug:calc-verbosity vstr) (cond - ((not vstr) #f) - ((string-match "^\\s*$" vstr) #f) - (vstr (let ((debugvals (string-split vstr ","))) - (cond - ((> (length debugvals) 1)(map string->number debugvals)) - ((> (length debugvals) 0)(string->number (car debugvals))) - (else #f)))) - ((args:get-arg "-v") 2) + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((args:get-arg "-v") 2) ((args:get-arg "-q") 0) (else 1))) ;; check verbosity, #t is ok (define (debug:check-verbosity verbosity vstr) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -468,12 +468,13 @@ (apply open-run-close-no-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) ;; (define open-run-close (define open-run-close ;; (if (debug:debug-mode 2) - open-run-close-no-exception-handling) - ;; open-run-close-exception-handling)) + ;; open-run-close-no-exception-handling + open-run-close-exception-handling) +;;) (define (db:initialize-main-db db) (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -476,14 +476,17 @@ ;; (vector-ref server 8) ;; State (vector-ref server 8) ;; State (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) - ;; (print "rownum: " rownum " colnum: " colnum " val: " val) - (iup:attribute-set! servers-matrix (conc rownum ":" colnum) val) - (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) - (set! colnum (+ 1 colnum))) + (let* ((row-col (conc rownum ":" colnum)) + (curr-val (iup:attribute servers-matrix row-col))) + (if (not (equal? (conc val) curr-val)) + (begin + (iup:attribute-set! servers-matrix row-col val) + (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) + (set! colnum (+ 1 colnum)))) vals) (set! rownum (+ rownum 1))) (iup:attribute-set! servers-matrix "REDRAW" "ALL")) servers))))) (set! colnum 0) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -145,11 +145,11 @@ ;; get_next_port goes here (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id)) (begin - (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server") + (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) (open-run-close tasks:server-set-interface-port tasks:open-db @@ -157,11 +157,11 @@ ipaddrstr portnum) (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) - (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server") + (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -271,14 +271,14 @@ (define (http-transport:client-connect run-id iface port) (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) (serverdat (list iface port uri-dat uri-api-dat)) (login-res (rmt:login-no-auto-client-setup serverdat run-id))) - (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... (if (and (list? login-res) (car login-res)) (begin + (hash-table-set! *runremote* run-id serverdat) (debug:print-info 2 "Logged in and connected to " iface ":" port) (hash-table-set! *runremote* run-id serverdat) serverdat) (begin (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -41,16 +41,17 @@ (let* ((run-id (if rid rid 0)) (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) - (thread-sleep! 1) (let ((res (client:setup run-id))) (if res (hash-table-ref/default *runremote* run-id #f) ;; client:setup filled this in (hopefully) (if (> numtries 0) - (loop (- numtries 1)) + (begin + (thread-sleep! 10) + (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -128,11 +128,11 @@ "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id))