Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -62,10 +62,11 @@ tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm +client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm # Temporary while transitioning to new routine # runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -66,12 +66,13 @@ (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! (car server-dat) (cadr server-dat)))) (if start-res ;; sucessful login? start-res - (if (eq? remaining-tries 4) + (if (member remaining-tries '(3 4 6)) (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) @@ -78,40 +79,51 @@ (cadr server-dat) " client:setup (server-dat=#t)") (thread-sleep! 5) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + ;; YUK: rename server-dat here (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (if server-dat (let ((start-res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res start-res - (if (eq? remaining-tries 2) + (if (member remaining-tries '(2 5)) (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) - " client:setup (server-dat = #f)") + " client:setup (server-dat = #t)") (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (begin ;; no server registered - (thread-sleep! 2) - (server:try-running run-id) - (thread-sleep! 10) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) + (if (eq? remaining-tries 2) + (begin + (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (client:setup run-id remaining-tries: 10)) + (begin + (thread-sleep! 2) + (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) + (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) + (server:try-running run-id)) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -11,17 +11,19 @@ (use trace) (define (debug:calc-verbosity vstr) (cond - ((string-match "^\\s*$" vstr) #f) - (vstr (let ((debugvals (string-split vstr ","))) - (cond - ((> (length debugvals) 1)(map string->number debugvals)) - ((> (length debugvals) 0)(string->number (car debugvals))) - (else #f)))) - ((args:get-arg "-v") 2) + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((args:get-arg "-v") 2) ((args:get-arg "-q") 0) (else 1))) ;; check verbosity, #t is ok (define (debug:check-verbosity verbosity vstr) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -312,11 +312,12 @@ (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days - (* 60 1) ;; default to one minute + ;; (* 60 1) ;; default to one minute + (* 60 60 25) ;; default to one day and one hour )))) (let loop ((count 0) (server-state 'available)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -128,11 +128,11 @@ "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) - (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id))