Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -73,11 +73,12 @@ (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) - (cadr server-dat)) + (cadr server-dat) + " client:setup (server-dat=#t)") (thread-sleep! 5) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) @@ -93,11 +94,12 @@ (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) - (tasks:hostinfo-get-port server-dat)) + (tasks:hostinfo-get-port server-dat) + " client:setup (server-dat = #f)") (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -980,17 +980,21 @@ ;; General info about the run(s) and megatest area (define (dashboard:summary db) (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string))) (iup:vbox (iup:split - ;; #:value 500 + #:value 500 (iup:frame #:title "General Info" - (iup:hbox - (dcommon:keys-matrix rawconfig) - (dcommon:general-info) - )) + (iup:vbox + (iup:hbox + (iup:label "Area Path") + (iup:textbox #:value *toppath* #:expand "HORIZONTAL")) + (iup:hbox + (dcommon:keys-matrix rawconfig) + (dcommon:general-info) + ))) (iup:frame #:title "Server" (dcommon:servers-table))) (iup:frame #:title "Megatest config settings" Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -358,24 +358,24 @@ (define (dcommon:general-info) (let ((general-matrix (iup:matrix #:alignment1 "ALEFT" #:expand "YES" ;; "HORIZONTAL" #:numcol 1 - #:numlin 3 + #:numlin 2 #:numcol-visible 1 - #:numlin-visible 3))) - (iup:attribute-set! general-matrix "WIDTH1" "200") + #:numlin-visible 2))) + (iup:attribute-set! general-matrix "WIDTH1" "150") (iup:attribute-set! general-matrix "0:1" "About this Megatest area") ;; User (this is not always obvious - it is common to run as a different user (iup:attribute-set! general-matrix "1:0" "User") (iup:attribute-set! general-matrix "1:1" (current-user-name)) ;; Megatest area - (iup:attribute-set! general-matrix "2:0" "Area") - (iup:attribute-set! general-matrix "2:1" *toppath*) + ;; (iup:attribute-set! general-matrix "2:0" "Area") + ;; (iup:attribute-set! general-matrix "2:1" *toppath*) ;; Megatest version - (iup:attribute-set! general-matrix "3:0" "Version") - (iup:attribute-set! general-matrix "3:1" megatest-version) + (iup:attribute-set! general-matrix "2:0" "Version") + (iup:attribute-set! general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) general-matrix)) (define (dcommon:run-stats dbstruct) (let* ((stats-matrix (iup:matrix expand: "YES")) @@ -445,13 +445,13 @@ (let* ((colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 - #:numlin-visible 3 + #:numlin-visible 5 )) - (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "State" "Transport")) + (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) @@ -466,19 +466,18 @@ (let* ((vals (list (vector-ref server 0) ;; Id (vector-ref server 9) ;; MT-Ver (vector-ref server 1) ;; Pid (vector-ref server 2) ;; Hostname (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (vector-ref server 5) ;; Pubport + (seconds->hr-min-sec (- (current-seconds)(vector-ref server 6))) + ;; (vector-ref server 5) ;; Pubport ;; (vector-ref server 10) ;; Last beat ;; (vector-ref server 6) ;; Start time ;; (vector-ref server 7) ;; Priority ;; (vector-ref server 8) ;; State - (if (< (vector-ref server 10) 20) ;; Status (Please redo this properly!) - "alive" - "dead") - (vector-ref server 11) ;; Transport + (vector-ref server 8) ;; State + (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) ;; (print "rownum: " rownum " colnum: " colnum " val: " val) (iup:attribute-set! servers-matrix (conc rownum ":" colnum) val) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) @@ -493,39 +492,41 @@ (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) colnames) (set! dashboard:update-servers-table updater) ;; (iup:attribute-set! servers-matrix "WIDTHDEF" "40") - (iup:hbox - (iup:vbox - (iup:button "Start" - ;; #:size "50x" - #:expand "YES" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -server - &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd)))) - (iup:button "Stop" - #:expand "YES" - ;; #:size "50x" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -stop-server 0 &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd)))) - (iup:button "Restart" - #:expand "YES" - ;; #:size "50x" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -stop-server 0;megatest -server - &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd))))) - servers-matrix - ))) - + ;; (iup:hbox + ;; (iup:vbox + ;; (iup:button "Start" + ;; ;; #:size "50x" + ;; #:expand "YES" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Stop" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0 &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Restart" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0;megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd))))) + ;; servers-matrix + ;; ))) + servers-matrix + )) + ;; The main menu (define (dcommon:main-menu) (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options (iup:menu-item "Open" action: (lambda (obj) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -155,11 +155,11 @@ ipaddrstr portnum) (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) - (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) + (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -391,11 +391,11 @@ "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") - (tasks:server-delete-record tdb server-id) + (tasks:server-delete-record tdb server-id " http-transport:keep-running") (exit)))))) ;; all routes though here end in exit ... ;; ;; start_server? @@ -411,11 +411,11 @@ (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db) + (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch") ) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -129,8 +129,9 @@ (let ((res (client:start run-id server))) ;; if the server didn't respond we must remove the record (if res res (begin - (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id) + (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id + " server:check-if-running") res))) #f))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -91,11 +91,11 @@ (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) - (tasks:server-clean-out-old-records-for-run-id mdb run-id) + (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) @@ -127,33 +127,37 @@ mdb "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) -(define (tasks:server-clean-out-old-records-for-run-id mdb run-id) - (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" run-id)) - -(define (tasks:server-force-clean-running-records-for-run-id mdb run-id) - (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state = 'running' AND run_id=?;" run-id)) - -(define (tasks:server-force-clean-run-record mdb run-id iface port) - (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" - run-id iface port)) - -(define (tasks:server-delete-records-for-this-pid mdb) - (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id))) - -(define (tasks:server-delete-record mdb server-id) - (sqlite3:execute mdb "UPDATE servers SET state='defunct' WHERE id=?;" server-id) +(define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-run-record mdb run-id iface port tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" + (conc "defunct" tag) run-id iface port)) + +(define (tasks:server-delete-records-for-this-pid mdb tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" + (conc "defunct" tag) (get-host-name) (current-process-id))) + +(define (tasks:server-delete-record mdb server-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" + (conc "defunct" tag) server-id) ;; use this opportuntity to clean out records over one month old (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;")) (define (tasks:server-set-state! mdb server-id state) - (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id)) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) (define (tasks:server-set-interface-port mdb server-id interface port) - (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id)) + (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id)) ;; Get random port not used in long time ;; (define (tasks:server-get-next-port mdb) (let* ((lownum 30000) @@ -238,14 +242,15 @@ res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row - (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport) - (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res))) + (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb - "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;") + "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) (define (tasks:kill-server status hostname port pid) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port