Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -118,11 +118,11 @@ (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain)) ) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer") + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector @@ -150,11 +150,11 @@ ;; SERVERS ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) ;; TESTS - ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) + ((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ((delete-test-records) (apply db:delete-test-records dbstruct params)) ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -95,11 +95,11 @@ ((http)(http-transport:client-connect host port)))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res))))) (if (and start-res ping-res) - (begin + (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -1202,23 +1202,23 @@ ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions - exn - 0 - (file-modification-time fpath))) + exn + 0 + (file-modification-time fpath))) ;; find timestamp of newest file associated with a sqlite db file (define (common:lazy-sqlite-db-modification-time fpath) (let* ((glob-list (handle-exceptions - exn - '("/no/such/file") - (glob (conc fpath "*")))) + exn + `(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))) + (glob (conc fpath "*")))) (file-list (if (eq? 0 (length glob-list)) - '("/no/such/file") - glob-list))) + '("/no/such/file") + glob-list))) (apply max (map common:lazy-modification-time file-list)))) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -26,13 +26,27 @@ (define-syntax define-simple-syntax (syntax-rules () ((_ (name arg ...) body ...) (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) +;; (define-syntax common:handle-exceptions +;; (syntax-rules () +;; ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...)))) + +(define-syntax common:debug-handle-exceptions + (syntax-rules () + ((_ debug exn errstmt body ...) + (if debug + (begin body ...) + (handle-exceptions exn errstmt body ...))))) + (define-syntax common:handle-exceptions (syntax-rules () - ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...)))) + ((_ exn errstmt body ...) + (begin body ...)))) + +;; (define handle-exceptions common:handle-exceptions) ;; iup callbacks are not dumping the stack, this is a work-around ;; (define-simple-syntax (debug:catch-and-dump proc procname) (handle-exceptions @@ -97,11 +111,11 @@ (list? n)) (not (null? (lset-intersection! eq? *verbosity* n)))) ((and (number? *verbosity*) (list? n)) (member *verbosity* n)))) - + (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (getenv "MT_DEBUG_MODE")))) (set! *verbosity* (debug:calc-verbosity debugstr)) (debug:check-verbosity *verbosity* debugstr) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -653,19 +653,31 @@ (lambda (section) (hash-table-set! ht (car section)(cdr section))) adat) ht)) +;; if (define (configf:read-alist fname) - (configf:alist->config - (with-input-from-file fname read))) + (handle-exceptions + exn + #f + (configf:alist->config + (with-input-from-file fname read)))) (define (configf:write-alist cdat fname) - (with-output-to-file fname - (lambda () - (pp (configf:config->alist cdat))))) - + (let ((dat (configf:config->alist cdat))) + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + (if (file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (delete-file fname) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + #f)) + #f))) ;; convert hierarchial list to ini format ;; (define (configf:config->ini data) (map Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -474,13 +474,13 @@ (make-hash-table)))) (testconfig (begin ;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) (runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process (handle-exceptions - exn + exn ;; NOTE: I've no idea why this was written this way. Research, study and fix needed! (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f) - (tests:get-testconfig (db:test-get-testname testdat) test-registry #t)))) + (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t)))) (viewlog (lambda (x) (if (file-exists? logfile) ;(system (conc "firefox " logfile "&")) (dashboard-tests:run-html-viewer logfile) (message-window (conc "File " logfile " not found"))))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -2712,11 +2712,11 @@ ;; (define *monitor-db-path* #f) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. -(tasks:open-db) +;; (tasks:open-db) (define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -898,11 +898,10 @@ (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) (tmpdb (db:get-db dbstruct)) (refndb (dbr:dbstruct-refndb dbstruct)) (allow-cleanup #t) ;; (if run-ids #f #t)) - ;; (tdbdat (tasks:open-db)) (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) (data-synced 0)) ;; count of changed records (I hope) ;; kill servers (if (member 'killservers options) @@ -2580,24 +2579,33 @@ ;; (db:delay-if-busy) ;; ;; NB// This call only operates on toplevel tests. Consider replacing it with more general call ;; (define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus) - (for-each (lambda (testname) - (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " - (if currstate (conc "state='" currstate "' AND ") "") - (if currstatus (conc "status='" currstatus "' AND ") "") - " run_id=? AND testname LIKE ?;")) - (test-id (db:get-test-id dbstruct run-id testname ""))) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute db qry newstate newstatus run-id testname))) - (if test-id (mt:process-triggers dbstruct run-id test-id newstate newstatus)))) - testnames)) + (let ((test-ids '())) + (for-each + (lambda (testname) + (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE " + (if currstate (conc "state='" currstate "' AND ") "") + (if currstatus (conc "status='" currstatus "' AND ") "") + " run_id=? AND testname LIKE ?;")) + (test-id (db:get-test-id dbstruct run-id testname ""))) + (db:with-db + dbstruct + run-id + #t + (lambda (db) + (sqlite3:execute db qry + (or newstate currstate "NOT_STARTED") + (or newstatus currstate "UNKNOWN") + run-id testname))) + (if test-id + (begin + (set! test-ids (cons test-id test-ids)) + (mt:process-triggers dbstruct run-id test-id newstate newstatus))))) + testnames) + test-ids)) ;; ;; speed up for common cases with a little logic ;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id ;; ;; NOTE: run-id is not used Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -609,12 +609,11 @@ (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) (define (dcommon:servers-table commondat tabdat) - (let* ((tdbdat (tasks:open-db)) - (colnum 0) + (let* ((colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 @@ -621,11 +620,10 @@ )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) (let ((servers (server:get-list *toppath* limit: 10))) - ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -112,40 +112,40 @@ ;; (define (http-transport:try-start-server ipaddrstr portnum) (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))) (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - (thread-sleep! 0.1) - - ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server")))) - ;; any error in following steps will result in a retry - (set! *server-info* (list ipaddrstr portnum)) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - ;; This starts the spiffy server - ;; NEED WAY TO SET IP TO #f TO BIND ALL - ;; (start-server bind-address: ipaddrstr port: portnum) - (if config-hostname ;; this is a hint to bind directly - (start-server port: portnum bind-address: (if (equal? config-hostname "-") - ipaddrstr - config-hostname)) - (start-server port: portnum)) - (portlogger:open-run-close portlogger:set-port portnum "released") - (debug:print 1 *default-log-port* "INFO: server has been stopped")))) + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + (thread-sleep! 0.1) + + ;; get_next_port goes here + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server")))) + ;; any error in following steps will result in a retry + (set! *server-info* (list ipaddrstr portnum)) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + ;; This starts the spiffy server + ;; NEED WAY TO SET IP TO #f TO BIND ALL + ;; (start-server bind-address: ipaddrstr port: portnum) + (if config-hostname ;; this is a hint to bind directly + (start-server port: portnum bind-address: (if (equal? config-hostname "-") + ipaddrstr + config-hostname)) + (start-server port: portnum)) + (portlogger:open-run-close portlogger:set-port portnum "released") + (debug:print 1 *default-log-port* "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -229,30 +229,30 @@ ;; ((exn http client-error) e (print e))) (set! res (vector success (db:string->obj (handle-exceptions - exn - (begin - (set! success #f) - (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (if runremote - (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? - (mutex-unlock! *http-mutex*) + exn + (begin + (set! success #f) + (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (if runremote + (remote-conndat-set! runremote #f)) + ;; Killing associated server to allow clean retry.") + ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (mutex-unlock! *http-mutex*) ;;; (signal (make-composite-condition ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) ;;; "communications failed" - (db:obj->string #f)) - (with-input-from-request ;; was dat - fullurl - (list (cons 'key "thekey") - (cons 'cmd cmd) - (cons 'params sparams)) - read-string)) + (db:obj->string #f)) + (with-input-from-request ;; was dat + fullurl + (list (cons 'key "thekey") + (cons 'cmd cmd) + (cons 'params sparams)) + read-string)) transport: 'http) 0)) ;; added this speculatively ;; Shouldn't this be a call to the managed call-all-connections stuff above? (close-all-connections!) (mutex-unlock! *http-mutex*) @@ -440,17 +440,16 @@ (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) (define (http-transport:server-shutdown port) - (let ((tdbdat (tasks:open-db))) + (begin ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; - ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") (thread-sleep! 1) ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -781,12 +781,11 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server") (args:get-arg "-kill-server")) (let ((tl (launch:setup))) (if tl - (let* ((tdbdat (tasks:open-db)) - (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (let* ((servers (server:get-list *toppath*)) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (kill-switch (if (args:get-arg "-kill-server") "-9" "")) (killinfo (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") )) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) @@ -810,15 +809,15 @@ (killed #f) (status (< last-update 20))) ;; (zmq-sockets (if status (server:client-connect hostname port) #f))) ;; no need to login as status of #t indicates we are connecting to correct ;; server - (if (equal? state "dead") - (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. - (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete)) - (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds - (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) + ;; (if (equal? state "dead") + ;; (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. + ;; (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete)) + ;; (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds + ;; (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin @@ -1179,11 +1178,11 @@ runs-spec) (newline))))) (for-each (lambda (test) - (handle-exceptions + (common:debug-handle-exceptions #f exn (begin (debug:print-error 0 *default-log-port* "Bad data in test record? " test) (print "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -391,11 +391,12 @@ ((number? sched-in) sched-in) (else (current-seconds)))) (args-data (if args-alist args-alist (hash-table->alist args:arg-hash))) - (alldat (apply append (list 'a action + (alldat (apply append (list 'T "cmd" + 'a action 'U (current-user-name) 'D sched) (map (lambda (x) (let* ((param (car x)) (value (cdr x)) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -50,25 +50,29 @@ db)) (define (portlogger:open-run-close proc . params) (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away - (handle-exceptions - exn - (begin - ;; (release-dot-lock fname) - (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) - (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it - (print-call-chain (current-error-port))) + ;;(handle-exceptions + ;; exn + ;; (begin + ;; ;; (release-dot-lock fname) + ;; (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) + ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + ;; (if (file-exists? fname) + ;; (begin + ;; (debug:print 0 *default-log-port* "Removing portlogger database file " fname) + ;; (delete-file fname))) ;; just get rid of the portlogger file + ;; (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) (sqlite3:finalize! db) ;; (release-dot-lock fname) - res)))) + res))) +;; ) ;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) (define (portlogger:take-port db portnum) (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);")) (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;")) @@ -99,24 +103,24 @@ (sqlite3:finalize! qry3) res)) (define (portlogger:get-prev-used-port db) (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 *default-log-port* "exn=" (condition->list exn)) - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* "Continuing anyway.") - #f) - (sqlite3:fold-row - (lambda (var curr) - (or curr var curr)) - #f - db - "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))) + exn + (begin + (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* "exn=" (condition->list exn)) + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* "Continuing anyway.") + #f) + (sqlite3:fold-row + (lambda (var curr) + (or curr var curr)) + #f + db + "SELECT (port) FROM ports WHERE state='released' LIMIT 1;"))) (define (portlogger:find-port db) (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport"))) (if (and val (string->number val)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -33,12 +33,13 @@ ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; (define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down. (let* ((runremote (or area-dat *runremote*)) - (cinfo (remote-conndat runremote)) - (run-id 0)) + (cinfo (if (remote? runremote) + (remote-conndat runremote) + #f))) (if cinfo cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) @@ -446,11 +447,11 @@ ;; WARNING: This currently bypasses the transaction wrapped writes system (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment) (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment))) -(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) +(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus) (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus))) (define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode) (if (number? run-id) (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -211,11 +211,11 @@ (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; Generated by a call to (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;; Put fully qualified test/testpath names in this list to be done (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) - (tdbdat (tasks:open-db)) + ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (allowed-tests #f)) ;; check if readonly @@ -237,12 +237,12 @@ (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () - (let ((tdbdat (tasks:open-db))) - (rmt:tasks-set-state-given-param-key task-key "killed")) + ;; (let ((tdbdat (tasks:open-db))) + (rmt:tasks-set-state-given-param-key task-key "killed") ;; ) (print "Killed by signal " signum ". Exiting") (thread-sleep! 3) (exit)))) (th2 (make-thread (lambda () (thread-sleep! 5) @@ -318,13 +318,18 @@ ;; ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED") ;; Now convert anything in allow-auto-rerun to NOT_STARTED ;; - (for-each (lambda (state) - (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state)) - (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) + (for-each + (lambda (state-status) + (let* ((ss-lst (string-split-fields "/" state-status #:infix)) + (state (if (> (length ss-lst) 0)(car ss-lst) #f)) + (status (if (> (length ss-lst) 1)(cadr ss-lst) #f))) + (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status))) + ;; list of state/status pairs separated by spaces + (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") ""))))) ;; Ensure all tests are registered in the test_meta table (runs:update-all-test_meta #f) ;; now add non-directly referenced dependencies (i.e. waiton) @@ -1075,11 +1080,11 @@ (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) - (tdbdat (tasks:open-db)) + ;; (tdbdat (tasks:open-db)) (runsdat (make-runs:dat ;; hed: hed ;; tal: tal ;; reg: reg ;; reruns: reruns @@ -1170,13 +1175,13 @@ itemmaps: itemmaps ;; prereqs-not-met: prereqs-not-met ))) (runs:dat-regfull-set! runsdat regfull) ;; every couple minutes verify the server is there for this run - (if (and (common:low-noise-print 60 "try start server" run-id) - (tasks:need-server run-id)) - (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood + ;; (if (and (common:low-noise-print 60 "try start server" run-id) + ;; (tasks:need-server run-id)) + ;; (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood (if (> num-running 0) (set! last-time-some-running (current-seconds))) (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000))) @@ -1647,11 +1652,11 @@ ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) - (tdbdat (tasks:open-db)) + ;; (tdbdat (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) @@ -1705,11 +1710,11 @@ (tasks:kill-runner target run-name testpatt) ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) - (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -52,10 +52,11 @@ (begin (thread-sleep! 1) (loop (file-exists? fullpath) (- count 1))) (begin + (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.") (if remove (system (conc "rm -rf " fullpath))) #f))) #t)))))) (define (tasks:get-task-db-path) @@ -93,11 +94,11 @@ (tasks:open-db numretries (- numretries 1))) (begin (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 *default-log-port* " exn=" (condition->list exn)))) - (let* ((dbpath (tasks:get-task-db-path)) + (let* ((dbpath (db:dbfile-path )) ;; (tasks:get-task-db-path)) (dbfile (conc dbpath "/monitor.db")) (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away (exists (file-exists? dbpath)) (write-access (file-write-access? dbpath)) (mdb (cond ;; what the hek is *toppath* doing here? Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -19,13 +19,15 @@ # The NEWTARGET causes some tests to fail. Do not use until this is fixed. NEWTARGET = "$(OS)/$(FS)/$(VER)" TARGET = "ubuntu/nfs/none" -all : build unit test1 test2 test3 test4 test5 test6 test7 test8 test9 +all : build unit test4 +# test1 test2 test3 test4 test5 test6 test7 test8 test9 -unit : basicserver.log runs.log misc.log tests.log +unit : all-rmt.log +# basicserver.log runs.log misc.log tests.log rel : cd release;dashboard -rows 25 & ## basicserver.log : unittests/basicserver.scm Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -99,12 +99,12 @@ # htmlviewercmd firefox -new-window htmlviewercmd arora # -runtests automatically deletes the records for tests with the listed states on starting up a run allowing them to re-run # (nb// this is in addition to NOT_STARTED which is automatically re-run) -# -allow-auto-rerun INCOMPLETE ZERO_ITEMS +# format is STATE/STATUS +allow-auto-rerun /INCOMPLETE /ZERO_ITEMS # could add: STUCK STUCK/DEAD UNKNOWN KILLED KILLREQ PREQ_DISCARD [validvalues] state start end 0 1 - 2 status pass fail n/a 0 1 running - 2 ADDED tests/unittests/all-rmt.scm Index: tests/unittests/all-rmt.scm ================================================================== --- /dev/null +++ tests/unittests/all-rmt.scm @@ -0,0 +1,122 @@ + +;;====================================================================== +;; A L L - R M T +;;====================================================================== + +;; Run like this: +;; +;; ./rununittest.sh all-rmt 1 + +;; Definitions: +;; NTN - no test needed +;; DEP - function is deprecated, no point in testing +;; NED - function nested under others, no test needed. +;; DEF - deferred + +(print "start dir: " (current-directory)) + +(define toppath (current-directory)) +(test #f #t (string?(server:start-and-wait *toppath*))) + +(test "setup for run" #t (begin (launch:setup) + (string? (getenv "MT_RUN_AREA_HOME")))) +(test #f #t (vector? (client:setup toppath))) + +(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. +(test #f #t (string? (server:check-if-running "."))) +;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) +;; DEF (rmt:kill-server run-id) +;; DEF (rmt:start-server run-id) +(test #f '(#t "successful login")(rmt:login #f)) +;; DEF (rmt:login-no-auto-client-setup connection-info) +(test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) +(test #f #t (list? (rmt:get-changed-record-ids 0))) +(test #f #f (begin (runs:update-all-test_meta #f) #f)) +(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=)) +(test #f '() (rmt:get-key-val-pairs 0)) +(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) +(test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start +(test #f '() (rmt:get-key-vals 1)) +(test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets)) +(test #f "" (rmt:get-target 1)) +(test #f #t (rmt:register-test 1 "foo" "")) +(test #f 1 (rmt:get-test-id 1 "foo" "")) +(test #f "foo" (vector-ref (rmt:get-test-info-by-id 1 1) 2)) +(test #f "/tmp/badname" (rmt:test-get-rundir-from-test-id 1 1)) +(test #f '(1) (db:set-tests-state-status *db* 1 '("foo") "COMPLETED" "PASS" "NOT_STARTED" "PASS")) +(test #f '(1) (rmt:set-tests-state-status 1 '("foo") "COMPLETED" "PASS" "NOT_STARTED" "PASS")) +(test #f #t (mt:test-set-state-status-by-id 1 1 "COMPLETED" "PASS" "Just testing!")) +(test #f #t (list? (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f 0 #f))) +(test #f #t (list? (rmt:get-tests-for-runs-mindata '(1) "%" '() '() #f))) +(test #f #f (begin (rmt:delete-test-records 1 2) #f)) +(test #f #t (begin (rmt:test-set-state-status 1 1 "COMPLETED" "FAIL" "Another message") #t)) +(test #f 0 (rmt:test-toplevel-num-items 1 "foo")) +(test #f '()(rmt:get-matching-previous-test-run-records 1 "foo" "")) +(test #f '("/tmp/badname" "logs/final.log") (rmt:test-get-logfile-info 1 "foo")) +(test #f '()(rmt:test-get-records-for-index-file 1 "foo")) +(test #f #t (vector? (rmt:get-testinfo-state-status 1 1))) +(test #f #t (rmt:test-set-log! 1 1 "/tmp/another/logfile/eh")) +(test #f #f (begin (rmt:test-set-top-process-pid 1 1 123) #f)) +(test #f 123 (rmt:test-get-top-process-pid 1 1)) +(define keys (rmt:get-keys)) +(test #f '()(rmt:get-run-ids-matching-target keys "%/%" #f "%" "%" "%" "%")) +(test #f '()(rmt:test-get-paths-matching-keynames-target-new keys "%/%" #f "%" "%" "%" "%")) +(test #f '()(rmt:get-prereqs-not-met 1 '() "foo" "")) +(test #f 0 (rmt:get-count-tests-running-for-run-id 1)) +(test #f 0 (rmt:get-count-tests-running 1)) +(test #f 0 (rmt:get-count-tests-running-for-testname 1 "foo")) +(test #f 0 (rmt:get-count-tests-running-in-jobgroup 1 "nada")) +(test #f #f (begin (rmt:set-state-status-and-roll-up-items 1 "foo" "" "COMPLETED" "FAIL" "Just yet another message") #f)) +(test #f #t (rmt:top-test-set-per-pf-counts 1 "foo")) +(test #f '() (rmt:get-raw-run-stats 1)) +(test #f #t (vector? (rmt:get-run-info 1))) +(test #f 0 (rmt:get-num-runs "%")) +(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) ) +(test #f 1 (rmt:register-run '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) "bar" "NEW" "JUSTFINE" "bobafett" "quick")) +(test #f "bar" (rmt:get-run-name-from-id 1)) +(test #f #t (begin (rmt:delete-run 2) #t)) ;; delete a non-existant run +(test #f #t (begin (rmt:update-run-stats 1 '()) #t)) +(test #f #t (begin (rmt:delete-old-deleted-test-records) #t)) +(test #f #t (vector? (rmt:get-runs "%" 10 0 keypatts))) +(test #f '(1)(rmt:get-all-run-ids)) +(test #f '()(rmt:get-prev-run-ids 1)) +(test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t)) +(test #f "JUSTFINE" (rmt:get-run-status 1)) +(test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t)) +(test #f #t (begin (rmt:update-run-event_time 1) #t)) +;; (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default +;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) +;; (rmt:get-main-run-stats run-id) +;; (rmt:get-var varname) +;; (rmt:set-var varname value) +;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) +;; (rmt:get-previous-test-run-record run-id test-name item-path) +;; (rmt:get-run-stats) +;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) +;; (rmt:get-steps-for-test run-id test-id) +;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) +;; (rmt:testmeta-add-record testname) +;; (rmt:testmeta-get-record testname) +;; (rmt:testmeta-update-field test-name fld val) +;; (rmt:test-data-rollup run-id test-id status) +;; (rmt:csv->test-data run-id test-id csvdata) +;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) +;; (rmt:tasks-add action owner target runname testpatt params) +;; (rmt:tasks-set-state-given-param-key param-key new-state) +;; (rmt:tasks-get-last target runname) +;; (rmt:archive-get-allocations testname itempath dneeded) +;; (rmt:archive-register-block-name bdisk-id archive-path) +;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; (rmt:archive-register-disk bdisk-name bdisk-path df) +;; (rmt:test-set-archive-block-id run-id test-id archive-block-id) +;; (rmt:test-get-archive-block-info archive-block-id) +;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) +;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; DEF (test #f #f (rmt:print-db-stats)) +;; DEF (rmt:get-max-query-average run-id) +;; NED (rmt:general-call stmtname run-id . params) +;; DEP (rmt:sdb-qry qry val run-id) +;; DEF (rmt:runtests user run-id testpatt params) +;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) +;; DEP (rmt:synchash-get run-id proc synckey keynum params) +;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo"))