Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -2863,49 +2863,10 @@ (if (null? tal) fallback-launcher (loop (car tal)(cdr tal)))))))) fallback-launcher))) -;;====================================================================== -;; NMSG AND NEW API -;;====================================================================== -;; -;; ;;====================================================================== -;; ;; nm based server experiment, keep around for now. -;; ;; -;; (define (nm:start-server dbconn #!key (given-host-name #f)) -;; (let* ((srvdat (start-raw-server given-host-name: given-host-name)) -;; (host-name (srvdat-host srvdat)) -;; (soc (srvdat-soc srvdat))) -;; -;; ;; start the queue processor (save for second round of development) -;; ;; -;; (thread-start! (make-thread! (lambda ()(queue-processor dbconn) "Queue processor"))) -;; ;; msg is an alist -;; ;; 'r host:port <== where to return the data -;; ;; 'p params <== data to apply the command to -;; ;; 'e j|s|l <== encoding of the params. default is s (sexp), if not specified is assumed to be default -;; ;; 'c command <== look up the function to call using this key -;; ;; -;; (let loop ((msg-in (nn-recv soc))) -;; (if (not (equal? msg-in "quit")) -;; (let* ((dat (decode msg-in)) -;; (host-port (alist-ref 'r dat)) ;; this is for the reverse req rep where the server is a client of the original client -;; (params (alist-ref 'p dat)) -;; (command (let ((c (alist-ref 'c dat)))(if c (string->symbol c) #f))) -;; (all-good (and host-port params command (hash-table-exists? *commands* command)))) -;; (if all-good -;; (let ((cmddat (make-qitem -;; command: command -;; host-port: host-port -;; params: params))) -;; (queue-push cmddat) ;; put request into the queue -;; (nn-send soc "queued")) ;; reply with "queued" -;; (print "ERROR: ["(common:human-time)"] BAD request " dat)) -;; (loop (nn-recv soc))))) -;; (nn-close soc))) - ;;====================================================================== ;; D A S H B O A R D U S E R V I E W S ;;====================================================================== ;;====================================================================== Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -229,10 +229,14 @@ ;; gets and calls updater list based on curr-tab-num ;; (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f)) ;; (sync-db-to-tmp (dboard:common-get-tabdat commondat tab-num: tab-num)) ;; no longer applies + + ;; maybe need sleep here? + + (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat (let* ((tnum (or tab-num (dboard:commondat-curr-tab-num commondat))) (updaters (hash-table-ref/default (dboard:commondat-updaters commondat) tnum '()))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -452,42 +452,10 @@ (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) (db:sync-tables db:sync-tests-only last-update source-db cache-db) (hash-table-set! *global-db-store* target cache-db) cache-db))) -;; ;; call a proc with a cached db -;; ;; -;; (define (db:call-with-cached-db proc . params) -;; ;; first cache the db in /tmp -;; (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) -;; (fname (conc (common:get-area-path-signature) ".db")) -;; (cache-dir (common:get-create-writeable-dir -;; (list (conc "/tmp/" (current-user-name) "/" cname-part) -;; (conc "/tmp/" (current-user-name) "-" cname-part) -;; (conc "/tmp/" (current-user-name) "_" cname-part)))) -;; (megatest-db (conc *toppath* "/megatest.db"))) -;; ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) -;; (if (not cache-dir) -;; (begin -;; (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") -;; (exit 1)) -;; (let* ((th1 (make-thread -;; (lambda () -;; (if (and (common:file-exists? megatest-db) -;; (file-write-access? megatest-db)) -;; (begin -;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* -;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) -;; "call-with-cached-db sync-to-megatest.db")) -;; (cache-db (db:cache-for-read-only -;; megatest-db -;; (conc cache-dir "/" fname) -;; use-last-update: #t))) -;; (thread-start! th1) -;; (apply proc cache-db params) -;; )))) - (define (db:get-sqlite3-mod-time fname) (let* ((wal-file (conc fname "-wal")) (shm-file (conc fname "-shm")) (get-mtime (lambda (f) (if (and (file-exists? f) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -811,48 +811,29 @@ (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) - ;; (th1 (make-thread (lambda () - ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print-call-chain) - ;; (print " message: " ((condition-property-accessor 'exn 'message) exn))) - ;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - ;; (any->number reglen) all-tests-registry))) - ;; "runs:run-tests-queue")) - (th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? - ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-join! th1) - ;; just do the main stuff in the main thread + (run-ids (rmt:get-all-run-ids))) + (for-each (lambda (run-id) + (if keep-going + (handle-exceptions + exn + (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) + (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) + run-ids) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - (any->number reglen) all-tests-registry) + (any->number reglen) all-tests-registry) (set! keep-going #f) - (thread-join! th2) - ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) - ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count) Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -368,11 +368,11 @@ ;; listener socket has been started by this stage ;; wait for a port before creating the registration file ;; (let* ((db-locked-in #f) (areapath (tt-areapath ttdat)) - (nosyncdbpath (conc areapath"/.megatest")) + (nosyncdbpath (conc areapath"/.mtdb")) (cleanup (lambda () (if (tt-cleanup-proc ttdat) ((tt-cleanup-proc ttdat))) (dbfile:with-no-sync-db nosyncdbpath (lambda (db)