Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -31,28 +31,22 @@ (declare (uses commonmod)) (import commonmod) (include "common_records.scm") - -;; (require-library margs) -;; (include "margs.scm") - -;; (define old-exit exit) -;; -;; (define (exit . code) -;; (if (null? code) -;; (old-exit) -;; (old-exit code))) +(define (remove-server-files directory-path) + (let ((files (glob (string-append directory-path "/server*")))) + (for-each delete-file* files))) (define (stop-the-train) (thread-start! (make-thread (lambda () (let loop () (if (and *toppath* (file-exists? (conc *toppath*"/stop-the-train"))) (begin (debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately") + (remove-server-files (conc *toppath* "/logs")) (exit 1))) (thread-sleep! 5) (loop)))))) ;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -375,19 +375,20 @@ (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";"))) (if journal-mode (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";"))) (if (and init-proc (not db-exists)) (init-proc db)) - db))) + db)) + expire-time: 30) (begin (if (file-exists? fname ) (let ((db (sqlite3:open-database fname))) ;; pragmas synchronous not needed because this db is used read-only ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout db ) - (print "file doesn't exist: " fname)))) + (print "cautious-open-database: file doesn't exist: " fname)))) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) (exn (corrupt) (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") @@ -1161,11 +1162,15 @@ (handle-exceptions exn #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) - #f) + (begin + (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later") + #f + ) + ) ) ) ) ) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -593,11 +593,14 @@ (list "MT_TARGET" target) (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) ;;(bb-check-path msg: "launch:execute post block 3") - (if mt-bindir-path (setenv "PATH" (conc "\""(getenv "PATH")":"mt-bindir-path"\""))) + (let ((tmppath (getenv "PATH"))) + (if (string-search tmppath " ") + (debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported.")) + (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path)))) ;;(bb-check-path msg: "launch:execute post block 4") ;; (change-directory top-path) ;; Can setup as client for server mode now ;; (client:setup) @@ -891,11 +894,11 @@ (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) + (debug:print-info 2 *default-log-port* "Caching megatest.config in " tmpfile) (if (not (common:in-running-test?)) (configf:write-alist *configdat* tmpfile)) (system (conc "ln -sf " tmpfile " " targfile)))) ))) (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -86,11 +86,17 @@ ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) - (load debugcontrolf))) + (begin + ;; for some reason, debug:print does not work here. Had to use print. + (print (conc "WARNING: loading " debugcontrolf)) + (load debugcontrolf) + ) + ) +) ;; usage logging, careful with this, it is not designed to deal with all real world challenges! ;; (if (and *usage-log-file* (file-write-access? *usage-log-file*)) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -26,11 +26,12 @@ (declare (uses db)) ;; lsof -i (define (portlogger:open-db fname) - (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away + (let* (;; (avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away + (avail #t) (exists (common:file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin (system (conc "rm -f " fname)) @@ -56,12 +57,12 @@ fail_count INTEGER DEFAULT 0, update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") db)) (define (portlogger:open-run-close proc . params) - (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) - (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away + (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db"))) + ;; (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away (handle-exceptions exn (begin ;; (release-dot-lock fname) (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -335,11 +335,11 @@ (if success ;; success only tells us that the transport was ;; successful, have to examine the data to see if ;; there was a detected issue at the other end (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (begin - (debug:print-error 0 *default-log-port* " dat=" dat) + (debug:print-info 0 *default-log-port* "Bad return data from Megatest server: dat=" dat) (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params)) ))) (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" @@ -1129,10 +1129,11 @@ (run-meta (alist-ref "meta" all-dat equal?)) (run-id (string->number (alist-ref "id" run-meta equal?)))) (rmt:insert-run run-id target runname run-meta) (if (list? tests-data) (begin + (debug:print 0 *default-log-port* "Inserting " (length tests-data) " tests in run " runname) (for-each (lambda (test-dat) (let* ((test-id (car test-dat)) (test-rec (cdr test-dat))) (rmt:insert-test run-id test-rec))) @@ -1166,14 +1167,13 @@ ) (if test-id (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id) (begin - (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path) (rmt:send-receive 'insert-test run-id test-rec) ) ) ) ) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -799,50 +799,24 @@ (debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records)) (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) - (let* ((keep-going #t) - (run-queue-retries 5) - ;; (th1 (make-thread (lambda () - ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print-call-chain) - ;; (print " message: " ((condition-property-accessor 'exn 'message) exn))) - ;; (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests - ;; (any->number reglen) all-tests-registry))) - ;; "runs:run-tests-queue")) - #;(th2 (make-thread (lambda () ;; BBQ: why are we visiting ALL runs here? - ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... - (let ((run-ids (rmt:get-all-run-ids))) - (for-each (lambda (run-id) - (if keep-going - (handle-exceptions - exn - (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn) - (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27) - run-ids))) - "runs: mark-incompletes"))) - ;; (thread-start! th1) - ;; (thread-start! th2) - ;; (thread-join! th1) + (let* () ;; just do the main stuff in the main thread (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) - (set! keep-going #f) - #;(thread-join! th2) ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self - (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count) @@ -2906,11 +2880,11 @@ (fld (car key)) (val (configf:lookup test-conf "test_meta" fld))) ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin - (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val) + (debug:print 2 *default-log-port* "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." ;; Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1639,11 +1639,11 @@ (if (and testexists cache-file (file-write-access? cache-path) allow-write-cache) (let ((tpath (conc cache-path "/.testconfig"))) - (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) + (debug:print-info 2 *default-log-port* "Caching testconfig for " test-name " in " tpath) (if (and tcfg (not (common:in-running-test?))) (configf:write-alist tcfg tpath)))) tcfg)))))) ;; sort tests by priority and waiton