Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -908,13 +908,15 @@ (define (common:get-testsuite-name) (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description. (configf:lookup *configdat* "setup" "testsuite" ) (getenv "MT_TESTSUITE_NAME") - (if (string? *toppath* ) - (pathname-file *toppath*) - #f))) ;; (pathname-file (current-directory))))) + (pathname-file (or (if (string? *toppath* ) + (pathname-file *toppath*) + #f) + (common:get-topath #f))) + "please-set-setup-area-name")) ;; (pathname-file (current-directory))))) ;; safe getting of toppath (define (common:get-toppath areapath) (or *toppath* (if areapath @@ -948,17 +950,23 @@ (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn) (exit 1)) - (let ((dbpath (common:get-create-writeable-dir - (list (conc "/tmp/" (current-user-name) - "/megatest_localdb/" - (common:get-testsuite-name) "/" - (string-translate *toppath* "/" ".")))))) ;; #t)))) - (set! *db-cache-path* dbpath) - dbpath)) + (let* ((tsname (common:get-testsuite-name)) + (dbpath (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + tsname "/" + (string-translate *toppath* "/" ".")) + (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name + "/megatest_localdb/" + tsname + (string-translate *toppath* "/" ".")) + )))) + (set! *db-cache-path* dbpath) + dbpath)) #f))) (define (common:get-area-path-signature) (message-digest-string (md5-primitive) *toppath*)) @@ -1191,11 +1199,12 @@ (file-write-access? hed) hed) (handle-exceptions exn (begin - (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road. exn=" exn) + (debug:print-info 0 *default-log-port* "could not create " hed + ", this might cause problems down the road. exn=" exn) #f) (create-directory hed #t))))) (if (and (string? res) (directory? res)) res @@ -1698,11 +1707,13 @@ (handle-exceptions exn (begin (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn) 0) - (file-modification-time fpath))) + (if (file-exists? fpath) + (file-modification-time fpath) + 0))) ;; find timestamp of newest file associated with a sqlite db file (define (common:lazy-sqlite-db-modification-time fpath) (let* ((glob-list (handle-exceptions exn Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -124,17 +124,19 @@ (list? n)) (member *verbosity* n)))) (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") + (args:get-arg "-debug-noprop") (getenv "MT_DEBUG_MODE")))) (set! *verbosity* (debug:calc-verbosity debugstr)) (debug:check-verbosity *verbosity* debugstr) ;; if we were handed a bad verbosity rule then we will override it with 1 and continue (if (not *verbosity*)(set! *verbosity* 1)) - (if (or (args:get-arg "-debug") - (not (getenv "MT_DEBUG_MODE"))) + (if (and (not (args:get-arg "-debug-noprop")) + (or (args:get-arg "-debug") + (not (getenv "MT_DEBUG_MODE")))) (setenv "MT_DEBUG_MODE" (if (list? *verbosity*) (string-intersperse (map conc *verbosity*) ",") (conc *verbosity*)))))) (define (debug:print n e . params) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -457,11 +457,11 @@ exn (begin (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) (thread-sleep! 3) (sqlite3:interrupt! db) - (db:safely-close-sqlite3-db db try-num: (- try-num 1))) + (db:safely-close-sqlite3-db db stmtcache try-num: (- try-num 1))) (if (sqlite3:database? db) (let* ((stmts (hash-table-ref/default stmt-cache db #f))) (if stmts (map sqlite3:finalize! (hash-table-values stmts))) (sqlite3:finalize! db) #t) @@ -480,14 +480,14 @@ (stack->list (dbr:dbstruct-dbstack dbstruct)))) (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))) (stmt-cache (dbr:dbstruct-stmt-cache dbstruct))) (map (lambda (db) - (db:safely-close-sqlite3-db stmt-cache db)) + (db:safely-close-sqlite3-db db stmt-cache)) tdbs) - (db:safely-close-sqlite3-db stmt-cache mdb) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - (db:safely-close-sqlite3-db stmt-cache rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) + (db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) + (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) ;; (for-each (lambda (run-id) ;; (db:close-run-db dbstruct run-id)) @@ -2180,12 +2180,12 @@ (if newres newres res)) res))) -(define (db:no-sync-close-db db) - (db:safely-close-sqlite3-db db)) +(define (db:no-sync-close-db db stmtcache) + (db:safely-close-sqlite3-db db stmtcache)) ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -132,11 +132,14 @@ ((equal? (uri-path (request-uri (current-request))) '(/ "dashboard")) (send-response body: (http-transport:html-dboard $) headers: '((content-type text/HTML)))) (else (continue)))))))) - (with-output-to-file start-file (lambda ()(print (current-process-id)))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create file " start-file ", exn=" exn) + (with-output-to-file start-file (lambda ()(print (current-process-id))))) (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server ipaddrstr portnum) @@ -295,10 +298,11 @@ (close-all-connections!) (mutex-unlock! *http-mutex*) )) (time-out (lambda () (thread-sleep! 45) + (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") #f)) (th1 (make-thread send-recieve "with-input-from-request")) (th2 (make-thread time-out "time out"))) (thread-start! th1) (thread-start! th2) @@ -431,11 +435,14 @@ (last-access 0) (server-timeout (server:expiration-timeout)) (server-going #f) (server-log-file (args:get-arg "-log"))) ;; always set when we are a server - (with-output-to-file started-file (lambda ()(print (current-process-id)))) + (handle-exceptions + exn + (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn) + (with-output-to-file started-file (lambda ()(print (current-process-id))))) (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1548,21 +1548,21 @@ (testprevvals (alist->env-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) ;; Launchwait defaults to true, must override it to turn off wait (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) (if (not success) (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -210,10 +210,11 @@ -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests -ping run-id|host:port : ping server, exit with 0 if found -debug N|N,M,O... : enable debug 0-N or N and M and O ... + -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG -config fname : override the megatest.config file with fname -append-config fname : append fname to the megatest.config file Utilities -env2file fname : write the environment to fname.csh and fname.sh @@ -345,10 +346,11 @@ "-include" "-exclude-rx" "-exclude-rx-from" "-debug" ;; for *verbosity* > 2 + "-debug-noprop" "-create-test" "-override-timeout" "-test-files" ;; -test-paths is for listing all "-load" ;; load and exectute a scheme file "-section" Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -313,29 +313,10 @@ ;; there was a detected issue at the other end (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) ))) -;; (define (rmt:update-db-stats run-id rawcmd params duration) -;; (mutex-lock! *db-stats-mutex*) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats") -;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) -;; (print "exn=" (condition->list exn)) -;; #f) ;; if this fails we don't care, it is just stats -;; (let* ((cmd (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd))) -;; (stat-vec (hash-table-ref/default *db-stats* cmd #f))) -;; (if (not (vector? stat-vec)) -;; (let ((newvec (vector 0 0))) -;; (hash-table-set! *db-stats* cmd newvec) -;; (set! stat-vec newvec))) -;; (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1)) -;; (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration)))) -;; (mutex-unlock! *db-stats-mutex*)) - (define (rmt:print-db-stats) (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f" (debug:print 18 *default-log-port* "DB Stats\n========") (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg")) (for-each (lambda (cmd) @@ -424,21 +405,10 @@ (http-transport:client-api-send-receive run-id connection-info cmd params)))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) -;; ;; Wrap json library for strings (why the ports crap in the first place?) -;; (define (rmt:dat->json-str dat) -;; (with-output-to-string -;; (lambda () -;; (json-write dat)))) -;; -;; (define (rmt:json-str->dat json-str) -;; (with-input-from-string json-str -;; (lambda () -;; (json-read)))) - ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; ;;====================================================================== @@ -478,13 +448,10 @@ ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host (define (rmt:get-latest-host-load hostname) (rmt:send-receive 'get-latest-host-load 0 (list hostname))) -;; (define (rmt:sync-inmem->db run-id) -;; (rmt:send-receive 'sync-inmem->db run-id '())) - (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr (rmt:send-receive 'sdb-qry run-id (list qry val))) ;; NOT COMPLETED @@ -649,15 +616,10 @@ ;; run-id-list)))) (define (rmt:delete-test-records run-id test-id) (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) -;; This is not needed as test steps are deleted on test delete call -;; -;; (define (rmt:delete-test-step-records run-id test-id) -;; (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id))) - (define (rmt:test-set-state-status run-id test-id state status msg) (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg))) (define (rmt:test-toplevel-num-items run-id test-name) (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name))) @@ -696,13 +658,10 @@ (apply append (map (lambda (run-id) (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) run-ids)))) -;; (define (rmt:get-run-ids-matching keynames target res) -;; (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) - (define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f)) (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps))) (define (rmt:get-count-tests-running-for-run-id run-id fastmode) (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1438,10 +1438,13 @@ ;; every time though the loop increment the test/itempatt val. ;; when the min is > max-allowed and none running then force exit ;; (define *max-tries-hash* (make-hash-table)) +(define (runs:pretty-long-list lst) + (if (> (length lst) 8)(append (take lst 3)(list "...")) lst)) + ;;====================================================================== ;; runs:run-tests-queue is called by runs:run-tests ;;====================================================================== ;; ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > @@ -1618,11 +1621,11 @@ (runs:incremental-print-results run-id) (debug:print 4 *default-log-port* "TOP OF LOOP => " "test-name: " test-name "\n hed: " hed - "\n tal: " tal + "\n tal: " (runs:pretty-long-list tal) "\n reg: " reg "\n test-record " test-record "\n itemdat: " itemdat "\n items: " items "\n item-path: " item-path Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -626,10 +626,11 @@ (let ((legacy-sync (common:run-sync?)) (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) + (stmt-cache (dbr:dbstruct-stmt-cache dbstruct)) (sync-duration 0) ;; run time of the sync in milliseconds ;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))) ) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") @@ -747,9 +748,9 @@ (begin (thread-sleep! 1) (delay-loop (+ count 1)))) (if (not *time-to-exit*) (loop)))) ;; time to exit, close the no-sync db here - (db:no-sync-close-db no-sync-db) + (db:no-sync-close-db no-sync-db stmt-cache) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))