Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -164,10 +164,33 @@ (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + +;; dot-locking egg seems not to work, using this for now +;; if lock is older than expire-time then remove it and try again +;; to get the lock +;; +(define (common:simple-file-lock fname #!key (expire-time 300)) + (if (file-exists? fname) + (if (> (- (current-seconds)(file-modification-time fname)) expire-time) + (begin + (delete-file* fname) + (common:simple-file-lock fname expire-time: expire-time)) + #f) + (let ((key-string (conc (get-host-name) "-" (current-process-id)))) + (with-output-to-file fname + (lambda () + (print key-string))) + (thread-sleep! 0.25) + (with-input-from-file fname + (lambda () + (equal? key-string (read-line))))))) + +(define (common:simple-file-release-lock fname) + (delete-file* fname)) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -401,11 +401,12 @@ ;;====================================================================== ;; ;;====================================================================== (define (examine-test run-id test-id) ;; run-id run-key origtest) (let* ((db-path (db:dbfile-path run-id)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) - (dbstruct (make-dbr:dbstruct path: (configf:lookup *configdat* "setup" "linktree") local: #t)) + (dbstruct (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") + local: #t)) (testdat (db:get-test-info-by-id dbstruct run-id test-id)) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t)) (if (not testdat) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -85,11 +85,11 @@ (if (not (launch:setup-for-run)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) -(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) +(define *dbdir* (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) (define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* local: #t)) (define *db-file-path* (db:dbfile-path 0)) ;; HACK ALERT: this is a hack, please fix. Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -137,24 +137,32 @@ ;; ;; ;; (define (db:get-path dbstruct id) ;; (let ((fdb (db:get-filedb dbstruct))) ;; (filedb:get-path db id))) -;; NB// #f => zeroth db with name=main.db +;; NB// #f => return dbdir only +;; (was planned to be; zeroth db with name=main.db) ;; (define (db:dbfile-path run-id) (let* (;; (toppath (dbr:dbstruct-get-path dbstruct)) (link-tree-path (configf:lookup *configdat* "setup" "linktree")) - (fname (if (eq? run-id 0) "main.db" (conc run-id ".db"))) - (dbdir (conc link-tree-path "/.db/"))) + (dbpath (configf:lookup *configdat* "setup" "dbdir")) + (fname (if run-id + (if (eq? run-id 0) "main.db" (conc run-id ".db")) + #f)) + (dbdir (if dbpath + dbpath + (conc link-tree-path "/.db/")))) (handle-exceptions exn (begin (debug:print 0 "ERROR: Couldn't create path to " dbdir) (exit 1)) (if (not (directory? dbdir))(create-directory dbdir #t))) - (conc dbdir fname))) + (if fname + (conc dbdir fname) + dbdir))) (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) @@ -266,11 +274,11 @@ dbdat)))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; (define (db:setup run-id #!key (local #f)) - (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (let* ((dbdir (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) (dbstruct (make-dbr:dbstruct path: dbdir local: local))) dbstruct)) ;; Open the classic megatest.db file in toppath ;; Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -435,13 +435,13 @@ (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... - (if (and (not (equal? item-path "")) - (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) - (tests:summarize-items run-id test-id test-name #f)) + ;; (if (and (not (equal? item-path "")) + ;; (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) + (tests:summarize-items run-id test-id test-name #f) (tests:summarize-test run-id test-id)) ;; don't force - just update if no (mutex-unlock! m) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") (if (not (vector-ref exit-info 1)) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -119,18 +119,18 @@ (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) (let ((result (handle-exceptions exn (begin - (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds") + (debug:print 0 "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (thread-sleep! 10) - (if (> count 0) - (lock-queue:get-lock dbdat test-id count: (- count 1)) - (begin ;; never recovered, remote the lock file and return #f, no lock obtained - (lock-queue:delete-lock-db dbdat) - #f))) + ;; (if (> count 0) + ;; #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries + ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained + (lock-queue:delete-lock-db dbdat) + #f) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tid lockstate) (set! res (list tid lockstate))) Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -83,11 +83,11 @@ ;; ease debugging by loading ~/.dashboardrc (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) (if (file-exists? debugcontrolf) (load debugcontrolf))) -(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) +(define *dbdir* (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) (define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* local: #t)) (define *db-file-path* (db:dbfile-path 0)) ;; HACK ALERT: this is a hack, please fix. Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -222,11 +222,11 @@ res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) (let* ((dbstruct-local (if *dbstruct-db* *dbstruct-db* - (let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (let* ((dbdir (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) (db (make-dbr:dbstruct path: dbdir local: #t))) (set! *dbstruct-db* db) db))) (db-file-path (db:dbfile-path 0)) ;; (read-only (not (file-read-access? db-file-path))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -516,11 +516,11 @@ ;; (define (tasks:start-monitor db mdb) (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) - (monitordbf (conc (configf:lookup *configdat* "setup" "linktree") "/.db/monitor.db")) + (monitordbf (conc (db:dbfile-path #f) "/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -45,26 +45,34 @@ (define (open-test-db work-area) (debug:print-info 11 "open-test-db " work-area) (if (and work-area (directory? work-area) (file-read-access? work-area)) - (let* ((dbpath (conc work-area "/testdat.db")) - (tdb-writeable (file-write-access? dbpath)) - (dbexists (file-exists? dbpath)) + (let* ((dbpath (conc work-area "/testdat.db")) + (dbexists (file-exists? dbpath)) + (work-area-writeable (file-write-access? work-area)) + (db (handle-exceptions ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 2 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" + ((condition-property-accessor 'exn 'message) exn)) + (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery + (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access + (if (or work-area-writeable + dbexists) + (sqlite3:open-database dbpath) + (sqlite3:open-database ":memory:")))) + (tdb-writeable (and (file-write-access? work-area) + (file-write-access? dbpath))) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 2 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" - ((condition-property-accessor 'exn 'message) exn)) - (set! db (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access - (set! dbexists #f)) ;; must force re-creation of tables, more tom-foolery - (set! db (sqlite3:open-database dbpath))) - (if *db-write-access* (sqlite3:set-busy-handler! db handler)) + + (if (and tdb-writeable + *db-write-access*) + (sqlite3:set-busy-handler! db handler)) (if (not dbexists) (begin (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (tdb:testdb-initialize db))) @@ -81,15 +89,16 @@ #f) ;; Is there a cheaper single line operation that will check for existance of a table ;; and raise an exception ? (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) db) + ;; no work-area or not readable - create a placeholder to fake rest of world out (let ((baddb (sqlite3:open-database ":memory:"))) - (debug:print-info 11 "open-test-db END (unsucessful)" work-area) - ;; provide an in-mem db (this is dangerous!) - (tdb:testdb-initialize baddb) - baddb))) + (debug:print-info 11 "open-test-db END (unsucessful)" work-area) + ;; provide an in-mem db (this is dangerous!) + (tdb:testdb-initialize baddb) + baddb))) ;; find and open the testdat.db file for an existing test (define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) (let* ((test-path (if work-area work-area Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -316,85 +316,93 @@ (debug:print 0 "ERROR: summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) - (begin - (if (not (lock-queue:wait-turn outputfilename test-id)) - (print "Not updating " outputfilename " as another test item has signed up for the job") - (begin - (print "Obtained lock for " outputfilename) - (let ((oup (open-output-file outputfilename)) - (counts (make-hash-table)) - (statecounts (make-hash-table)) - (outtxt "") - (tot 0) - (testdat (rmt:test-get-records-for-index-file run-id test-name))) - (with-output-to-port - oup - (lambda () - (set! outtxt (conc outtxt "Summary: " test-name - "

Summary for " test-name "

")) - (for-each - (lambda (testrecord) - (let ((id (vector-ref testrecord 0)) - (itempath (vector-ref testrecord 1)) - (state (vector-ref testrecord 2)) - (status (vector-ref testrecord 3)) - (run_duration (vector-ref testrecord 4)) - (logf (vector-ref testrecord 5)) - (comment (vector-ref testrecord 6))) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - ;; " " itempath "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - "")))) - (if (list? testdat) - testdat - (begin - (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) - '()))) - - (print "
") - ;; Print out stats for status - (set! tot 0) - (print "") - (for-each (lambda (state) - (set! tot (+ tot (hash-table-ref statecounts state))) - (print "")) - (hash-table-keys statecounts)) - (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") - (print "
") - ;; Print out stats for state - (set! tot 0) - (print "") - (for-each (lambda (status) - (set! tot (+ tot (hash-table-ref counts status))) - (print "")) - (hash-table-keys counts)) - (print "

Status stats

" status - "" (hash-table-ref counts status) "
Total" tot "
") - (print "
") - - (print "" - "" - outtxt "
ItemStateStatusComment
") - ;; (release-dot-lock outputfilename) - )) - (close-output-port oup) - (lock-queue:release-lock outputfilename test-id) + (let ((my-start-time (current-seconds)) + (lockf (conc outputfilename ".lock"))) + (let loop ((have-lock (common:simple-file-lock lockf))) + (if have-lock + (begin + (print "Obtained lock for " outputfilename) + (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (common:simple-file-release-lock lockf) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... - (tests:test-set-toplog! run-id test-name outputfilename) - ))))))) + (tests:test-set-toplog! run-id test-name outputfilename)) + ;; didn't get the lock, check to see if current update started later than this + ;; update, if so we can exit without doing any work + (if (> my-start-time (file-modification-time lockf)) + ;; we started since current re-gen in flight, delay a little and try again + (begin + (debug:print-info 1 "Waiting to update " outputfilename ", another test currently updating it") + (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds + (loop (common:simple-file-lock lockf)))))))))) + +(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (let ((counts (make-hash-table)) + (statecounts (make-hash-table)) + (outtxt "") + (tot 0) + (testdat (rmt:test-get-records-for-index-file run-id test-name))) + (with-output-to-file outputfilename + (lambda () + (set! outtxt (conc outtxt "Summary: " test-name + "

Summary for " test-name "

")) + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + ;; " " itempath "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + (if (list? testdat) + testdat + (begin + (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + + (print "
") + ;; Print out stats for status + (set! tot 0) + (print "") + (for-each (lambda (state) + (set! tot (+ tot (hash-table-ref statecounts state))) + (print "")) + (hash-table-keys statecounts)) + (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") + (print "
") + ;; Print out stats for state + (set! tot 0) + (print "") + (for-each (lambda (status) + (set! tot (+ tot (hash-table-ref counts status))) + (print "")) + (hash-table-keys counts)) + (print "

Status stats

" status + "" (hash-table-ref counts status) "
Total" tot "
") + (print "
") + + (print "" + "" + outtxt "
ItemStateStatusComment
") + ;; (release-dot-lock outputfilename) + )))) ;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! ;; ;; get a pretty table to summarize steps ;; Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -27,14 +27,15 @@ # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 -# yes, anything else is no -run-wait yes +# set the dbdir, default is linktree +dbdir #{getenv MT_RUN_AREA_HOME}/db/ - +# wait for runs to completely complete. yes, anything else is no +run-wait yes # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # runqueue 20 Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -92,11 +92,11 @@ ;; (test #f 1 (rmt:register-run keyvals runname "new" "n/a" user)) ;; (test #f #f (rmt:get-runs-by-patt keys runname)) (test #f #t (rmt:general-call 'register-test run-id run-id "test-one" "")) (define test-one-id #f) -(test #f 1 (let ((test-id (rmt:get-test-id run-id "test-one" ""))) +(test #f 30001 (let ((test-id (rmt:get-test-id run-id "test-one" ""))) (set! test-one-id test-id) test-id)) (define test-one-rec #f) (test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id))) (set! test-one-rec test-rec)