Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -164,10 +164,33 @@ (debug:print 0 "ERROR: received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) + +;; dot-locking egg seems not to work, using this for now +;; if lock is older than expire-time then remove it and try again +;; to get the lock +;; +(define (common:simple-file-lock fname #!key (expire-time 300)) + (if (file-exists? fname) + (if (> (- (current-seconds)(file-modification-time fname)) expire-time) + (begin + (delete-file* fname) + (common:simple-file-lock fname expire-time: expire-time)) + #f) + (let ((key-string (conc (get-host-name) "-" (current-process-id)))) + (with-output-to-file fname + (lambda () + (print key-string))) + (thread-sleep! 0.25) + (with-input-from-file fname + (lambda () + (equal? key-string (read-line))))))) + +(define (common:simple-file-release-lock fname) + (delete-file* fname)) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -142,12 +142,15 @@ ;; NB// #f => zeroth db with name=main.db ;; (define (db:dbfile-path run-id) (let* (;; (toppath (dbr:dbstruct-get-path dbstruct)) (link-tree-path (configf:lookup *configdat* "setup" "linktree")) + (dbpath (configf:lookup *configdat* "setup" "dbdir")) (fname (if (eq? run-id 0) "main.db" (conc run-id ".db"))) - (dbdir (conc link-tree-path "/.db/"))) + (dbdir (if dbpath + dbpath + (conc link-tree-path "/.db/")))) (handle-exceptions exn (begin (debug:print 0 "ERROR: Couldn't create path to " dbdir) (exit 1)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -435,13 +435,13 @@ (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! )) ;; for automated creation of the rollup html file this is a good place... - (if (and (not (equal? item-path "")) - (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) - (tests:summarize-items run-id test-id test-name #f)) + ;; (if (and (not (equal? item-path "")) + ;; (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) + (tests:summarize-items run-id test-id test-name #f) (tests:summarize-test run-id test-id)) ;; don't force - just update if no (mutex-unlock! m) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") (if (not (vector-ref exit-info 1)) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -119,18 +119,18 @@ (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) (let ((result (handle-exceptions exn (begin - (debug:print 0 "WARNING: failed to get queue lock. Will try again in a few seconds") + (debug:print 0 "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (thread-sleep! 10) - (if (> count 0) - (lock-queue:get-lock dbdat test-id count: (- count 1)) - (begin ;; never recovered, remote the lock file and return #f, no lock obtained - (lock-queue:delete-lock-db dbdat) - #f))) + ;; (if (> count 0) + ;; #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries + ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained + (lock-queue:delete-lock-db dbdat) + #f) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tid lockstate) (set! res (list tid lockstate))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -316,85 +316,93 @@ (debug:print 0 "ERROR: summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path)) (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) - (begin - (if (not (lock-queue:wait-turn outputfilename test-id)) - (print "Not updating " outputfilename " as another test item has signed up for the job") - (begin - (print "Obtained lock for " outputfilename) - (let ((oup (open-output-file outputfilename)) - (counts (make-hash-table)) - (statecounts (make-hash-table)) - (outtxt "") - (tot 0) - (testdat (rmt:test-get-records-for-index-file run-id test-name))) - (with-output-to-port - oup - (lambda () - (set! outtxt (conc outtxt "Summary: " test-name - "

Summary for " test-name "

")) - (for-each - (lambda (testrecord) - (let ((id (vector-ref testrecord 0)) - (itempath (vector-ref testrecord 1)) - (state (vector-ref testrecord 2)) - (status (vector-ref testrecord 3)) - (run_duration (vector-ref testrecord 4)) - (logf (vector-ref testrecord 5)) - (comment (vector-ref testrecord 6))) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - ;; " " itempath "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - "")))) - (if (list? testdat) - testdat - (begin - (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) - '()))) - - (print "
") - ;; Print out stats for status - (set! tot 0) - (print "") - (for-each (lambda (state) - (set! tot (+ tot (hash-table-ref statecounts state))) - (print "")) - (hash-table-keys statecounts)) - (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") - (print "
") - ;; Print out stats for state - (set! tot 0) - (print "") - (for-each (lambda (status) - (set! tot (+ tot (hash-table-ref counts status))) - (print "")) - (hash-table-keys counts)) - (print "

Status stats

" status - "" (hash-table-ref counts status) "
Total" tot "
") - (print "
") - - (print "" - "" - outtxt "
ItemStateStatusComment
") - ;; (release-dot-lock outputfilename) - )) - (close-output-port oup) - (lock-queue:release-lock outputfilename test-id) + (let ((my-start-time (current-seconds)) + (lockf (conc outputfilename ".lock"))) + (let loop ((have-lock (common:simple-file-lock lockf))) + (if have-lock + (begin + (print "Obtained lock for " outputfilename) + (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (common:simple-file-release-lock lockf) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... - (tests:test-set-toplog! run-id test-name outputfilename) - ))))))) + (tests:test-set-toplog! run-id test-name outputfilename)) + ;; didn't get the lock, check to see if current update started later than this + ;; update, if so we can exit without doing any work + (if (> my-start-time (file-modification-time lockf)) + ;; we started since current re-gen in flight, delay a little and try again + (begin + (debug:print-info 1 "Waiting to update " outputfilename ", another test currently updating it") + (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds + (loop (common:simple-file-lock lockf)))))))))) + +(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename) + (let ((counts (make-hash-table)) + (statecounts (make-hash-table)) + (outtxt "") + (tot 0) + (testdat (rmt:test-get-records-for-index-file run-id test-name))) + (with-output-to-file outputfilename + (lambda () + (set! outtxt (conc outtxt "Summary: " test-name + "

Summary for " test-name "

")) + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + ;; " " itempath "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + (if (list? testdat) + testdat + (begin + (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name) + '()))) + + (print "
") + ;; Print out stats for status + (set! tot 0) + (print "") + (for-each (lambda (state) + (set! tot (+ tot (hash-table-ref statecounts state))) + (print "")) + (hash-table-keys statecounts)) + (print "

State stats

" state "" (hash-table-ref statecounts state) "
Total" tot "
") + (print "
") + ;; Print out stats for state + (set! tot 0) + (print "") + (for-each (lambda (status) + (set! tot (+ tot (hash-table-ref counts status))) + (print "")) + (hash-table-keys counts)) + (print "

Status stats

" status + "" (hash-table-ref counts status) "
Total" tot "
") + (print "
") + + (print "" + "" + outtxt "
ItemStateStatusComment
") + ;; (release-dot-lock outputfilename) + )))) ;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!! ;; ;; get a pretty table to summarize steps ;; Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -27,14 +27,15 @@ # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 -# yes, anything else is no -run-wait yes +# set the dbdir, default is linktree +dbdir #{getenv MT_RUN_AREA_HOME}/db/ - +# wait for runs to completely complete. yes, anything else is no +run-wait yes # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # runqueue 20