Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -6,11 +6,11 @@ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ fs-transport.scm http-transport.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ - tree.scm ezsteps.scm + tree.scm ezsteps.scm lock-queue.scm GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -165,9 +165,9 @@ ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no + (tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no ))) (pop-directory) rollup-status)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1,7 +1,7 @@ -;; Copyright 2006-2012, Matthew Welland. +;; Copyright 2006-2013, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the @@ -362,24 +362,19 @@ new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)) - - )) + (begin + (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority + (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no - ) + (tests:summarize-items #f run-id test-id test-name #f))) ;; don't force - just update if no (mutex-unlock! m) - ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) - ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. (define (setup-for-run) ADDED lock-queue.scm Index: lock-queue.scm ================================================================== --- /dev/null +++ lock-queue.scm @@ -0,0 +1,126 @@ +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + +(use sqlite3 srfi-18) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit lock-queue)) +(declare (uses common)) + +;;====================================================================== +;; attempt to prevent overlapping updates of rollup files by queueing +;; update requests in an sqlite db +;;====================================================================== + +(define (lock-queue:open-db fname) + (let* ((actualfname (conc fname ".lockdb")) + (dbexists (file-exists? actualfname)) + (db (sqlite3:open-database actualfname)) + (handler (make-busy-timeout 3600))) + (if dbexists + db + (begin + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + start_time INTEGER, + state TEXT, + CONSTRAINT queue_constraint UNIQUE (test_id));") + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + run_lock TEXT, + CONSTRAINT runlock_constraint UNIQUE (run_lock));"))) + db)) + +(define (lock-queue:set-state db test-id newstate) + (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;" + newstate + test-id)) + +(define (lock-queue:any-younger? db mystart test-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (tid) + ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as + (if (not (equal? tid test-id)) + (set! res tid))) + db + "SELECT test_id FROM queue WHERE start_time > ?;" mystart) + res)) + +(define (lock-queue:get-lock db test-id) + (let ((res #f) + (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) + (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) + (handle-exceptions + exn + #f + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tid lockstate) + (set! res (list tid lockstate))) + lckqry) + (if res + (if (equal? (car res) test-id) + #t ;; already have the lock + #f) + (begin + (sqlite3:execute mklckqry test-id) + ;; if no error handled then return #t for got the lock + #t))))))) + +(define (lock-queue:release-lock fname test-id) + (let ((db (lock-queue:open-db fname))) + (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id) + (sqlite3:finalize! db))) + +;; returns #f if ok to skip the task +;; returns #t if ok to proceed with task +;; otherwise waits +;; +(define (lock-queue:wait-turn fname test-id) + (let ((db (lock-queue:open-db fname)) + (mystart (current-seconds))) + (sqlite3:execute + db + "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" + test-id mystart) + (thread-sleep! 1) ;; give other tests a chance to register + (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id))) + (if younger-waiting + (begin + ;; no need for us to wait. mark in the lock queue db as skipping + (lock-queue:set-state db test-id "skipping") + (begin + ;; (sqlite3:finalize! db) + #f)) ;; let the calling process know that nothing needs to be done + (if (lock-queue:get-lock db test-id) + (begin + ;; (sqlite3:finalize! db) + #t) + (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock + (lock-queue:steal-lock db test-id) + (begin + (thread-sleep! 1) + (loop (lock-queue:any-younger? db mystart test-id))))))))) + + +;; (use trace) +;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -936,11 +936,11 @@ (if (args:get-arg "-set-toplog") ;; DO NOT run remote (tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") ;; DO NOT run remote - (tests:summarize-items db run-id test-name #t)) ;; do force here + (tests:summarize-items db run-id test-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") (if (null? remargs) (begin (debug:print 0 "ERROR: nothing specified to run!") (if db (sqlite3:finalize! db)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -15,10 +15,11 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) (import (prefix sqlite3 sqlite3:)) (declare (unit tests)) +(declare (uses lock-queue)) (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) @@ -378,11 +379,11 @@ (define (tests:test-set-toplog! db run-id test-name logf) (cdb:client-call *runremote* 'tests:test-set-toplog #t 2 logf run-id test-name)) -(define (tests:summarize-items db run-id test-name force) +(define (tests:summarize-items db run-id test-id test-name force) ;; if not force then only update the record if one of these is true: ;; 1. logf is "log/final.log ;; 2. logf is same as outputfilename (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) (orig-dir (current-directory)) @@ -399,12 +400,13 @@ (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) (begin - (if (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock - (print "Failed to obtain lock for " outputfilename) + (if ;; (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock + (not (lock-queue:wait-turn outputfilename test-id)) + (print "Not updating " outputfilename " as another test item has signed up for the job") (begin (print "Obtained lock for " outputfilename) (let ((oup (open-output-file outputfilename)) (counts (make-hash-table)) (statecounts (make-hash-table)) @@ -461,10 +463,11 @@ (print "" "" outtxt "
ItemStateStatusComment
") (release-dot-lock outputfilename))) (close-output-port oup) + (lock-queue:release-lock outputfilename test-id) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! db run-id test-name outputfilename) )))))))