Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -214,10 +214,11 @@ ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -2071,11 +2071,17 @@ (number->string x 16)) (map string->number (string-split instr))) "/")) -(define (common:faux-lock keyname #!key (wait-time 8)) +;;====================================================================== +;; L O C K I N G M E C H A N I S M S +;;====================================================================== + +;; faux-lock is deprecated. Please use simple-lock below +;; +(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count (if (> wait-time 0) (begin (thread-sleep! 1) (if (eq? wait-time 1) ;; only one second left, steal the lock @@ -2093,11 +2099,19 @@ (begin (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) #t) #f)) - +;; simple lock. improve and converge on this one. +;; +(define (common:simple-lock keyname) + (rmt:no-sync-get-lock keyname)) + +;;====================================================================== +;; +;;====================================================================== + (define (common:in-running-test?) (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) (define (common:get-color-from-status status) (cond Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1891,10 +1891,29 @@ res)) res))) (define (db:no-sync-close-db db) (db:safely-close-sqlite3-db db)) + +;; transaction protected lock aquisition +;; either: +;; fails returns (#f . lock-creation-time) +;; succeeds (returns (#t . lock-creation-time) +;; use (db:no-sync-del! db keyname) to release the lock +;; +(define (db:no-sync-get-lock db keyname) + (sqlite3:with-transaction + (db:no-sync-db db) + (lambda () + (handle-exceptions + exn + (let ((lock-time (current-seconds))) + (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) + `(#t . ,lock-time)) + `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))) + + ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1238,11 +1238,24 @@ ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ((item-path (item-list->path itemdat)) + (let* ((lock-key (conc "test-" test-id)) + (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + (if (car lock) + #t + (if (> (current-seconds) expire-time) + (begin + (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + (rmt:no-sync-del! lock-key) ;; destroy the lock + (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + (begin + (thread-sleep! 1) + (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) (if (> launch-delay delta) (begin @@ -1414,10 +1427,11 @@ (car fullcmd)) (if useshell '() (cdr fullcmd))))) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -867,10 +867,13 @@ (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) +(define (rmt:no-sync-get-lock keyname) + (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) + ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded)