@@ -105,11 +105,14 @@ ;; any of the other stuff that tests:test-set-status! does. Let's just ;; force RUNNING/n/a ;; (thread-sleep! 0.3) ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) + (common:soft-lock + (lambda () + (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f)) + "set-state-status-roll-up") ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here ;; if there is a runscript do it first (if fullrunscript (let ((pid (process-run fullrunscript))) @@ -1353,13 +1356,33 @@ (when do-scan? (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") (rmt:set-var key (current-seconds)) (rmt:find-and-mark-incomplete run-id #f)))) - - - +;; weak locking - slow down concurrent activities but don't entirely stop them from +;; overlapping. wait for up to 5 seconds +;; +(define (common:soft-lock proc keyname #!key (wait-time 5)) + (let* ((start-time (current-seconds)) + (my-key (conc (current-process-id) "-" (get-host-name)))) + (let loop ((key-val (rmt:no-sync-get/default keyname #f))) + (if (and key-val + (not (equal? key-val my-key)) + (< (- (current-seconds) start-time) wait-time)) + (begin + (thread-sleep! 0.5) + (debug:print-info 0 *default-log-port* "Still trying to lock for " keyname) + (loop (current-seconds))) ;; try again + (begin ;; either we got the lock or we timed out - proceed as if + (rmt:no-sync-set keyname my-key) + (if (equal? (rmt:no-sync-get/default keyname #f) my-key) + (begin + (debug:print-info 0 *default-log-port* "Got lock for " keyname) + (proc) + (rmt:no-sync-del! keyname) + (debug:print-info 0 *default-log-port* "Released lock for " keyname)) + (common:soft-lock proc keyname wait-time: wait-time))))))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host @@ -1466,11 +1489,14 @@ ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + (common:soft-lock + (lambda () + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)) + "set-state-status-roll-up") ;; (pp (hash-table->alist tconfig)) (set! diskpath (get-best-disk *configdat* tconfig)) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat))