Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1238,23 +1238,23 @@ ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ((lock-key (conc "test-" test-id)) - (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - (if (car lock) - #t - (if (> (current-seconds) expire-time) - (begin - (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - (rmt:no-sync-del! lock-key) ;; destroy the lock - (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - (begin - (thread-sleep! 1) - (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) (item-path (item-list->path itemdat)) (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) (if (> launch-delay delta) @@ -1427,11 +1427,11 @@ (car fullcmd)) (if useshell '() (cdr fullcmd))))) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - (rmt:no-sync-del! lock-key) ;; release the lock for starting this test + ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6424) +(define megatest-version 1.6425) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1963,23 +1963,24 @@ ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f)) (clean-mode (or mode 'remove-all)) (test-id (db:test-get-id test)) - (lock-key (conc "test-" test-id)) - (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds - (if (car lock) - #t - (if (> (current-seconds) expire-time) - (begin - (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) - (rmt:no-sync-del! lock-key) ;; destroy the lock - (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - (begin - (thread-sleep! 1) - (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) + ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) + ) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) @@ -2016,11 +2017,12 @@ ;; Only delete the records *after* removing the directory. If things fail we have a record (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) - (rmt:no-sync-del! lock-key))) + ;; (rmt:no-sync-del! lock-key) + )) ;;====================================================================== ;; Routines for manipulating runs ;;======================================================================