Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -165,10 +165,12 @@ (define-inline (dboard:uidat-get-keycol vec)(vector-ref vec 0)) (define-inline (dboard:uidat-get-lftcol vec)(vector-ref vec 1)) (define-inline (dboard:uidat-get-header vec)(vector-ref vec 2)) (define-inline (dboard:uidat-get-runsvec vec)(vector-ref vec 3)) + +(if (get-environment-variable "MT_RUN_AREA_HOME")(change-directory (get-environment-variable "MT_RUN_AREA_HOME"))) (define (message-window msg) (iup:show (iup:dialog (iup:vbox Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -141,11 +141,11 @@ (db (sqlite3:open-database dbpath)) (olddb (db:open-megatest-db)) (write-access (file-write-access? dbpath)) (handler (make-busy-timeout 136000))) (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) ;; only unset so other db's also can use this control + (set! *db-write-access* #f)) ;; only unset so other db's lso can use this control (if write-access (begin (if (not dbexists) (begin (db:initialize-run-id-db db) @@ -480,23 +480,30 @@ #f)) (define (open-run-close-exception-handling proc idb . params) (handle-exceptions exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print-info 0 "trying db call one more time....") - (apply open-run-close-no-exception-handling proc idb params)) + (let ((sleep-time (random 30)) + (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (case err-status + ((busy) + (thread-sleep! sleep-time)) + (else + (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (print "exn=" (condition->list exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain) + (thread-sleep! sleep-time) + (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up"))) + (apply open-run-close-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) ;; (define open-run-close -(define open-run-close ;; (if (debug:debug-mode 2) +(define open-run-close open-run-close-exception-handling) ;; open-run-close-no-exception-handling - open-run-close-exception-handling) + ;; open-run-close-exception-handling) ;;) (define (db:initialize-main-db db) (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) @@ -611,10 +618,12 @@ comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat ( + ;; Why use FULL here? This data is not that critical + ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") id INTEGER PRIMARY KEY, test_id INTEGER, update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, diskfree INTEGER DEFAULT -1, @@ -1976,10 +1985,35 @@ (hash-table-set! tests-hash full-testname testdat)))) results) (if (null? tal) (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests (loop (car tal)(cdr tal)))))))))) + (let* ((remtries 10) + (proc #f)) + (set! proc (lambda (remtries) + (if (> remtries 0) + (handle-exceptions + exn + (let ((sleep-time (random 30)) + (err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (case err-status + ((busy) + (thread-sleep! sleep-time) + (proc 10)) ;; we never give up on busy + (else + (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain) + (debug:print 0 "Sleeping for " sleep-time) + (thread-sleep! sleep-time) + (debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up") + (proc (- remtries 1))))) + (apply sqlite3:execute db query params)) + (debug:print 0 "ERROR: too many attempts to access db were made and no sucess. query: " + query ", params: " params)))) + (proc remtries)) (define (db:test-get-records-for-index-file dbstruct run-id test-name) (let ((res '())) (sqlite3:for-each-row (lambda (id itempath state status run_duration logf-id comment-id) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -453,22 +453,25 @@ ;; prereqstrs)) (give-up #f)) ;; We can get here when a prereq has not been run due to *it* having a prereq that failed. ;; We need to use this to dequeue this item as CANNOTRUN - (for-each (lambda (prereq) - (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) - (set! give-up #t))) - prereqstrs) + ;; + (if (member testmode '(toplevel)) + (for-each (lambda (prereq) + (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN) + (set! give-up #t))) + prereqstrs)) + (if (and give-up (not (and (null? tal)(null? reg)))) (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records)) (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records))) (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue") (let ((test-id (rmt:get-test-id run-id hed ""))) - (mt:test-set-state-status-by-id test-id "DEQUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) + (mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites")) (if (and (null? trimmed-tal) (null? trimmed-reg)) #f (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)