Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -100,11 +100,11 @@ (begin (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) (client:setup-http areapath remaining-tries: (- remaining-tries 1)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -217,30 +217,40 @@ (file-write (if file-exists (file-write-access? fname) dir-writable ))) ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped. (if file-write ;; dir-writable - (let (;; (lock (obtain-dot-lock fname 1 5 10)) - (db (sqlite3:open-database fname))) - (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) - ;; (db:set-sync db) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (not file-exists) - (begin - (if (and (configf:lookup *configdat* "setup" "use-wal") - (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp - (sqlite3:execute db "PRAGMA journal_mode=WAL;") - (print "Creating " fname " in NON-WAL mode.")) - (initproc db))) - ;; (release-dot-lock fname) - ;;(mutex-unlock! *db-open-mutex*) - db) - (begin - (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) - (let ((db (sqlite3:open-database fname))) - ;;(mutex-unlock! *db-open-mutex*) - db))))) ;; ) + (condition-case + (let ((db (sqlite3:open-database fname))) + (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (not file-exists) + (begin + (if (and (configf:lookup *configdat* "setup" "use-wal") + (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp + (sqlite3:execute db "PRAGMA journal_mode=WAL;") + (print "Creating " fname " in NON-WAL mode.")) + (initproc db))) + db) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + + (condition-case + (begin + (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) + (let ((db (sqlite3:open-database fname))) + ;;(mutex-unlock! *db-open-mutex*) + db)) + (exn (io-error) (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) + (exn (corrupt) (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed.")) + (exn (busy) (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) + (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem.")) + (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))) + ))) @@ -1577,11 +1587,11 @@ (if (> (length all-ids) 0) (begin (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (for-each (lambda (test-id) - (db:test-set-state-status dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete")) + (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332 all-ids)))))))) ;; ALL REPLACED BY THE BLOCK ABOVE ;; ;; (sqlite3:execute Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -255,11 +255,21 @@ (db-file-path (db:dbfile-path)) ;; 0)) (dbstruct-local (db:setup)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) - (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)) + (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) + (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. + exn ;; This is an attempt to detect that situation and recover gracefully + (begin + (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy + (if (and (vector? v) + (> (vector-length v) 1)) + (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) + newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record + (vector #t '())))) ;; we could also check that the returned types are valid (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) @@ -277,11 +287,11 @@ ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) - (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) +/ (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1027,56 +1027,61 @@ ;; summarize test in to a file test-summary.html in the test directory ;; (define (tests:summarize-test run-id test-id) (let* ((test-dat (rmt:get-test-info-by-id run-id test-id)) - (steps-dat (rmt:get-steps-for-test run-id test-id)) - (test-name (db:test-get-testname test-dat)) - (item-path (db:test-get-item-path test-dat)) - (full-name (db:test-make-full-name test-name item-path)) - (oup (open-output-file (conc (db:test-get-rundir test-dat) "/test-summary.html"))) - (status (db:test-get-status test-dat)) - (color (common:get-color-from-status status)) - (logf (db:test-get-final_logf test-dat)) - (steps-dat (tests:get-compressed-steps run-id test-id))) - ;; (dcommon:get-compressed-steps #f 1 30045) - ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log")) - - (s:output-new - oup - (s:html - (s:title "Summary for " full-name) - (s:body - (s:h2 "Summary for " full-name) - (s:table 'cellspacing "0" 'border "1" - (s:tr (s:td "run id") (s:td (db:test-get-run_id test-dat)) - (s:td "test id") (s:td (db:test-get-id test-dat))) - (s:tr (s:td "testname") (s:td test-name) - (s:td "itempath") (s:td item-path)) - (s:tr (s:td "state") (s:td (db:test-get-state test-dat)) - (s:td "status") (s:td (s:a 'href logf (s:font 'color color status)))) - (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time - (db:test-get-event_time test-dat))) - (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) - (s:h3 "Log files") - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) - (s:table - 'cellspacing "0" 'border "1" - (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) - (map (lambda (step-dat) - (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) - (s:td (tdb:steps-table-get-start step-dat)) - (s:td (tdb:steps-table-get-end step-dat)) - (s:td (tdb:steps-table-get-status step-dat)) - (s:td (tdb:steps-table-get-runtime step-dat)) - (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) - (s:a 'href step-log step-log))))) - steps-dat)) - ))) - (close-output-port oup))) + (out-dir (db:test-get-rundir test-dat)) + (out-file (conc out-dir "/test-summary.html"))) + ;; first verify we are able to write the output file + (if (not (file-write-access? out-dir)) + (debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir) + (let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id)) + (test-name (db:test-get-testname test-dat)) + (item-path (db:test-get-item-path test-dat)) + (full-name (db:test-make-full-name test-name item-path)) + (oup (open-output-file out-file)) + (status (db:test-get-status test-dat)) + (color (common:get-color-from-status status)) + (logf (db:test-get-final_logf test-dat)) + (steps-dat (tests:get-compressed-steps run-id test-id))) + ;; (dcommon:get-compressed-steps #f 1 30045) + ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log")) + + (s:output-new + oup + (s:html + (s:title "Summary for " full-name) + (s:body + (s:h2 "Summary for " full-name) + (s:table 'cellspacing "0" 'border "1" + (s:tr (s:td "run id") (s:td (db:test-get-run_id test-dat)) + (s:td "test id") (s:td (db:test-get-id test-dat))) + (s:tr (s:td "testname") (s:td test-name) + (s:td "itempath") (s:td item-path)) + (s:tr (s:td "state") (s:td (db:test-get-state test-dat)) + (s:td "status") (s:td (s:a 'href logf (s:font 'color color status)))) + (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time + (db:test-get-event_time test-dat))) + (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat))))) + (s:h3 "Log files") + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Final log")(s:td (s:a 'href logf logf)))) + (s:table + 'cellspacing "0" 'border "1" + (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File")) + (map (lambda (step-dat) + (s:tr (s:td (tdb:steps-table-get-stepname step-dat)) + (s:td (tdb:steps-table-get-start step-dat)) + (s:td (tdb:steps-table-get-end step-dat)) + (s:td (tdb:steps-table-get-status step-dat)) + (s:td (tdb:steps-table-get-runtime step-dat)) + (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat))) + (s:a 'href step-log step-log))))) + steps-dat)) + ))) + (close-output-port oup))))) ;; MUST BE CALLED local! ;; (define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))