Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -68,33 +68,33 @@ (db:set-sync db) db)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) - (let* ((db (if idb idb (open-db))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! db)) - res)) + (let* ((db (if idb idb (open-db))) + (res #f)) + (set! res (apply proc db params)) + (if (not idb)(sqlite3:finalize! db)) + res)) (define (open-run-close-exception-handling proc idb . params) - (let ((runner (lambda () - (let* ((db (if idb idb (open-db))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! db)) - res)))) - (handle-exceptions - exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded?") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print 0 "trying db call one more time....") - (runner)) - (runner)))) + (let ((runner (lambda () + (let* ((db (if idb idb (open-db))) + (res #f)) + (set! res (apply proc db params)) + (if (not idb)(sqlite3:finalize! db)) + res)))) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: database probably overloaded?") + (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain) + (thread-sleep! (random 120)) + (debug:print 0 "trying db call one more time....") + (runner)) + (runner)))) (define open-run-close open-run-close-exception-handling) (define *global-delta* 0) (define *last-global-delta-printed* 0) @@ -268,24 +268,24 @@ units TEXT, comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" - "CREATE TABLE IF NOT EXISTS test_steps ( + "CREATE TABLE IF NOT EXISTS test_steps ( id INTEGER PRIMARY KEY, test_id INTEGER, stepname TEXT, state TEXT DEFAULT 'NOT_STARTED', status TEXT DEFAULT 'n/a', event_time TIMESTAMP, comment TEXT DEFAULT '', logfile TEXT DEFAULT '', CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" - ;; test_meta can be used for handing commands to the test - ;; e.g. KILLREQ - ;; the ackstate is set to 1 once the command has been completed - "CREATE TABLE IF NOT EXISTS test_meta ( + ;; test_meta can be used for handing commands to the test + ;; e.g. KILLREQ + ;; the ackstate is set to 1 once the command has been completed + "CREATE TABLE IF NOT EXISTS test_meta ( id INTEGER PRIMARY KEY, var TEXT, val TEXT, ackstate INTEGER DEFAULT 0, CONSTRAINT metadat_constraint UNIQUE (var));"))) @@ -470,17 +470,17 @@ (string-intersperse remfields ","))) (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " ;; Generate: " AND x LIKE 'keypatt' ..." (if (null? keypatts) "" (conc " AND " - (string-join - (map (lambda (keypatt) - (let ((key (car keypatt)) - (patt (cadr keypatt))) - (db:patt->like key patt))) - keypatts) - " AND "))) + (string-join + (map (lambda (keypatt) + (let ((key (car keypatt)) + (patt (cadr keypatt))) + (db:patt->like key patt))) + keypatts) + " AND "))) " ORDER BY event_time DESC " (if (number? count) (conc " LIMIT " count) "") (if (number? offset) @@ -524,11 +524,11 @@ (conc "SELECT " keystr " FROM runs WHERE id=?;") run-id) (let ((finalres (vector header res))) (hash-table-set! *run-info-cache* run-id finalres) finalres)))) - + (define (db:set-comment-for-run db run-id comment) (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment run-id)) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run db run-id) @@ -900,11 +900,11 @@ res)))) (define (db:test-set-log! db test-id logf) (if (string? logf) (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE id=?;" - logf test-id) + logf test-id) (debug:print 0 "ERROR: db:test-set-log! called with non-string log file name " logf))) ;;====================================================================== ;; Misc. test related queries ;;====================================================================== @@ -1016,13 +1016,13 @@ (define (db:updater) (let loop ((start-time (current-time))) (thread-sleep! 15) ;; move save time around to minimize regular collisions? (db:write-cached-data) (loop start-time))) - -(define (cdb:test-set-status-state test-id status state #!key (msg #f)) - (debug:print 4 "INFO: Adding status/state to queue: " status "/" state) + +(define (cdb:test-set-status-state test-id status state msg) + (debug:print 4 "INFO: cdb:test-set-status-state test-id=" test-id ", status=" status ", state=" state ", msg=" msg) (mutex-lock! *incoming-mutex*) (if msg (set! *incoming-data* (cons (vector 'state-status-msg (current-seconds) (list state status msg test-id)) @@ -1033,19 +1033,31 @@ *incoming-data*))) (mutex-unlock! *incoming-mutex*) (if *cache-on* (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write") (db:write-cached-data))) - + (define (cdb:test-rollup-iterated-pass-fail test-id) (debug:print 4 "INFO: Adding " test-id " for iterated rollup to the queue") (mutex-lock! *incoming-mutex*) (set! *incoming-data* (cons (vector 'iterated-p/f-rollup (current-seconds) (list test-id test-id test-id test-id)) *incoming-data*)) (mutex-unlock! *incoming-mutex*) + (if *cache-on* + (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write") + (db:write-cached-data))) + +(define (cdb:pass-fail-counts test-id fail-count pass-count) + (debug:print 4 "INFO: Adding " test-id " for setting pass/fail counts to the queue") + (mutex-lock! *incoming-mutex*) + (set! *incoming-data* (cons (vector 'pass-fail-counts + (current-seconds) + (list fail-count pass-count test-id)) + *incoming-data*)) + (mutex-unlock! *incoming-mutex*) (if *cache-on* (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write") (db:write-cached-data))) ;; The queue is a list of vectors where the zeroth slot indicates the type of query to @@ -1055,10 +1067,11 @@ (define (db:write-cached-data) (open-run-close (lambda (db . params) (let ((state-status-stmt (sqlite3:prepare db "UPDATE tests SET state=?,status=? WHERE id=?;")) (state-status-msg-stmt (sqlite3:prepare db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;")) + (pass-fail-counts-stmt (sqlite3:prepare db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;")) (iterated-rollup-stmt (sqlite3:prepare db "UPDATE tests SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 THEN 'FAIL' WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') @@ -1073,26 +1086,31 @@ (if (> (length data) 0) (debug:print 4 "INFO: Writing cached data " data)) (sqlite3:with-transaction db (lambda () + (debug:print 4 "INFO: flushing " data " to db") (for-each (lambda (entry) (let ((params (vector-ref entry 2))) - (debug:print 4 "INFO: flushing " entry " to db") + (debug:print 4 "INFO: Applying " entry " to params " params) (case (vector-ref entry 0) ((state-status) (apply sqlite3:execute state-status-stmt params)) ((state-status-msg) (apply sqlite3:execute state-status-msg-stmt params)) ((iterated-p/f-rollup) (apply sqlite3:execute iterated-rollup-stmt params)) + ((pass-fail-counts) + (apply sqlite3:execute pass-fail-counts-stmt params)) (else (debug:print 0 "ERROR: Queued entry not recognised " entry))))) data))) (sqlite3:finalize! state-status-stmt) (sqlite3:finalize! state-status-msg-stmt) (sqlite3:finalize! iterated-rollup-stmt) + (sqlite3:finalize! pass-fail-counts-stmt) + (set! *last-db-access* (current-seconds)) )) #f)) (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) (if (and (not (equal? item-path "")) @@ -1255,11 +1273,13 @@ (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" test-id test-id) (sqlite3:finalize! tdb) ;; Now rollup the counts to the central megatest.db - (sqlite3:execute db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;" fail-count pass-count test-id) + (rdb:pass-fail-counts test-id fail-count pass-count) + ;; (sqlite3:execute db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;" + ;; fail-count pass-count test-id) (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least one second later than the set ;; if the test is not FAIL then set status based on the fail and pass counts. (rdb:test-rollup-iterated-pass-fail test-id) @@ -1411,12 +1431,12 @@ (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; if the test is not found then clearly the waiton is not met... ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) (if (not ever-seen) (set! result (append (if (null? tests)(list waitontest-name) tests) result))))) - waitons) - (delete-duplicates result)))) + waitons) + (delete-duplicates result)))) (define (db:teststep-set-status! db test-id teststep-name state-in status-in item-path comment logfile) (debug:print 4 "test-id: " test-id " teststep-name: " teststep-name) (let* ((tdb (db:open-test-db-by-test-id db test-id)) (state (check-valid-items "state" state-in)) @@ -1563,25 +1583,32 @@ ;;====================================================================== ;; REMOTE DB ACCESS VIA RPC ;;====================================================================== (define (rdb:open-run-close procname . remargs) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - (apply (rpc:procedure 'rdb:open-run-close host port) procname remargs)) - (apply open-run-close (eval procname) remargs))) - -(define (rdb:test-set-status-state test-id status state) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - (apply (rpc:procedure 'cdb:test-set-status-state host port) test-id status state)) - (cdb:test-set-status-state test-id status state))) + (if *runremote* + (let ((host (vector-ref *runremote* 0)) + (port (vector-ref *runremote* 1))) + (apply (rpc:procedure 'rdb:open-run-close host port) procname remargs)) + (apply open-run-close (eval procname) remargs))) + +(define (rdb:test-set-status-state test-id status state msg) + (if *runremote* + (let ((host (vector-ref *runremote* 0)) + (port (vector-ref *runremote* 1))) + ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg)) + (cdb:test-set-status-state test-id status state msg))) (define (rdb:test-rollup-iterated-pass-fail test-id) - (if *runremote* - (let ((host (vector-ref *runremote* 0)) - (port (vector-ref *runremote* 1))) - (apply (rpc:procedure 'cdb:test-rollup-iterated-pass-fail host port) test-id)) - (cdb:test-rollup-iterated-pass-fail test-id))) + (if *runremote* + (let ((host (vector-ref *runremote* 0)) + (port (vector-ref *runremote* 1))) + (apply (rpc:procedure 'cdb:test-rollup-iterated-pass-fail host port) test-id)) + (cdb:test-rollup-iterated-pass-fail test-id))) + +(define (rdb:pass-fail-counts test-id fail-count pass-count) + (if *runremote* + (let ((host (vector-ref *runremote* 0)) + (port (vector-ref *runremote* 1))) + (apply (rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) + (cdb:pass-fail-counts test-id fail-count pass-count))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -387,15 +387,17 @@ (cond ((not (patt-list-match item-path item-patts)) ;; else the run is stuck, temporarily or permanently ;; but should check if it is due to lack of resources vs. prerequisites (debug:print 1 "INFO: Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " item-patts) + (thread-sleep! *global-delta*) (if (not (null? tal)) (loop (car tal)(cdr tal) reruns))) ((not (hash-table-ref/default test-registery (conc test-name "/" item-path) #f)) (open-run-close db:tests-register-test #f run-id test-name item-path) (hash-table-set! test-registery (conc test-name "/" item-path) #t) + (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ((not have-resources) ;; simply try again after waiting a second (thread-sleep! (+ 1 *global-delta*)) (debug:print 1 "INFO: no resources to run new tests, waiting ...") ;; could have done hed tal here but doing car/cdr of newtal to rotate tests @@ -420,13 +422,15 @@ ;; the waiton is FAIL so no point in trying to run hed ever again (if (not (null? tal)) (if (vector? hed) (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) " from the launch list as it has prerequistes that are FAIL") + (thread-sleep! *global-delta*) (loop (car tal)(cdr tal) (cons hed reruns))) (begin (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (thread-sleep! *global-delta*) (loop hed tal reruns))))))))) ;; case where an items came in as a list been processed ((and (list? items) ;; thus we know our items are already calculated (not itemdat)) ;; and not yet expanded into the list of things to be done @@ -451,11 +455,13 @@ (tests:testqueue-set-item_path! new-test-record my-item-path) (hash-table-set! test-records newtestname new-test-record) (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath items) (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) + (begin + (thread-sleep! *global-delta*) + (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) (let ((can-run-more (open-run-close runs:can-run-more-tests #f test-record))) @@ -484,30 +490,35 @@ (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) + (thread-sleep! *global-delta*) (loop hed tal reruns)) (begin (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") (exit 1)))))) ((null? fails) (debug:print 4 "INFO: fails is null, moving on in the queue but keeping " hed " for now") + (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)) ;; an issue with prereqs not yet met? ((and (not (null? fails))(eq? testmode 'normal)) (debug:print 1 "INFO: test " hed " (mode=" testmode ") has failed prerequisite(s); " (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") ", removing it from to-do list") (if (not (null? tal)) - (loop (car tal)(cdr tal)(cons hed reruns)))) + (begin + (thread-sleep! *global-delta*) + (loop (car tal)(cdr tal)(cons hed reruns))))) (else (debug:print 8 "ERROR: No handler for this condition.") ;; "\n hed: " hed ;; "\n fails: " (string-intersperse (map db:test-get-testname fails) ",") ;; "\n testmode: " testmode ;; "\n prereqs-not-met: " (pretty-string prereqs-not-met) ;; "\n items: " items) + (thread-sleep! *global-delta*) (loop (car newtal)(cdr newtal) reruns)))) ;; if can't run more just loop with next possible test (begin (debug:print 4 "INFO: processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) (thread-sleep! (+ 1 *global-delta*)) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -58,33 +58,49 @@ (rpc:publish-procedure! 'remote:run (lambda (procstr . params) (server:autoremote procstr params))) + (rpc:publish-procedure! + 'serve:login + (lambda (toppath) + (set! *last-db-access* (current-seconds)) + (if (equal? *toppath* toppath) + (begin + (debug:print 2 "INFO: login successful") + #t) + #f))) + ;;====================================================================== ;; db specials here ;;====================================================================== ;; remote call to open-run-close (rpc:publish-procedure! 'rdb:open-run-close (lambda (procname . remargs) - (debug:print 4 "INFO: rdb:open-run-close " procname " " remargs) + (debug:print 4 "INFO: Remote call of rdb:open-run-close " procname " " remargs) (set! *last-db-access* (current-seconds)) (apply open-run-close (eval procname) remargs))) (rpc:publish-procedure! 'cdb:test-set-status-state - (lambda (test-id status state) - (debug:print 4 "INFO: cdb:test-set-status-state " test-id " " status "/" state) - (apply cdb:test-set-status-state test-id status statue))) + (lambda (test-id status state msg) + (debug:print 4 "INFO: Remote call of cdb:test-set-status-state test-id=" test-id ", status=" status ", state=" state ", msg=" msg) + (cdb:test-set-status-state test-id status state msg))) (rpc:publish-procedure! 'cdb:test-rollup-iterated-pass-fail (lambda (test-id) - (debug:print 4 "INFO: cdb:test-rollup-iterated-pass-fail " test-id) + (debug:print 4 "INFO: Remote call of cdb:test-rollup-iterated-pass-fail " test-id) (apply cdb:test-rollup-iterated-pass-fail test-id))) + (rpc:publish-procedure! + 'cdb:pass-fail-counts + (lambda (test-id fail-count pass-count) + (debug:print 4 "INFO: Remote call of cdb:pass-fail-counts " test-id " passes: " pass-count " fails: " fail-count) + (apply cdb:pass-fail-counts test-id fail count-pass-count))) + ;;====================================================================== ;; end of publish-procedure section ;;====================================================================== (set! *rpc:listener* rpc:listener) @@ -103,11 +119,11 @@ ;; server last used then start shutdown (let loop ((count 0)) (thread-sleep! 20) ;; no need to do this very often (let ((numrunning (db:get-count-tests-running db))) (if (or (not (> numrunning 0)) - (> *last-db-access* (+ (current-seconds) 20))) + (> *last-db-access* (+ (current-seconds) 60))) (begin (debug:print 0 "INFO: Starting to shutdown the server side") (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'"); ;; AND val like ?;" ;; host:port) ;; need to delete only *my* server entry (future use) (thread-sleep! 10) @@ -138,15 +154,16 @@ (let ((portn (string->number port))) (debug:print 2 "INFO: Setting up to connect to host " host ":" port) (handle-exceptions exn (begin - (print "Exception: " ((condition-property-accessor 'exn 'message) exn)) - (open-run-close - (lambda (db . param) - (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'")) - #f) + (debug:print 0 "ERROR: Failed to open a connection to the server at host: " host " port: " port) + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + ;; (open-run-close + ;; (lambda (db . param) + ;; (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'")) + ;; #f) (set! *runremote* #f)) (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server ((rpc:procedure 'serve:login host portn) *toppath*)) (begin (debug:print 2 "INFO: Connected to " host ":" port) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -112,10 +112,11 @@ (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests (loop (car tal)(cdr tal)))))))))) ;; Do not rpc this one, do the underlying calls!!! (define (tests:test-set-status! test-id state status comment dat) + (debug:print 4 "INFO: tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat) (let* ((db #f) (real-status status) (otherdat (if dat dat (make-hash-table))) (testdat (open-run-close db:get-test-info-by-id db test-id)) (run-id (db:test-get-run_id testdat)) @@ -139,12 +140,12 @@ (if waived (set! real-status "WAIVED")) (debug:print 4 "real-status " real-status ", waived " waived ", status " status) ;; update the primary record IF state AND status are defined (if (and state status) - ;; (rdb:open-run-close 'cdb:test-set-state-status #f test-id real-status state)) ;; this one works - (rdb:test-set-status-state test-id real-status state)) + ;; (rdb:open-run-close 'cdb:test-set-status-state #f test-id real-status state)) ;; this one works + (rdb:test-set-status-state test-id real-status state #f)) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, do not rpc it (yet) (if (and test-id state status (equal? status "AUTO")) (open-run-close db:test-data-rollup db test-id status))