Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -155,10 +155,71 @@ CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") ;; Must do this *after* running patch db !! No more. (db:set-var db "MEGATEST_VERSION" megatest-version) )) +;; Create the sqlite db for the individual test(s) +(define (open-test-db testpath) + (let* ((dbpath (conc testpath "/testdat.db")) + (dbexists (file-exists? dbpath)) + (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (handler (make-busy-timeout (if (args:get-arg "-override-timeout") + (string->number (args:get-arg "-override-timeout")) + 36000)))) + (debug:print 4 "INFO: test dbpath=" dbpath) + (sqlite3:set-busy-handler! db handler) + (if (not dbexists) + (begin + (sqlite3:execute db "PRAGMA synchronous = FULL;") + (debug:print 0 "Initialized test database " dbpath) + (db:testdb-initialize db))) + (sqlite3:execute db "PRAGMA synchronous = 0;") + db)) + +(define (db:testdb-initialize db) + (for-each + (lambda (sqlcmd) + (sqlite3:execute db sqlcmd)) + (list "CREATE TABLE IF NOT EXISTS test_rundat ( + id INTEGER PRIMARY KEY, + update_time TIMESTAMP, + cpuload INTEGER DEFAULT -1, + diskfree INTEGER DEFAULT -1, + diskusage INTGER DEFAULT -1);" + "CREATE TABLE IF NOT EXISTS test_data ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + category TEXT DEFAULT '', + variable TEXT, + value REAL, + expected REAL, + tol REAL, + units TEXT, + comment TEXT DEFAULT '', + status TEXT DEFAULT 'n/a', + type TEXT DEFAULT '', + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));" + "CREATE TABLE IF NOT EXISTS test_steps ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + stepname TEXT, + state TEXT DEFAULT 'NOT_STARTED', + status TEXT DEFAULT 'n/a', + event_time TIMESTAMP, + comment TEXT DEFAULT '', + logfile TEXT DEFAULT '', + CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));" + ;; test_meta can be used for handing commands to the test + ;; e.g. KILLREQ + ;; the ackstate is set to 1 once the command has been completed + "CREATE TABLE IF NOT EXISTS test_meta ( + id INTEGER PRIMARY KEY, + var TEXT, + val TEXT, + ackstate INTEGER DEFAULT 0, + CONSTRAINT metadat_constraint UNIQUE (var));"))) + ;;====================================================================== ;; TODO: ;; put deltas into an assoc list with version numbers ;; apply all from last to current ;;====================================================================== @@ -725,53 +786,53 @@ ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS ;;====================================================================== -(define (db:updater db) - (let loop ((start-time (current-time))) - (thread-sleep! 0.5) ;; move save time around to minimize regular collisions? - (db:write-cached-data db) - (loop start-time))) - -(define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) - (mutex-lock! *incoming-mutex*) - (set! *incoming-data* (cons (vector 'meta-info - (current-seconds) - (list cpuload - diskfree - minutes - test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) - *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if *cache-on* - (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info") - (db:write-cached-data db))) - -(define (db:write-cached-data db) - (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');")) - (step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f) - (data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))) - (if (> (length data) 0) - (debug:print 4 "Writing cached data " data)) - (mutex-lock! *incoming-mutex*) - (sqlite3:with-transaction - db - (lambda () - (for-each (lambda (entry) - (case (vector-ref entry 0) - ((meta-info) - (apply sqlite3:execute meta-stmt (vector-ref entry 2))) - ((step-status) - (apply sqlite3:execute step-stmt (vector-ref entry 2))) - (else - (debug:print 0 "ERROR: Queued entry not recognised " entry)))) - data))) - (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap? - (sqlite3:finalize! step-stmt) - (set! *incoming-data* '()) - (mutex-unlock! *incoming-mutex*))) +;; (define (db:updater db) +;; (let loop ((start-time (current-time))) +;; (thread-sleep! 0.5) ;; move save time around to minimize regular collisions? +;; (db:write-cached-data db) +;; (loop start-time))) +;; +;; (define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) +;; (mutex-lock! *incoming-mutex*) +;; (set! *incoming-data* (cons (vector 'meta-info +;; (current-seconds) +;; (list cpuload +;; diskfree +;; minutes +;; test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) +;; *incoming-data*)) +;; (mutex-unlock! *incoming-mutex*) +;; (if *cache-on* +;; (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info") +;; (db:write-cached-data db))) +;; +;; (define (db:write-cached-data db) +;; (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');")) +;; (step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f) +;; (data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))) +;; (if (> (length data) 0) +;; (debug:print 4 "Writing cached data " data)) +;; (mutex-lock! *incoming-mutex*) +;; (sqlite3:with-transaction +;; db +;; (lambda () +;; (for-each (lambda (entry) +;; (case (vector-ref entry 0) +;; ((meta-info) +;; (apply sqlite3:execute meta-stmt (vector-ref entry 2))) +;; ((step-status) +;; (apply sqlite3:execute step-stmt (vector-ref entry 2))) +;; (else +;; (debug:print 0 "ERROR: Queued entry not recognised " entry)))) +;; data))) +;; (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap? +;; (sqlite3:finalize! step-stmt) +;; (set! *incoming-data* '()) +;; (mutex-unlock! *incoming-mutex*))) (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) (if (and (not (equal? item-path "")) (or (equal? status "PASS") (equal? status "WARN") @@ -1079,11 +1140,11 @@ (current-seconds) ;; FIXME - this should not update the logfile unless it is specified. (list test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))) *incoming-data*)) (mutex-unlock! *incoming-mutex*) - (if (not *cache-on*)(db:write-cached-data db)) + ;; (if (not *cache-on*)(db:write-cached-data db)) #t)) ;;====================================================================== ;; Extract ods file from the db ;;====================================================================== ADDED docs/plan.txt Index: docs/plan.txt ================================================================== --- /dev/null +++ docs/plan.txt @@ -0,0 +1,13 @@ + +Move test specific db to test dir +================================= + +. Create teststats.db +. Redirect test run stats to teststats.db +. Redirect test steps data to teststats.db +. Redirect test_data to teststats.db +. Direct dboard to get stats from teststats.db +. Redirect kill requests to teststats.db +. Kill requests need to kill all processes in the tree +. Roll up overall stats to megatest.db every five minutes or when test done +. Add any necessary tests Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -64,10 +64,11 @@ (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (fullrunscript (if runscript (conc testpath "/" runscript) #f)) (db #f) + (tdb #f) (rollup-status 0)) (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) ;; apply pre-overrides before other variables. The pre-override vars must not ;; clobbers things from the official sources such as megatest.config and runconfigs.config @@ -91,26 +92,36 @@ (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) (change-directory top-path) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) (exit 1))) (change-directory *toppath*) ;; now can find our db (set! db (open-db)) (if (not (args:get-arg "-server")) (server:client-setup db)) ;; (set! *cache-on* #t) (set-megatest-env-vars db run-id) ;; these may be needed by the launching process (change-directory work-area) + + ;; Open up the test specific database + (set! tdb (open-test-db work-area)) + (on-exit (lambda () + (debug:print 0 "Finalizing both tdb and db!!!") + (sqlite3:finalize! tdb) + (sqlite3:finalize! db))) + (set-run-config-vars db run-id) ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) (set-megatest-env-vars db run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") - (test-set-meta-info db run-id test-name itemdat) + (test-set-meta-info db tdb run-id test-name itemdat) (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) @@ -117,15 +128,10 @@ ;; We are about to actually kick off the test ;; so this is a good place to remove the records for ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) - ;; from here on out we will open and close the db - ;; on every access to reduce the probablitiy of - ;; contention or stuck access on nfs. - (sqlite3:finalize! db) - (let* ((m (make-mutex)) (kill-job? #f) (exit-info (vector #t #t #t)) (job-thread #f) (runit (lambda () @@ -254,20 +260,22 @@ (- (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) - (let* ((db (open-db)) - (cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (tmpfree (get-df "/tmp"))) - (if (not (args:get-arg "-server")) - (server:client-setup db)) - (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) - (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) + ;; (let* (;; (db (open-db)) + ;; (cpuload (get-cpu-load)) + ;; (diskfree (get-df (current-directory))) + ;; (tmpfree (get-df "/tmp"))) + (begin + ;; (if (not (args:get-arg "-server")) + ;; (server:client-setup db)) + ;; (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) + ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) - (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) + (test-set-meta-info db tdb run-id test-name itemdat minutes: minutes) + ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) @@ -289,26 +297,27 @@ (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (test-set-status! db test-id "KILLED" "FAIL" (args:get-arg "-m") #f) (sqlite3:finalize! db) + (sqlite3:finalize! tdb) (exit 1)))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) - (sqlite3:finalize! db) + ;; (sqlite3:finalize! db) (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) (th2 (make-thread runit))) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) (mutex-lock! m) - (set! db (open-db)) - (if (not (args:get-arg "-server")) - (server:client-setup db)) + ;; (set! db (open-db)) + ;; (if (not (args:get-arg "-server")) + ;; (server:client-setup db)) (let* ((item-path (item-list->path itemdat)) (testinfo (rdb:get-test-info db run-id test-name item-path))) (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) (begin (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) @@ -341,10 +350,11 @@ ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") (sqlite3:finalize! db) + (sqlite3:finalize! tdb) (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. (define (setup-for-run) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -337,11 +337,13 @@ (fails (calc-fails prereqs-not-met)) (non-completed (calc-not-completed prereqs-not-met))) (debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " (string-intersperse (map (lambda (t) - (conc (db:test-get-state t)"/"(db:test-get-status t))) + (if (string? t) + (conc " WARNING: t is a string=" t ) + (conc (db:test-get-state t)"/"(db:test-get-status t)))) prereqs-not-met) ", ") " fails: " fails) ;; Don't know at this time if the test have been launched at some time in the past ;; i.e. is this a re-launch? (cond ((and have-resources Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -401,24 +401,42 @@ (define (test-get-kill-request db run-id test-name itemdat) (let* ((item-path (item-list->path itemdat)) (testdat (db:get-test-info db run-id test-name item-path))) (equal? (test:get-state testdat) "KILLREQ"))) -(define (test-set-meta-info db run-id testname itemdat) - (let ((item-path (item-list->path itemdat)) - (cpuload (get-cpu-load)) - (hostname (get-host-name)) - (diskfree (get-df (current-directory))) - (uname (get-uname "-srvpio"))) - (sqlite3:execute db "UPDATE tests SET host=?,cpuload=?,diskfree=?,uname=? WHERE run_id=? AND testname=? AND item_path=?;" - hostname - cpuload - diskfree - uname - run-id - testname - item-path))) +(define (test:tdb-get-rundat-count tdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + tdb + "SELECT count(id) FROM test_rundat;") + res)) + +(define (test-set-meta-info db tdb run-id testname itemdat #!key (minutes #f)) + (let* ((num-records (test:tdb-get-rundat-count tdb)) + (item-path (item-list->path itemdat)) + (cpuload (get-cpu-load)) + (diskfree (get-df (current-directory)))) + (if (eq? (modulo num-records 10) 0) ;; every ten records update central + (begin + (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE run_id=? AND testname=? AND item_path=?;" + cpuload + diskfree + run-id + testname + item-path) + (if (eq? num-records 0) + (begin + (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE run_id=? AND testname=? AND item_path=?;" + (get-uname "-srvpio") (get-host-name) run-id testname item-path) + (if minutes + (sqlite3:execute db "UPDATE tests SET minutes=? WHERE run_id=? AND testname=? AND item_path=?;" + minutes run-id testname item-path)))))) + (sqlite3:execute tdb "INSERT INTO test_rundat (cpuload,diskfree) VALUES (?,?);" + cpuload diskfree))) + ;;====================================================================== ;; A R C H I V I N G ;;======================================================================