Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -157,35 +157,36 @@ (db:set-var db "MEGATEST_VERSION" megatest-version) )) ;; Create the sqlite db for the individual test(s) (define (open-test-db testpath) - (let* ((dbpath (conc testpath "/.testdat.db")) + (let* ((dbpath (conc testpath "/testdat.db")) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 36000)))) (debug:print 4 "INFO: test dbpath=" dbpath) (sqlite3:set-busy-handler! db handler) (if (not dbexists) - (db:testdb-initialize db)) + (begin + (sqlite3:execute db "PRAGMA synchronous = FULL;") + (debug:print 0 "Initialized test database " dbpath) + (db:testdb-initialize db))) (sqlite3:execute db "PRAGMA synchronous = 0;") db)) (define (db:testdb-initialize db) (for-each (lambda (sqlcmd) - (sqlite3:exectute db sqlcmd)) + (sqlite3:execute db sqlcmd)) (list "CREATE TABLE IF NOT EXISTS test_rundat ( id INTEGER PRIMARY KEY, - event_time TIMESTAMP, + update_time TIMESTAMP, cpuload INTEGER DEFAULT -1, - uname TEXT DEFAULT '', diskfree INTEGER DEFAULT -1, - diskusage INTGER DEFAULT -1, - run_duration INTEGER DEFAULT 0);" + diskusage INTEGER DEFAULT -1);" "CREATE TABLE IF NOT EXISTS test_data ( id INTEGER PRIMARY KEY, test_id INTEGER, category TEXT DEFAULT '', variable TEXT, @@ -785,53 +786,53 @@ ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS ;;====================================================================== -(define (db:updater db) - (let loop ((start-time (current-time))) - (thread-sleep! 0.5) ;; move save time around to minimize regular collisions? - (db:write-cached-data db) - (loop start-time))) - -(define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) - (mutex-lock! *incoming-mutex*) - (set! 
*incoming-data* (cons (vector 'meta-info - (current-seconds) - (list cpuload - diskfree - minutes - test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) - *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if *cache-on* - (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info") - (db:write-cached-data db))) - -(define (db:write-cached-data db) - (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');")) - (step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f) - (data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))) - (if (> (length data) 0) - (debug:print 4 "Writing cached data " data)) - (mutex-lock! *incoming-mutex*) - (sqlite3:with-transaction - db - (lambda () - (for-each (lambda (entry) - (case (vector-ref entry 0) - ((meta-info) - (apply sqlite3:execute meta-stmt (vector-ref entry 2))) - ((step-status) - (apply sqlite3:execute step-stmt (vector-ref entry 2))) - (else - (debug:print 0 "ERROR: Queued entry not recognised " entry)))) - data))) - (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap? - (sqlite3:finalize! step-stmt) - (set! *incoming-data* '()) - (mutex-unlock! *incoming-mutex*))) +;; (define (db:updater db) +;; (let loop ((start-time (current-time))) +;; (thread-sleep! 0.5) ;; move save time around to minimize regular collisions? +;; (db:write-cached-data db) +;; (loop start-time))) +;; +;; (define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) +;; (mutex-lock! *incoming-mutex*) +;; (set! 
*incoming-data* (cons (vector 'meta-info +;; (current-seconds) +;; (list cpuload +;; diskfree +;; minutes +;; test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) +;; *incoming-data*)) +;; (mutex-unlock! *incoming-mutex*) +;; (if *cache-on* +;; (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info") +;; (db:write-cached-data db))) +;; +;; (define (db:write-cached-data db) +;; (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');")) +;; (step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f) +;; (data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))) +;; (if (> (length data) 0) +;; (debug:print 4 "Writing cached data " data)) +;; (mutex-lock! *incoming-mutex*) +;; (sqlite3:with-transaction +;; db +;; (lambda () +;; (for-each (lambda (entry) +;; (case (vector-ref entry 0) +;; ((meta-info) +;; (apply sqlite3:execute meta-stmt (vector-ref entry 2))) +;; ((step-status) +;; (apply sqlite3:execute step-stmt (vector-ref entry 2))) +;; (else +;; (debug:print 0 "ERROR: Queued entry not recognised " entry)))) +;; data))) +;; (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap? +;; (sqlite3:finalize! step-stmt) +;; (set! *incoming-data* '()) +;; (mutex-unlock! *incoming-mutex*))) (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) (if (and (not (equal? item-path "")) (or (equal? status "PASS") (equal? 
status "WARN") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -64,10 +64,11 @@ (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (fullrunscript (if runscript (conc testpath "/" runscript) #f)) (db #f) + (tdb #f) (rollup-status 0)) (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) ;; apply pre-overrides before other variables. The pre-override vars must not ;; clobbers things from the official sources such as megatest.config and runconfigs.config @@ -91,26 +92,36 @@ (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) (change-directory top-path) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) (exit 1))) (change-directory *toppath*) ;; now can find our db (set! db (open-db)) (if (not (args:get-arg "-server")) (server:client-setup db)) ;; (set! *cache-on* #t) (set-megatest-env-vars db run-id) ;; these may be needed by the launching process (change-directory work-area) + + ;; Open up the test specific database + (set! tdb (open-test-db work-area)) + (on-exit (lambda () + (debug:print 0 "Finalizing both tdb and db!!!") + (sqlite3:finalize! tdb) + (sqlite3:finalize! db))) + (set-run-config-vars db run-id) ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) (set-megatest-env-vars db run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") - (test-set-meta-info db run-id test-name itemdat) + (test-set-meta-info db tdb run-id test-name itemdat) (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? 
fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) @@ -117,15 +128,10 @@ ;; We are about to actually kick off the test ;; so this is a good place to remove the records for ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) - ;; from here on out we will open and close the db - ;; on every access to reduce the probablitiy of - ;; contention or stuck access on nfs. - (sqlite3:finalize! db) - (let* ((m (make-mutex)) (kill-job? #f) (exit-info (vector #t #t #t)) (job-thread #f) (runit (lambda () @@ -254,20 +260,22 @@ (- (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) - (let* ((db (open-db)) - (cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (tmpfree (get-df "/tmp"))) - (if (not (args:get-arg "-server")) - (server:client-setup db)) - (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) - (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) + ;; (let* (;; (db (open-db)) + ;; (cpuload (get-cpu-load)) + ;; (diskfree (get-df (current-directory))) + ;; (tmpfree (get-df "/tmp"))) + (begin + ;; (if (not (args:get-arg "-server")) + ;; (server:client-setup db)) + ;; (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) + ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) - (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) + (test-set-meta-info db tdb run-id test-name itemdat minutes: minutes) + ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) @@ -289,26 +297,27 @@ (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (test-set-status! 
db test-id "KILLED" "FAIL" (args:get-arg "-m") #f) (sqlite3:finalize! db) + (sqlite3:finalize! tdb) (exit 1)))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) - (sqlite3:finalize! db) + ;; (sqlite3:finalize! db) (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) (th2 (make-thread runit))) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) (mutex-lock! m) - (set! db (open-db)) - (if (not (args:get-arg "-server")) - (server:client-setup db)) + ;; (set! db (open-db)) + ;; (if (not (args:get-arg "-server")) + ;; (server:client-setup db)) (let* ((item-path (item-list->path itemdat)) (testinfo (rdb:get-test-info db run-id test-name item-path))) (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) (begin (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) @@ -341,10 +350,11 @@ ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") (sqlite3:finalize! db) + (sqlite3:finalize! tdb) (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. (define (setup-for-run) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -401,24 +401,39 @@ (define (test-get-kill-request db run-id test-name itemdat) (let* ((item-path (item-list->path itemdat)) (testdat (db:get-test-info db run-id test-name item-path))) (equal? 
(test:get-state testdat) "KILLREQ"))) -(define (test-set-meta-info db run-id testname itemdat) - (let ((item-path (item-list->path itemdat)) - (cpuload (get-cpu-load)) - (hostname (get-host-name)) - (diskfree (get-df (current-directory))) - (uname (get-uname "-srvpio"))) - (sqlite3:execute db "UPDATE tests SET host=?,cpuload=?,diskfree=?,uname=? WHERE run_id=? AND testname=? AND item_path=?;" - hostname - cpuload - diskfree - uname - run-id - testname - item-path))) +(define (test:tdb-get-rundat-count tdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + tdb + "SELECT count(id) FROM test_rundat;") + res)) + +(define (test-set-meta-info db tdb run-id testname itemdat #!key (minutes #f)) + (let* ((num-records (test:tdb-get-rundat-count tdb)) + (item-path (item-list->path itemdat)) + (cpuload (get-cpu-load)) + ;; (hostname (get-host-name)) + (diskfree (get-df (current-directory)))) + (if (eq? (modulo num-records 10) 0) ;; every ten records update central + (begin + (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE run_id=? AND testname=? AND item_path=?;" + cpuload + diskfree + run-id + testname + item-path) (if minutes (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE run_id=? AND testname=? AND item_path=?;" minutes run-id testname item-path)) + (if (eq? num-records 0) + (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE run_id=? AND testname=? AND item_path=?;" + (get-uname "-srvpio") (get-host-name) run-id testname item-path)))) + (sqlite3:execute tdb "INSERT INTO test_rundat (cpuload,diskfree) VALUES (?,?);" + cpuload diskfree))) + ;;====================================================================== ;; A R C H I V I N G ;;======================================================================