Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -2,13 +2,13 @@ PREFIX=. SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ - process.scm runs.scm + process.scm runs.scm tasks.scm -GUISRCF = dashboard.scm dashboard-tests.scm +GUISRCF = dashboard.scm dashboard-tests.scm dashboard-guimonitor.scm OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) HELPERS=$(addprefix $(PREFIX)/bin/,mt_laststep mt_runstep mt_ezstep) @@ -23,10 +23,11 @@ # Special dependencies for the includes db.o launch.o runs.o dashboard-tests.o dashboard.o megatest.o : db_records.scm runs.o dashboard.o dashboard-tests.o : run_records.scm keys.o db.o runs.o launch.o megatest.o : key_records.scm +tasks.o dashboard-tasks.o : task_records.scm $(OFILES) $(GOFILES) : common_records.scm %.o : %.scm csc -c $< Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -111,11 +111,11 @@ (lambda (testmeta)(db:testmeta-get-reviewed testmeta))) (store-meta "tags" (iup:label (db:testmeta-get-tags testmeta) #:expand "HORIZONTAL") (lambda (testmeta)(db:testmeta-get-tags testmeta))) (store-meta "description" - (iup:label (db:testmeta-get-description testmeta) #:expand "HORIZONTAL") + (iup:label (db:testmeta-get-description testmeta) #:size "x50"); #:expand "HORIZONTAL") (lambda (testmeta)(db:testmeta-get-description testmeta))) ))))) ;;====================================================================== @@ -200,16 +200,16 @@ (iup:hbox (iup:label "Comment:") (iup:textbox #:action (lambda (val a b) (db:test-set-state-status-by-id *db* test-id #f #f b) (set! newcomment b)) #:value (db:test-get-comment testdat) - #:expand "YES")) + #:expand "HORIZONTAL")) (apply iup:hbox (iup:label "STATE:" #:size "30x") (let* ((btns (map (lambda (state) (let ((btn (iup:button state - #:expand "YES" #:size "50x" #:font "Courier New, -10" + #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) (db:test-set-state-status-by-id *db* test-id state #f #f) (db:test-set-state! testdat state))))) btn)) (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "KILLED" "KILLREQ")))) @@ -225,11 +225,11 @@ btns)) (apply iup:hbox (iup:label "STATUS:" #:size "30x") (let* ((btns (map (lambda (status) (let ((btn (iup:button status - #:expand "YES" #:size "50x" #:font "Courier New, -10" + #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) (db:test-set-state-status-by-id *db* test-id #f status #f) (db:test-set-status! testdat status))))) btn)) (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED")))) @@ -325,11 +325,11 @@ (iup:attribute-set! lbl "TITLE" newval) ;(mutex-unlock! mx1) ))))) lbl)) (store-button store-label) - (command-text-box (iup:textbox #:expand "YES" #:font "Courier New, -10")) + (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -10")) (command-launch-button (iup:button "Execute!" #:action (lambda (x) (let ((cmd (iup:attribute command-text-box "VALUE"))) (system (conc cmd " &")))))) (run-test (lambda (x) (iup:attribute-set! Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -26,10 +26,11 @@ (declare (uses configf)) (declare (uses process)) (declare (uses launch)) (declare (uses runs)) (declare (uses dashboard-tests)) +(declare (uses dashboard-guimonitor)) (declare (uses megatest-version)) (include "common_records.scm") (include "db_records.scm") (include "run_records.scm") @@ -39,12 +40,12 @@ version " megatest-version " license GPL, Copyright (C) Matt Welland 2011 Usage: dashboard [options] -h : this help - -run runid : control run identified by runid -test testid : control test identified by testid + -guimonitor : control panel for runs Misc -rows N : set number of rows ")) @@ -55,10 +56,11 @@ "-run" "-test" "-debug" ) (list "-h" + "-guimonitor" "-v" "-q" ) args:arg-hash 0)) @@ -459,10 +461,11 @@ #:action (lambda (obj unk val) (set! *last-db-update-time* 0) (update-search "item-name" val))))) (iup:hbox (iup:button "Quit" #:action (lambda (obj)(sqlite3:finalize! *db*)(exit))) + (iup:button "Monitor" #:action (lambda (obj)(system (conc (car (argv))" -guimonitor &")))) )) ;; (iup:button "<- Left" #:action (lambda (obj)(set! *start-run-offset* (+ *start-run-offset* 1)))) ;; (iup:button "Up ^" #:action (lambda (obj)(set! *start-test-offset* (if (> *start-test-offset* 0)(- *start-test-offset* 1) 0)))) ;; (iup:button "Down v" #:action (lambda (obj)(set! *start-test-offset* (if (>= *start-test-offset* (length *alltestnamelst*))(length *alltestnamelst*)(+ *start-test-offset* 1))))) ;; (iup:button "Right ->" #:action (lambda (obj)(set! *start-run-offset* (if (> *start-run-offset* 0)(- *start-run-offset* 1) 0)))) @@ -658,14 +661,16 @@ (if testid (examine-test *db* testid) (begin (print "ERROR: testid is not a number " (args:get-arg "-test")) (exit 1))))) + ((args:get-arg "-guimonitor") + (gui-monitor *db*)) (else (set! uidat (make-dashboard-buttons *num-runs* *num-tests* *dbkeys*)) (iup:callback-set! *tim* "ACTION_CB" (lambda (x) (run-update x))))) ;(print x))))) (iup:main-loop) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -125,11 +125,11 @@ tol REAL, units TEXT, comment TEXT DEFAULT '', status TEXT DEFAULT 'n/a', type TEXT DEFAULT '', - CONSTRAINT test_data UNIQUE (test_id,category,variable));") + CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS task_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', @@ -136,16 +136,17 @@ name TEXT DEFAULT '', test TEXT DEFAULT '', item TEXT DEFAULT '', creation_time TIMESTAMP, execution_time TIMESTAMP;") - (sqlite3:execute db "CREATE monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, - username TEXT);") + username TEXT, + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") ;; Must do this *after* running patch db !! No more. (db:set-var db "MEGATEST_VERSION" megatest-version) )) db)) @@ -218,14 +219,31 @@ (patch-db)) ((< mver 1.27) (db:set-var db "MEGATEST_VERSION" 1.27) (sqlite3:execute db "ALTER TABLE test_data ADD COLUMN type TEXT DEFAULT '';") (patch-db)) - ((< mver 1.28) - (db:set-var db "MEGATEST_VERSION" 1.28) + ((< mver 1.29) + (db:set-var db "MEGATEST_VERSION" 1.29) (sqlite3:execute db "ALTER TABLE test_steps ADD COLUMN logfile TEXT;") (sqlite3:execute db "ALTER TABLE tests ADD COLUMN shortdir TEXT DEFAULT '';") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + action TEXT DEFAULT '', + owner TEXT, + state TEXT DEFAULT 'new', + target TEXT DEFAULT '', + name TEXT DEFAULT '', + test TEXT DEFAULT '', + item TEXT DEFAULT '', + creation_time TIMESTAMP, + execution_time TIMESTAMP);") + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + pid INTEGER, + start_time TIMESTAMP, + last_update TIMESTAMP, + hostname TEXT, + username TEXT, + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") (patch-db)) ((< mver megatest-version) (db:set-var db "MEGATEST_VERSION" megatest-version)))))) ;;====================================================================== Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.28) +(define megatest-version 1.29) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -86,18 +86,19 @@ -rerun FAIL,WARN... : re-run if called on a test that previously ran (nullified if -keepgoing is also specified) -rebuild-db : bring the database schema up to date -rollup : fill run (set by :runname) with latest test(s) from prior runs with same keys - -rename-run : rename run (set by :runname) to , requires keys -update-meta : update the tests metadata for all tests + -env2file fname : write the environment to fname.csh and fname.sh + +Spreadsheet generation -extract-ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile if it contains forward slashes the path will be converted to windows style - -env2file fname : write the environment to fname.csh and fname.sh Helpers -runstep stepname ... : take remaining params as comand and execute as stepname log will be in stepname.log. Best to put command in quotes -logpro file : with -exec apply logpro file to stepname.log, creates Index: task_records.scm ================================================================== --- task_records.scm +++ task_records.scm @@ -21,12 +21,13 @@ (define-inline (tasks:task-get-item vec) (vector-ref vec 7)) (define-inline (tasks:task-get-creation_time vec) (vector-ref vec 8)) (define-inline (tasks:task-get-execution_time vec) (vector-ref vec 9)) -;; make-vector-record tasks monitor pid start_time last_update hostname username +;; make-vector-record tasks monitor id pid start_time last_update hostname username (define (make-tasks:monitor)(make-vector 5)) -(define-inline (tasks:monitor-get-pid vec) (vector-ref vec 0)) -(define-inline (tasks:monitor-get-start_time vec) (vector-ref vec 1)) -(define-inline (tasks:monitor-get-last_update vec) (vector-ref vec 2)) -(define-inline (tasks:monitor-get-hostname vec) (vector-ref vec 3)) -(define-inline (tasks:monitor-get-username vec) (vector-ref vec 4)) +(define-inline (tasks:monitor-get-id vec) (vector-ref vec 0)) +(define-inline (tasks:monitor-get-pid vec) (vector-ref vec 1)) +(define-inline (tasks:monitor-get-start_time vec) (vector-ref vec 2)) +(define-inline (tasks:monitor-get-last_update vec) (vector-ref vec 3)) +(define-inline (tasks:monitor-get-hostname vec) (vector-ref vec 4)) +(define-inline (tasks:monitor-get-username vec) (vector-ref vec 5)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -7,23 +7,17 @@ ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) -(declare (unit runs)) +(declare (unit tasks)) (declare (uses db)) (declare (uses common)) -(declare (uses items)) -(declare (uses runconfig)) -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") (include "task_records.scm") ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -42,10 +36,11 @@ (define (tasks:register-monitor db) (let* ((pid (current-process-id)) (hostname (get-host-name)) (userinfo (user-information (current-user-id))) (username (car userinfo))) + (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" pid hostname username))) (define (tasks:get-num-alive-monitors db) (let ((res 0)) @@ -55,58 +50,183 @@ db "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) +;; register a task +(define (tasks:add db action owner target runname test item) + (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,creation_time,execution_time) + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" + action + owner + target + runname + test + item)) + +(define (keys:key-vals-hash->target keys key-params) + (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) + (if (> (length keys) 1) + (for-each (lambda (key) + (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) + (cdr keys))) + tmp)) + +;; for use from the gui +(define (tasks:add-from-params db action keys key-params var-params) + (let ((target (keys:key-vals-hash->target keys key-params)) + (owner (car (user-information (current-user-id)))) + (runname (hash-table-ref/default var-params "runname" #f)) + (testpatts (hash-table-ref/default var-params "testpatts" "%")) + (itempatts (hash-table-ref/default var-params "itempatts" "%"))) + (tasks:add db action owner target runname testpatts itempatts))) + ;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old ;; (define (tasks:snag-a-task db) (let ((res #f)) (with-transaction db (lambda () + ;; execution time is updated with every snag, wait 10 secs before doing anything with the queue (sqlite3:for-each-row (lambda (id . rem) (set! res (apply vector id rem))) db - "SELECT id,action,owner,state,target,name,test,item,creation_time,exectution_time + "SELECT id,action,owner,state,target,name,test,item,creation_time,execution_time FROM tasks_queue WHERE - state='new' OR (state='waiting' AND - last_update+10 > strftime('%s','now')) - LIMIT 1;") + state='new' OR + (state='waiting' AND execution_time+10 > strftime('%s','now')) OR + state='reset' + ORDER BY state ASC LIMIT 1;") (if res ;; yep, have work to be done (begin - (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress' WHERE id=?;" + (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" (tasks:task-get-id res)) - res)))))) + res) + #f))))) + +(define (tasks:reset-stuck-tasks db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id delta) + (set! res (cons id res))) + db + "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") + (sqlite3:execute + db + (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")))) + +;; return all tasks in the tasks_queue table +;; +(define (tasks:get-tasks db types states) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (cons (apply vector id rem) res))) + db + (conc "SELECT id,action,owner,state,target,name,test,item,creation_time,execution_time + FROM tasks_queue " + ;; WHERE + ;; state IN " statesstr " AND + ;; action IN " actionsstr + " ORDER BY creation_time DESC;")) + res)) (define (tasks:start-monitor db) (if (> (tasks:get-num-alive-monitors db) 2) ;; have two running, no need for more (debug:print 1 "INFO: Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) (task:register-monitor db) - (let loop ((count 0)) + (let loop ((count 0) + (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue - (let ((modtime (file-modification-time megatestdb))) + (let ((modtime (file-modification-time megatestdbpath ))) (if (> modtime last-db-update) - (let* ((task (tasks:snag-a-task db)) - (action (if task (tasks:task-get-action task) #f))) - (if action - (case (string->symbol action) - ((run) (tasks:start-run db task)) - ((remove) (tasks:remove-runs db task)) - ((lock) (tasks:lock-runs db task)) - ((monitor) (tasks:start-monitor db task)) - ((rollup) (tasks:rollup-runs db task)) - ((updatemeta)(tasks:update-meta db task)) - ((kill) (tasks:kill-monitors db task)))) - ;; WARNING: Possible race conditon here!! - ;; should this update be immediately after the task-get-action call above? - (set! modtime (file-modification-time megatestdb))))) - (loop (+ count 1)))))) - - - - - + (tasks:process-queue db last-db-update megatestdb next-touch)) + ;; WARNING: Possible race conditon here!! + ;; should this update be immediately after the task-get-action call above? + (if (> (current-seconds) next-touch) + (begin + (tasks:monitors-update db) + (loop (+ count 1)(+ (current-seconds) 240))) + (loop (+ count 1) next-touch))))))) + +(define (tasks:process-queue db megatestdbpath) + (let* ((task (tasks:snag-a-task db)) + (action (if task (tasks:task-get-action task) #f))) + (if action + (case (string->symbol action) + ((run) (tasks:start-run db task)) + ((remove) (tasks:remove-runs db task)) + ((lock) (tasks:lock-runs db task)) + ;; ((monitor) (tasks:start-monitor db task)) + ((rollup) (tasks:rollup-runs db task)) + ((updatemeta)(tasks:update-meta db task)) + ((kill) (tasks:kill-monitors db task)))))) + +(define (tasks:get-monitors db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . rem) + (set! res (cons (apply vector a rem) res))) + db + "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") + (reverse res) + )) + +(define (tasks:tasks->text tasks) + (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~12a")) + (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "itempatts") "\n" + (string-intersperse + (map (lambda (task) + (format #f fmtstr + (tasks:task-get-id task) + (tasks:task-get-action task) + (tasks:task-get-owner task) + (tasks:task-get-state task) + (tasks:task-get-target task) + (tasks:task-get-name task) + (tasks:task-get-test task) + (tasks:task-get-item task))) + tasks) "\n")))) + +(define (tasks:monitors->text-table monitors) + (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) + (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" + (string-intersperse + (map (lambda (monitor) + (format #f fmtstr + (tasks:monitor-get-id monitor) + (tasks:monitor-get-pid monitor) + (tasks:monitor-get-start_time monitor) + (tasks:monitor-get-last_update monitor) + (tasks:monitor-get-hostname monitor) + (tasks:monitor-get-username monitor))) + monitors) + "\n")))) + +;; update the last_update field with the current time and +;; if any monitors appear dead, remove them +(define (tasks:monitors-update db) + (sqlite3:execute db "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name)) + (let ((deadlist '())) + (sqlite3:for-each-row + (lambda (id pid host last-update delta) + (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") + (set! deadlist (cons id deadlist))) + db + "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") + (sqlite3:execute db (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + ) + +(define (tasks:remove-monitor-record db) + (sqlite3:execute db "DELETE FROM monitors WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name))) + +(define (tasks:start-run db task) + (print "Starting run " task)) Index: tests/megatest.config ================================================================== --- tests/megatest.config +++ tests/megatest.config @@ -7,14 +7,15 @@ # exectutable /path/to/megatest # max_concurrent_jobs 4 runsdir /tmp/runs [jobtools] +useshell yes # ## launcher launches jobs, the job is managed on the target host ## by megatest, comment out launcher to run local # workhosts localhost hermes -launcher nbfake +# launcher nbfake # launcher nodanggood ## use "xterm -e csi -- " as a launcher to examine the launch environment. ## exit with (exit) ## get a shell with (system "bash")