Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -179,11 +179,11 @@ ;; (print "On-exit called") (tasks:remove-monitor-record tdb) (sqlite3:finalize! tdb)))) (define (gui-monitor db) - (let ((keys (rdb:get-keys db)) + (let ((keys (db:get-keys db)) (tdb (tasks:open-db))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -23,16 +23,16 @@ ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) - (tdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) - (sqlite3:set-busy-handler! tdb handler) + (sqlite3:set-busy-handler! mdb handler) (if (not exists) (begin - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', name TEXT DEFAULT '', @@ -40,18 +40,27 @@ item TEXT DEFAULT '', keylock TEXT, params TEXT, creation_time TIMESTAMP, execution_time TIMESTAMP);") - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));"))) - tdb)) + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + hostname TEXT, + port INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT;") + + )) + mdb)) ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -65,32 +74,32 @@ ;;====================================================================== ;; Task Monitors ;;====================================================================== -(define (tasks:register-monitor db tdb) +(define (tasks:register-monitor db mdb) (let* ((pid (current-process-id)) (hostname (get-host-name)) (userinfo (user-information (current-user-id))) (username (car userinfo))) (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) - (sqlite3:execute tdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" pid hostname username))) -(define (tasks:get-num-alive-monitors tdb) +(define (tasks:get-num-alive-monitors mdb) (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) - tdb + mdb "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task -(define (tasks:add tdb action owner target runname test item params) - (sqlite3:execute tdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) +(define (tasks:add mdb action owner target runname test item params) + (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" action owner target runname @@ -105,28 +114,28 @@ (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) (cdr keys))) tmp)) ;; for use from the gui -(define (tasks:add-from-params tdb action keys key-params var-params) +(define (tasks:add-from-params mdb action keys key-params var-params) (let ((target (keys:key-vals-hash->target keys key-params)) (owner (car (user-information (current-user-id)))) (runname (hash-table-ref/default var-params "runname" #f)) (testpatts (hash-table-ref/default var-params "testpatts" "%")) (itempatts (hash-table-ref/default var-params "itempatts" "%")) (params (hash-table-ref/default var-params "params" ""))) - (tasks:add tdb action owner target runname testpatts itempatts params))) + (tasks:add mdb action owner target runname testpatts itempatts params))) ;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old ;; -(define (tasks:snag-a-task tdb) +(define (tasks:snag-a-task mdb) (let ((res #f) (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) ;; first randomly set a new to pid-hostname-hostname (sqlite3:execute - tdb + mdb "UPDATE tasks_queue SET keylock=? WHERE id IN (SELECT id FROM tasks_queue WHERE state='new' OR (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR state='reset' @@ -133,92 +142,92 @@ ORDER BY RANDOM() LIMIT 1);" keytxt) (sqlite3:for-each-row (lambda (id . rem) (set! res (apply vector id rem))) - tdb + mdb "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) (if res ;; yep, have work to be done (begin - (sqlite3:execute tdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (sqlite3:execute mdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" (tasks:task-get-id res)) res) #f))) -(define (tasks:reset-stuck-tasks tdb) +(define (tasks:reset-stuck-tasks mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id delta) (set! res (cons id res))) - tdb + mdb "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") (sqlite3:execute - tdb + mdb (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")))) ;; return all tasks in the tasks_queue table ;; -(define (tasks:get-tasks tdb types states) +(define (tasks:get-tasks mdb types states) (let ((res '())) (sqlite3:for-each-row (lambda (id . rem) (set! res (cons (apply vector id rem) res))) - tdb + mdb (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue " ;; WHERE ;; state IN " statesstr " AND ;; action IN " actionsstr " ORDER BY creation_time DESC;")) res)) ;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries tdb task-ids) - (sqlite3:execute tdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) +(define (tasks:remove-queue-entries mdb task-ids) + (sqlite3:execute mdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) ;; -(define (tasks:start-monitor db tdb) - (if (> (tasks:get-num-alive-monitors tdb) 2) ;; have two running, no need for more +(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) (monitordbf (conc *toppath* "/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor tdb) + (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue (let ((modtime (file-modification-time megatestdbpath ))) (if (> modtime last-db-update) - (tasks:process-queue db tdb last-db-update megatestdb next-touch)) + (tasks:process-queue db mdb last-db-update megatestdb next-touch)) ;; WARNING: Possible race conditon here!! ;; should this update be immediately after the task-get-action call above? (if (> (current-seconds) next-touch) (begin - (tasks:monitors-update tdb) + (tasks:monitors-update mdb) (loop (+ count 1)(+ (current-seconds) 240))) (loop (+ count 1) next-touch))))))) -(define (tasks:process-queue db tdb) - (let* ((task (tasks:snag-a-task tdb)) +(define (tasks:process-queue db mdb) + (let* ((task (tasks:snag-a-task mdb)) (action (if task (tasks:task-get-action task) #f))) (if action (print "tasks:process-queue task: " task)) (if action (case (string->symbol action) - ((run) (tasks:start-run db tdb task)) - ((remove) (tasks:remove-runs db tdb task)) - ((lock) (tasks:lock-runs db tdb task)) + ((run) (tasks:start-run db mdb task)) + ((remove) (tasks:remove-runs db mdb task)) + ((lock) (tasks:lock-runs db mdb task)) ;; ((monitor) (tasks:start-monitor db task)) - ((rollup) (tasks:rollup-runs db tdb task)) - ((updatemeta)(tasks:update-meta db tdb task)) - ((kill) (tasks:kill-monitors db tdb task)))))) + ((rollup) (tasks:rollup-runs db mdb task)) + ((updatemeta)(tasks:update-meta db mdb task)) + ((kill) (tasks:kill-monitors db mdb task)))))) -(define (tasks:get-monitors tdb) +(define (tasks:get-monitors mdb) (let ((res '())) (sqlite3:for-each-row (lambda (a . rem) (set! res (cons (apply vector a rem) res))) - tdb + mdb "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") (reverse res) )) (define (tasks:tasks->text tasks) @@ -253,31 +262,31 @@ monitors) "\n")))) ;; update the last_update field with the current time and ;; if any monitors appear dead, remove them -(define (tasks:monitors-update tdb) - (sqlite3:execute tdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name)) (let ((deadlist '())) (sqlite3:for-each-row (lambda (id pid host last-update delta) (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") (set! deadlist (cons id deadlist))) - tdb + mdb "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute tdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) ) -(define (tasks:remove-monitor-record tdb) - (sqlite3:execute tdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name))) -(define (tasks:set-state tdb task-id state) - (sqlite3:execute tdb "UPDATE tasks_queue SET state=? WHERE id=?;" +(define (tasks:set-state mdb task-id state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; The routines to process tasks @@ -284,11 +293,11 @@ ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. -(define (tasks:start-run db tdb task) +(define (tasks:start-run db mdb task) (let ((flags (make-hash-table))) (hash-table-set! flags "-rerun" "NOT_STARTED") (if (not (string=? (tasks:task-get-params task) "")) (hash-table-set! flags "-setvars" (tasks:task-get-params task))) (print "Starting run " task) @@ -298,13 +307,13 @@ (tasks:task-get-name task) (tasks:task-get-test task) (tasks:task-get-item task) (tasks:task-get-owner task) flags) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -(define (tasks:rollup-runs db tdb task) +(define (tasks:rollup-runs db mdb task) (let* ((flags (make-hash-table)) (keys (db:get-keys db)) (keyvallst (keys:target->keyval keys (tasks:task-get-target task)))) ;; (hash-table-set! flags "-rerun" "NOT_STARTED") (print "Starting rollup " task) @@ -312,6 +321,6 @@ (runs:rollup-run db keys keyvallst (tasks:task-get-name task) (tasks:task-get-owner task)) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -48,12 +48,12 @@ cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & test6: fullprep cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10