Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -1,5 +1,9 @@ + +[87cbe68f31] +[be405e8e2e] + # FROM andyjpg on #chicken (let ((original-exit (exit-handler))) (exit-handler (lambda (#!optional (exit-code 0)) (printf "Preparing to exit...\n" exit-code) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -390,13 +390,25 @@ (define (seconds->time-string sec) (time->string (seconds->local-time sec) "%H:%M:%S")) +(define (seconds->work-week/day-time sec) + (time->string + (seconds->local-time sec) "ww%V.%u %H:%M")) + (define (seconds->work-week/day sec) (time->string - (seconds->local-time sec) "%V.%u")) + (seconds->local-time sec) "ww%V.%u")) + +(define (seconds->year-work-week/day sec) + (time->string + (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time sec) + (time->string + (seconds->local-time sec) "%yww%V.%w %H:%M")) ;;====================================================================== ;; Colors ;;====================================================================== Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -44,11 +44,12 @@ (list "Testname: " "Item path: " "Current state: " "Current status: " "Test comment: " - "Test id: ")) + "Test id: " + "Test date: ")) (list (iup:label "" #:expand "VERTICAL")))) (apply iup:vbox ; #:expand "YES" (list (store-label "testname" (iup:label (db:test-get-testname testdat) #:expand "HORIZONTAL") @@ -79,10 +80,15 @@ (store-label "testid" (iup:label "TestId " #:expand "HORIZONTAL") (lambda (testdat) (db:test-get-id testdat))) + (store-label "testdate" + (iup:label "TestDate " + #:expand "HORIZONTAL") + (lambda (testdat) + (seconds->work-week/day-time (db:test-get-event_time testdat)))) ))))) ;;====================================================================== ;; Test meta panel ;;====================================================================== @@ -128,26 +134,34 @@ ;;====================================================================== ;; Run info panel ;;====================================================================== (define (run-info-panel keydat testdat runname) - (iup:frame - #:title "Megatest Run Info" ; #:expand "YES" - (iup:hbox ; #:expand "YES" - (apply iup:vbox ; #:expand "YES" - (append (map (lambda (keyval) - (iup:label (conc (car keyval) " ") ; #:expand "HORIZONTAL" - )) - keydat) - (list (iup:label "runname ")(iup:label "run-id")))) - (apply iup:vbox - (append (map (lambda (keyval) - (iup:label (cadr keyval) #:expand "HORIZONTAL")) - keydat) - (list (iup:label runname) - (iup:label (conc (db:test-get-run_id testdat))) - (iup:label "" #:expand "VERTICAL"))))))) + (let* ((run-id (db:test-get-run_id testdat)) + (rundat (cdb:remote-run db:get-run-info #f run-id)) + (header (db:get-header rundat)) + (event_time (db:get-value-by-header (db:get-row rundat) + (db:get-header rundat) + "event_time"))) + (iup:frame + #:title "Megatest Run Info" ; #:expand "YES" + (iup:hbox ; #:expand "YES" + (apply iup:vbox ; #:expand "YES" + (append (map (lambda (keyval) + (iup:label (conc (car keyval) " "))) + keydat) + (list (iup:label "runname ") + (iup:label "run-id") + (iup:label "run-date")))) + (apply iup:vbox + (append (map (lambda (keyval) + (iup:label (cadr keyval) #:expand "HORIZONTAL")) + keydat) + (list (iup:label runname) + (iup:label (conc run-id)) + (iup:label (seconds->year-work-week/day-time event_time)) + (iup:label "" #:expand "VERTICAL")))))))) ;;====================================================================== ;; Host info panel ;;====================================================================== (define (host-info-panel testdat store-label) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -324,10 +324,62 @@ ;;====================================================================== ;; D B U T I L S ;;====================================================================== +;;====================================================================== +;; M A I N T E N A N C E +;;====================================================================== + +;; select end_time-now from +;; (select testname,item_path,event_time+run_duration as +;; end_time,strftime('%s','now') as now from tests where state in +;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); + + +(define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f)) + (let* ((incompleted '()) + (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) + (deadtime (if (and deadtime-str + (string->number deadtime-str)) + (string->number deadtime-str) + 7200)) ;; two hours + (run-ids (db:get-run-ids db))) ;; iterate over runs to divy up the calls + (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) + (for-each + (lambda (run-id) + + ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes + ;; + (sqlite3:for-each-row + (lambda (test-id) + (set! incompleted (cons test-id incompleted))) + db + "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('RUNNING','REMOTEHOSTSTART');" + run-id deadtime) + + ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config + ;; + (sqlite3:for-each-row + (lambda (test-id) + (set! incompleted (cons test-id incompleted))) + db + "SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('LAUNCHED');" + run-id (* 60 60 24))) + run-ids) + + ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. + ;; + (if (> (length incompleted) 0) + (begin + (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE") + (sqlite3:execute + db + (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" + (string-intersperse (map conc incompleted) ",") + ");")))))) + ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; ;; 1. Look at test records either deleted or part of deleted run: @@ -368,10 +420,11 @@ (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) + (db:find-and-mark-incomplete db) (sqlite3:execute db "VACUUM;"))) ;;====================================================================== ;; M E T A G E T A N D S E T V A R S ;;====================================================================== @@ -722,10 +775,18 @@ (set! res (cons run-id res))) (db:get-db dbstruct #f) "SELECT id FROM runs;") (reverse res))) +(define (db:get-run-ids db) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id) + (set! res (cons id res))) + db + "SELECT id FROM runs;"))) + ;;====================================================================== ;; K E Y S ;;====================================================================== ;; get key val pairs for a given run-id Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -568,11 +568,11 @@ ;; If there is already a symlink delete it and recreate it. (handle-exceptions exn (begin - (debug:print 0 "ERROR: Failed to re-create link " linktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") (exit)) (if (symbolic-link? lnktarget) (delete-file lnktarget)) (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) ;; I suspect this section was deleting test directories under some @@ -630,11 +630,12 @@ (ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big (diskspace (config-lookup test-conf "requirements" "diskspace")) (memory (config-lookup test-conf "requirements" "memory")) (hosts (config-lookup *configdat* "jobtools" "workhosts")) (remote-megatest (config-lookup *configdat* "setup" "executable")) - (run-time-limit (configf:lookup test-conf "requirements" "runtimelim")) + (run-time-limit (or (configf:lookup test-conf "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to ;; allow running from dashboard. Extract the path ;; from the called megatest and convert dashboard ;; or dboard to megatest (local-megatest (let* ((lm (car (argv))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -124,10 +124,11 @@ -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm + -mark-incompletes : find and mark incomplete tests Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile @@ -235,10 +236,11 @@ "-rebuild-db" "-cleanup-db" "-rollup" "-update-meta" "-gen-megatest-area" + "-mark-incompletes" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only ) @@ -1092,10 +1094,19 @@ (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; keep this one local (open-run-close db:clean-up #f) (set! *didsomething* #t))) + +(if (args:get-arg "-mark-incompletes") + (begin + (if (not (setup-for-run)) + (begin + (debug:print 0 "Failed to setup, exiting") + (exit 1))) + (open-run-close db:find-and-mark-incomplete #f) + (set! *didsomething* #t))) ;;====================================================================== ;; Wait on a run to complete ;;====================================================================== @@ -1118,11 +1129,11 @@ (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; now can find our db ;; keep this one local - (open-run-close runs:update-all-test_meta db) + (open-run-close runs:update-all-test_meta #f) (set! *didsomething* #t))) ;;====================================================================== ;; Start a repl ;;====================================================================== Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -723,10 +723,15 @@ ;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > (define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry) ;; At this point the list of parent tests is expanded ;; NB// Should expand items here and then insert into the run queue. (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) + + ;; Do mark-and-find clean up of db before starting runing of quue + ;; + ;; (cdb:remote-run db:find-and-mark-incomplete #f) + (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) (tests-info (mt:get-tests-for-run run-id #f '() '())) ;; qryvals: "id,testname,item_path")) (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) (test-registry (make-hash-table)) (registry-mutex (make-mutex)) @@ -734,11 +739,12 @@ (max-retries (config-lookup *configdat* "setup" "maxretries")) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead - (reglen (if (number? reglen-in) reglen-in 1))) + (reglen (if (number? reglen-in) reglen-in 1)) + (last-time-incomplete (current-seconds))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) @@ -753,10 +759,16 @@ (let loop ((hed (car sorted-test-names)) (tal (cdr sorted-test-names)) (reg '()) ;; registered, put these at the head of tal (reruns '())) (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) + + ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes + ;; (if (> (current-seconds)(+ last-time-incomplete 900)) + ;; (begin + ;; (set! last-time-incomplete (current-seconds)) + ;; (cdb:remote-run db:find-and-mark-incomplete #f))) ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) (let* ((test-record (hash-table-ref test-records hed)) (test-name (tests:testqueue-get-testname test-record)) (tconfig (tests:testqueue-get-testconfig test-record)) @@ -1409,17 +1421,17 @@ (cdb:remote-run db:testmeta-update-field #f test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) - (let ((test-names (tests:get-valid-tests))) + (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests))) (for-each (lambda (test-name) (let* ((test-conf (mt:lazy-read-test-config test-name))) ;; use the cdb:remote-run instead of passing in db (if test-conf (runs:update-test_meta test-name test-conf)))) - test-names))) + (hash-table-keys test-names)))) ;; This could probably be refactored into one complex query ... (define (runs:rollup-run keys runname user keyvals) (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user) (let* ((db #f) Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -29,10 +29,19 @@ # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # runqueue 20 +# Default runtimelim 1d 1h 1m 10s +# +runtimelim 20m + +# Deadtime - when to consider tests dead (i.e. haven't heard from them in too long) +# Number in seconds, set to 20 seconds here to trigger a little trouble. Default is +# 1800 +# +deadtime 600 # It is possible (but not recommended) to override the rsync command used # to populate the test directories. For test development the following # example can be useful #