Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -6,12 +6,12 @@ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ fs-transport.scm http-transport.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ - tree.scm rmt.scm api.scm tdb.scm \ - ezsteps.scm + tree.scm rmt.scm api.scm tdb.scm \ + ezsteps.scm lock-queue.scm GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -387,11 +387,11 @@ (last-update 0) ;; (current-seconds)) (request-update #t) (db #f)) (if (not testdat) (begin - (debug:print 0 "ERROR: No test data found for test " test-id ", exiting") + (debug:print 2 "ERROR: No test data found for test " test-id ", exiting") (exit 1)) (let* ((run-id (if testdat (db:test-get-run_id testdat) #f)) (keydat (if testdat (rmt:get-key-val-pairs run-id) #f)) (rundat (if testdat (rmt:get-run-info run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-row rundat) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -360,13 +360,13 @@ (sqlite3:execute db (conc "PRAGMA synchronous = 0;")))) db)) (define (db:log-local-event . loglst) (let ((logline (apply conc loglst))) - ;; (pwd (current-directory)) - ;; (cmdline (string-intersperse (argv) " ")) - ;; (pid (current-process-id))) + ;; (pwd (current-directory)) + ;; (cmdline (string-intersperse (argv) " ")) + ;; (pid (current-process-id))) (db:log-event logline))) (define (db:log-event logline) (let ((db (open-logging-db))) (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);" @@ -503,11 +503,11 @@ (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) (sqlite3:execute db "VACUUM;"))) ;; (define (db:report-junk-records db) - + ;;====================================================================== ;; meta get and set vars ;;====================================================================== @@ -638,17 +638,17 @@ (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (let ((res #f)) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") allvals) (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - ;(debug:print 4 "qry: " qry) - qry) - qryvals) + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + ;(debug:print 4 "qry: " qry) + qry) + qryvals) (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=?;" state status res) res) (begin (debug:print 0 "ERROR: Called without all necessary keys") #f)))) @@ -740,11 +740,11 @@ (let* ((stateparts (string-split state "|")) (newstate (conc (car stateparts) "\n" (cadr stateparts)))) (hash-table-set! totals newstate (+ (hash-table-ref/default totals newstate 0) count)) (set! res (cons (list runname newstate count) res)))) db - "SELECT runname,t.state||'|'||t.status AS s,count(t.id) FROM runs AS r INNER JOIN tests AS t ON r.id=t.run_id GROUP BY s,runname ORDER BY r.event_time,s DESC;" ) + "SELECT runname,t.state||'|'||t.status AS s,count(t.id) FROM runs AS r INNER JOIN tests AS t ON r.id=t.run_id GROUP BY s,runname ORDER BY r.event_time,s DESC;" ) ;; (set! res (reverse res)) (for-each (lambda (state) (set! res (cons (list "Totals" state (hash-table-ref totals state)) res))) (sort (hash-table-keys totals) string>=)) res)) @@ -791,27 +791,27 @@ ;; use (get-value-by-header (db:get-header runinfo)(db:get-row runinfo)) (define (db:get-run-info db run-id) ;;(if (hash-table-ref/default *run-info-cache* run-id #f) ;; (hash-table-ref *run-info-cache* run-id) - (let* ((res #f) - (keys (db:get-keys db)) - (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append keys remfields)) - (keystr (conc (keys->keystr keys) "," - (string-intersperse remfields ",")))) - (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (sqlite3:for-each-row - (lambda (a . x) - (set! res (apply vector a x))) - db - (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") - run-id) - (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (let ((finalres (vector header res))) - ;; (hash-table-set! *run-info-cache* run-id finalres) - finalres))) + (let* ((res #f) + (keys (db:get-keys db)) + (remfields (list "id" "runname" "state" "status" "owner" "event_time")) + (header (append keys remfields)) + (keystr (conc (keys->keystr keys) "," + (string-intersperse remfields ",")))) + (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + (sqlite3:for-each-row + (lambda (a . x) + (set! res (apply vector a x))) + db + (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") + run-id) + (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) + (let ((finalres (vector header res))) + ;; (hash-table-set! *run-info-cache* run-id finalres) + finalres))) (define (db:set-comment-for-run db run-id comment) (debug:print-info 11 "db:set-comment-for-run START run-id: " run-id " comment: " comment) (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment run-id) (debug:print-info 11 "db:set-comment-for-run END run-id: " run-id " comment: " comment)) @@ -867,37 +867,37 @@ (debug:print-info 11 "db:get-key-val-pairs END keys: " keys " run-id: " run-id) (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals db run-id) - (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f))) + (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f))) (if mykeyvals mykeyvals - (let* ((keys (db:get-keys db)) - (res '())) - (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - ;; (debug:print 0 "qry: " qry) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons key-val res))) - db qry run-id))) - keys) - (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id) + (let* ((keys (db:get-keys db)) + (res '())) + (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + ;; (debug:print 0 "qry: " qry) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons key-val res))) + db qry run-id))) + keys) + (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id) (let ((final-res (reverse res))) (hash-table-set! *keyvals* run-id final-res) final-res))))) ;; The target is keyval1/keyval2..., cached in *target* as it is used often (define (db:get-target db run-id) (let ((mytarg (hash-table-ref/default *target* run-id #f))) (if mytarg mytarg - (let* ((keyvals (db:get-key-vals db run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) + (let* ((keyvals (db:get-key-vals db run-id)) + (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) (hash-table-set! *target* run-id thekey) thekey)))) ;;====================================================================== ;; T E S T S @@ -930,18 +930,18 @@ " NOT IN ('" " IN ('") (string-intersperse statuses "','") "')"))) (states-statuses-qry - (cond - ((and states-qry statuses-qry) - (conc " AND ( " states-qry " AND " statuses-qry " ) ")) - (states-qry - (conc " AND " states-qry)) - (statuses-qry - (conc " AND " statuses-qry)) - (else ""))) + (cond + ((and states-qry statuses-qry) + (conc " AND ( " states-qry " AND " statuses-qry " ) ")) + (states-qry + (conc " AND " states-qry)) + (statuses-qry + (conc " AND " statuses-qry)) + (else ""))) (tests-match-qry (tests:match->sqlqry testpatt)) (qry (conc "SELECT " qryvals " FROM tests WHERE run_id=? AND state != 'DELETED' " states-statuses-qry (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") @@ -952,11 +952,11 @@ (conc " ORDER BY " sort-by) ""))) (if limit (conc " LIMIT " limit) "") (if offset (conc " OFFSET " offset) "") ";" - ))) + ))) (debug:print-info 8 "db:get-tests-for-run qry=" qry) (sqlite3:for-each-row (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) db @@ -977,13 +977,13 @@ ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match ;; run-ids is a list of run-ids or a single number or #f for all runs (define (db:get-tests-for-runs db run-ids testpatt states statuses - #!key (not-in #t) - (sort-by #f) - (qryvals "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment")) ;; 'rundir 'event_time + #!key (not-in #t) + (sort-by #f) + (qryvals "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment")) ;; 'rundir 'event_time (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by) (let* ((res '()) ;; if states or statuses are null then assume match all when not-in is false (states-qry (if (null? states) #f @@ -1012,11 +1012,11 @@ (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") (case sort-by ((rundir) " ORDER BY length(rundir) DESC;") ((event_time) " ORDER BY event_time ASC;") (else ";")) - ))) + ))) (debug:print-info 8 "db:get-tests-for-run qry=" qry) (sqlite3:for-each-row (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res))) db @@ -1105,29 +1105,35 @@ (define (cdb:tests-update-run-duration serverdat test-id minutes) (cdb:client-call serverdat 'update-run-duration #t *default-numtries* minutes test-id)) (define (cdb:tests-update-uname-host serverdat test-id uname hostname) - (cdb:client-call serverdat 'update-uname-host #t *default-numtries* test-id uname hostname)) + (cdb:client-call serverdat 'update-uname-host #t *default-numtries* uname hostname test-id)) + +(define (db:process-triggers test-id newstate newstatus) + #t) ;; speed up for common cases with a little logic +;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id +;; (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment test-id)) ((and newstate newstatus) (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id)) (else (if newstate (sqlite3:execute db "UPDATE tests SET state=? WHERE id=?;" newstate test-id)) (if newstatus (sqlite3:execute db "UPDATE tests SET status=? WHERE id=?;" newstatus test-id)) - (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment test-id)) - )) + (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment test-id)))) + (db:process-triggers test-id newstate newstatus) #t) ;; retrun something to keep the remote calls happy -(define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state) - (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" - state status run-id test-name item-path)) +;; Never used +;; (define (db:test-set-state-status-by-run-id-testname db run-id test-name item-path status state) +;; (sqlite3:execute db "UPDATE tests SET state=?,status=?,event_time=strftime('%s','now') WHERE run_id=? AND testname=? AND item_path=?;" +;; state status run-id test-name item-path)) (define (db:get-count-tests-running db) (let ((res 0)) (sqlite3:for-each-row (lambda (count) @@ -1281,11 +1287,11 @@ (define (db:get-test-info db run-id testname item-path) (db:get-test-info-by-id db (db:get-test-id db run-id testname item-path))) (define (db:test-set-comment db test-id comment) (sqlite3:execute - db + db "UPDATE tests SET comment=? WHERE id=?;" comment test-id)) (define (cdb:test-set-rundir! serverdat run-id test-name item-path rundir) (cdb:client-call serverdat 'test-set-rundir #t *default-numtries* rundir run-id test-name item-path)) @@ -1315,11 +1321,11 @@ ;; BUG: Move the values derived from args to parameters and push to megatest.scm (let* ((testpatt (if (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%")) (statepatt (if (args:get-arg ":state") (args:get-arg ":state") "%")) (statuspatt (if (args:get-arg ":status") (args:get-arg ":status") "%")) (runname (if (args:get-arg ":runname") (args:get-arg ":runname") "%")) - (paths-from-db (cdb:remote-run db:test-get-paths-matching-keynames-target db keynames target res + (paths-from-db (cdb:remote-run db:test-get-paths-matching-keynames-target-new db keynames target res testpatt: testpatt statepatt: statepatt statuspatt: statuspatt runname: runname))) (if fnamepatt @@ -1346,17 +1352,46 @@ (testqry (tests:match->sqlqry testpatt)) (qrystr (conc "SELECT t.rundir FROM tests AS t INNER JOIN runs AS r ON t.run_id=r.id WHERE " keystr " AND r.runname LIKE '" runname "' AND " testqry " AND t.state LIKE '" statepatt "' AND t.status LIKE '" statuspatt "' ORDER BY t.event_time ASC;"))) - (debug:print 3 "qrystr: " qrystr) (sqlite3:for-each-row (lambda (p) (set! res (cons p res))) db qrystr) res)) + +(define (db:test-get-paths-matching-keynames-target-new db keynames target res + #!key + (testpatt "%") + (statepatt "%") + (statuspatt "%") + (runname "%")) + (let* ((row-ids '()) + (keystr (string-intersperse + (map (lambda (key val) + (conc key " like '" val "'")) + keynames + (string-split target "/")) + " AND ")) + (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';"))) + (tstsqry (sqlite3:prepare db (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))) + (sqlite3:for-each-row + (lambda (rid) + (set! row-ids (cons rid row-ids))) + runsqry) + (for-each (lambda (rid) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + tstsqry rid)) + row-ids) + (sqlite3:finalize! tstsqry) + (sqlite3:finalize! runsqry) + res)) ;; look through tests from matching runs for a file (define (db:test-get-first-path-matching db keynames target fname) ;; [refpaths] is the section where references to other megatest databases are stored (let ((mt-paths (configf:get-section "refpaths")) @@ -1392,28 +1427,28 @@ ((zmq)(with-output-to-string (lambda ()(serialize obj)))) (else obj))) (define (db:string->obj msg) (case *transport-type* - ((fs) msg) - ((http) - (if (string? msg) - (with-input-from-string - (base64:base64-decode - (string-substitute - (regexp "_") "=" msg #t)) - (lambda ()(deserialize))) - (vector #f #f #f))) ;; crude reply for when things go awry - ((zmq)(with-input-from-string msg (lambda ()(deserialize)))) - (else msg))) + ((fs) msg) + ((http) + (if (string? msg) + (with-input-from-string + (base64:base64-decode + (string-substitute + (regexp "_") "=" msg #t)) + (lambda ()(deserialize))) + (vector #f #f #f))) ;; crude reply for when things go awry + ((zmq)(with-input-from-string msg (lambda ()(deserialize)))) + (else msg))) (define (cdb:use-non-blocking-mode proc) (set! *client-non-blocking-mode* #t) (let ((res (proc))) (set! *client-non-blocking-mode* #f) res)) - + ;; params = 'target cached remparams ;; ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime ;; ;; cdb:client-call is the unified interface to all the transports. It dispatches the @@ -1469,35 +1504,35 @@ ;; now get the actual message (let ((myres (db:string->obj (receive-message* sub-socket)))) (if (equal? query-sig (vector-ref myres 1)) (set! res (vector-ref myres 2)) (loop))))))) - ;; (timeout (lambda () - ;; (let loop ((n numretries)) - ;; (thread-sleep! 15) - ;; (if (not res) - ;; (if (> numretries 0) - ;; (begin - ;; (debug:print 2 "WARNING: no reply to query " params ", trying resend") - ;; (debug:print-info 11 "re-sending message") - ;; (send-message push-socket zdat) - ;; (debug:print-info 11 "message re-sent") - ;; (loop (- n 1))) - ;; ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) - ;; (begin - ;; (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") - ;; (exit 5)))))))) + ;; (timeout (lambda () + ;; (let loop ((n numretries)) + ;; (thread-sleep! 15) + ;; (if (not res) + ;; (if (> numretries 0) + ;; (begin + ;; (debug:print 2 "WARNING: no reply to query " params ", trying resend") + ;; (debug:print-info 11 "re-sending message") + ;; (send-message push-socket zdat) + ;; (debug:print-info 11 "message re-sent") + ;; (loop (- n 1))) + ;; ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) + ;; (begin + ;; (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") + ;; (exit 5)))))))) (debug:print-info 11 "Starting threads") (let ((th1 (make-thread send-receive "send receive")) ;; (th2 (make-thread timeout "timeout")) ) (thread-start! th1) ;; (thread-start! th2) (thread-join! th1) (debug:print-info 11 "cdb:client-call returning res=" res) res)))))) - + (define (cdb:set-verbosity serverdat val) (cdb:client-call serverdat 'set-verbosity #f *default-numtries* val)) (define (cdb:login serverdat keyval signature) (cdb:client-call serverdat 'login #t *default-numtries* keyval megatest-version signature)) @@ -1506,10 +1541,11 @@ (cdb:client-call serverdat 'logout #t *default-numtries* keyval signature)) (define (cdb:num-clients serverdat) (cdb:client-call serverdat 'numclients #t *default-numtries*)) +;; I think this would be more efficient if executed on client side FIXME??? (define (cdb:test-set-status-state serverdat test-id status state msg) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (cdb:client-call serverdat 'set-test-start-time #t *default-numtries* test-id)) (if msg (cdb:client-call serverdat 'state-status-msg #t *default-numtries* state status msg test-id) @@ -1525,10 +1561,22 @@ (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count) (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id)) (define (cdb:tests-register-test serverdat run-id test-name item-path) (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)) + +;; more transactioned calls, these for roll-up-pass-fail stuff +(define (cdb:update-pass-fail-counts serverdat run-id test-name) + (cdb:client-call serverdat 'update-fail-pass-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name)) + +(define (cdb:top-test-set-running serverdat run-id test-name) + (cdb:client-call serverdat 'top-test-set-running #t *default-numtries* run-id test-name)) + +(define (cdb:top-test-set-per-pf-counts serverdat run-id test-name) + (cdb:client-call serverdat 'top-test-set-per-pf-counts #t *default-numtries* run-id test-name run-id test-name run-id test-name)) + +;;= (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) (define (cdb:kill-server serverdat pid) @@ -1565,13 +1613,18 @@ ;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S ;;====================================================================== (define db:queries (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") + ;; Test state and status + '(set-test-state "UPDATE tests SET state=? WHERE id=?;") + '(set-test-status "UPDATE tests SET state=? WHERE id=?;") '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") - '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") + ;; Test comment + '(set-test-comment "UPDATE tests SET comment=? WHERE id=?;") + '(set-test-start-time "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps '(test_data-pf-rollup "UPDATE tests SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 THEN 'FAIL' @@ -1588,17 +1641,39 @@ '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") - )) + ;; stuff for roll-up-pass-fail-counts + '(update-fail-pass-counts "UPDATE tests + SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('FAIL','CHECK')), + pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) + WHERE run_id=? AND testname=? AND item_path='';") + '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE run_id=? AND testname=? AND item_path='';") + '(top-test-set-per-pf-counts "UPDATE tests + SET state=CASE + WHEN (SELECT count(id) FROM tests + WHERE run_id=? AND testname=? + AND item_path != '' + AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING' + ELSE 'COMPLETED' END, + status=CASE + WHEN fail_count > 0 THEN 'FAIL' + WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' + WHEN (SELECT count(id) FROM tests + WHERE run_id=? AND testname=? + AND item_path != '' + AND status = 'SKIP') > 0 THEN 'SKIP' + ELSE 'UNKNOWN' END + WHERE run_id=? AND testname=? AND item_path='';") + )) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail - db:roll-up-pass-fail-counts - login - immediate + ;; db:roll-up-pass-fail-counts ;; WHY NOT!? + login + immediate flush sync set-verbosity killserver )) @@ -1617,11 +1692,11 @@ (mutex-unlock! *incoming-mutex*) (if (> (length data) 0) ;; Process if we have data (begin (debug:print-info 7 "Writing cached data " data) - + ;; Prepare the needed sql statements ;; (for-each (lambda (request-item) (let ((stmt-key (vector-ref request-item 0)) (query (vector-ref request-item 1))) @@ -1709,11 +1784,11 @@ (thread-sleep! 0.01) (loop)))) (set! *number-of-writes* (+ *number-of-writes* 1)) (set! *writes-total-delay* (+ *writes-total-delay* (- (current-milliseconds) start-time))) got-it)) - + (define (db:process-queue-item db item) (let* ((stmt-key (cdb:packet-get-qtype item)) (qry-sig (cdb:packet-get-query-sig item)) (return-address (cdb:packet-get-client-sig item)) (params (cdb:packet-get-params item)) @@ -1799,49 +1874,10 @@ db "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';" run-id test-name) res)) -;; Rollup the pass/fail counts from itemized tests into fail_count and pass_count -;; NOTE: Is this duplicating (db:test-data-rollup db test-id status) ???? -(define (db:roll-up-pass-fail-counts db run-id test-name item-path status) - ;; (cdb:flush-queue *runremote*) - (if (and (not (equal? item-path "")) - (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP"))) - (begin - (sqlite3:execute - db - "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('FAIL','CHECK')), - pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) - WHERE run_id=? AND testname=? AND item_path='';" - run-id test-name run-id test-name run-id test-name) - ;; (thread-sleep! 0.1) ;; give other processes a chance here, no, better to be done ASAP? - (if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING - (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" "RUNNING" run-id test-name) - (sqlite3:execute - db - "UPDATE tests - SET state=CASE - WHEN (SELECT count(id) FROM tests - WHERE run_id=? AND testname=? - AND item_path != '' - AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING' - ELSE 'COMPLETED' END, - status=CASE - WHEN fail_count > 0 THEN 'FAIL' - WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' - WHEN (SELECT count(id) FROM tests - WHERE run_id=? AND testname=? - AND item_path != '' - AND status = 'SKIP') > 0 THEN 'SKIP' - ELSE 'UNKNOWN' END - WHERE run_id=? AND testname=? AND item_path='';" - run-id test-name run-id test-name run-id test-name)) - #f) - #f)) - ;;====================================================================== ;; Tests meta data ;;====================================================================== ;; read the record given a testname @@ -1973,17 +2009,17 @@ ;; if the test is not FAIL then set status based on the fail and pass counts. (cdb:test-rollup-test_data-pass-fail *runremote* test-id) ;; (sqlite3:execute ;; db ;;; NOTE: Should this be WARN,FAIL? A WARN is not a FAIL????? BUG FIXME ;; "UPDATE tests - ;; SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - ;; THEN 'FAIL' - ;; WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - ;; (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - ;; THEN 'PASS' - ;; ELSE status - ;; END WHERE id=?;" + ;; SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 + ;; THEN 'FAIL' + ;; WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND + ;; (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') + ;; THEN 'PASS' + ;; ELSE status + ;; END WHERE id=?;" ;; test-id test-id test-id test-id) )))) (define (db:get-prev-tol-for-test db test-id category variable) ;; Finish me? @@ -2118,15 +2154,15 @@ (set! parent-waiton-met #t))) ;; normal checking of parent items, any parent or parent item not ok blocks running ((and is-completed (or is-ok (eq? mode 'toplevel)) ;; toplevel does not block on FAIL - (and is-ok (eq? mode 'itemmatch))) ;; itemmatch blocks on not ok + (and is-ok (eq? mode 'itemmatch))) ;; itemmatch blocks on not ok (set! item-waiton-met #t))))) tests) - ;; both requirements, parent and item-waiton must be met to NOT add item to - ;; prereq's not met list + ;; both requirements, parent and item-waiton must be met to NOT add item to + ;; prereq's not met list (if (not (or parent-waiton-met item-waiton-met)) (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; if the test is not found then clearly the waiton is not met... ;; (if (not ever-seen)(set! result (cons waitontest-name result))))) (if (not ever-seen) ADDED docs/manual/howto.txt Index: docs/manual/howto.txt ================================================================== --- /dev/null +++ docs/manual/howto.txt @@ -0,0 +1,48 @@ + +How To Do Things +================ + +Tricks +------ + +This section is a compendium of a various useful tricks for debugging, +configuring and generally getting the most out of Megatest. + +Debugging Tricks +---------------- + +Examining The Environment +~~~~~~~~~~~~~~~~~~~~~~~~~ + +During Config File Processing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Organising Your Tests and Tasks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa +---------------------------- +[tests-paths] +1 #{get misc parent}/simplerun/tests +---------------------------- + +------------------- +[setup] +------------------- + +The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS + +------------------- +runscript main.csh +------------------- + + +ww30.2 +cellname/LVS/cellname.LAYOUT_ERRORS + +Error: text open + +ww31.3 +cellname/LVS/cellname.LAYOUT_ERRORS + +Error: text open Index: docs/manual/megatest_manual.txt ================================================================== --- docs/manual/megatest_manual.txt +++ docs/manual/megatest_manual.txt @@ -46,10 +46,11 @@ sqlite3 database. include::../plan.txt[] include::getting_started.txt[] include::writing_tests.txt[] +include::howto.txt[] include::reference.txt[] [appendix] Example Appendix ================ Index: ezsteps.scm ================================================================== --- ezsteps.scm +++ ezsteps.scm @@ -165,9 +165,9 @@ ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no + (tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no ))) (pop-directory) rollup-status)) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -353,11 +353,12 @@ fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params params)) read-string)) - (close-all-connections) + ;; Shouldn't this be a call to the managed call-all-connections stuff above? + (close-all-connections!) (mutex-unlock! *http-mutex*) )) ;; (if cleanup ;; ;; mutex already set ;; (begin Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1,7 +1,7 @@ -;; Copyright 2006-2012, Matthew Welland. +;; Copyright 2006-2013, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the @@ -135,11 +135,11 @@ (alist->env-vars env-ovrd) (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") ;; open-run-close not needed for test-set-meta-info - (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) ;; (cdb:set-test-start-time! *runremote* test-id) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) @@ -151,10 +151,11 @@ (let* ((m (make-mutex)) (kill-job? #f) (exit-info (vector #t #t #t)) (job-thread #f) + (keep-going #t) (runit (lambda () ;; (let-values ;; (((pid exit-status exit-code) ;; (run-n-wait fullrunscript))) (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) @@ -170,11 +171,11 @@ (vector-set! exit-info 2 exit-code) (set! rollup-status exit-code) (mutex-unlock! m) (if (eq? pid-val 0) (begin - (thread-sleep! 2) + (thread-sleep! 1) (loop (+ i 1))) ))))) ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if ezsteps @@ -223,11 +224,11 @@ (vector-set! exit-info 1 exit-status) (vector-set! exit-info 2 exit-code) (mutex-unlock! m) (if (eq? pid-val 0) (begin - (thread-sleep! 2) + (thread-sleep! 1) (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) @@ -275,10 +276,11 @@ (round (- (current-seconds) start-seconds))))) (kill-tries 0)) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) (let loop ((minutes (calc-minutes))) (begin (set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat)) (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) (time-exceeded (> run-seconds runtlim))) @@ -286,14 +288,17 @@ (begin (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) #t) #f))))) ;; open-run-close not needed for test-set-meta-info - (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area) + (tests:set-partial-meta-info #f test-id run-id minutes work-area) (if kill-job? (begin (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? Or add a loop that tries three times with a 1/4 second + ;; between tries? (let* ((pid (vector-ref exit-info 0))) (if (number? pid) (process-signal pid signal/kill) ;; (begin ;; (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") @@ -317,19 +322,25 @@ (sqlite3:finalize! tdb) (exit 1)))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) ;; (sqlite3:finalize! db) - (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses - (loop (calc-minutes))))))) - (th1 (make-thread monitorjob)) - (th2 (make-thread runit))) + (if keep-going + (begin + (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses + (if keep-going + (loop (calc-minutes)))))))))) ;; NOTE: Checking twice for keep-going is intentional + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) - (thread-sleep! 0.1) ;; give thread th1 a chance to be done TODO: Verify this is needed. + (set! keep-going #f) + (thread-sleep! 1) + (thread-terminate! th1) ;; Not sure if this is a good idea + (thread-sleep! 0.1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) (testinfo (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path))) ;; Am I completed? (if (equal? (db:test-get-state testinfo) "RUNNING") ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) @@ -338,38 +349,35 @@ ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test ) (new-status (cond ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run ((eq? rollup-status 0) - ;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) ((eq? rollup-status 1) "FAIL") ((eq? rollup-status 2) ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) (else "FAIL")))) ;; (db:test-get-status testinfo))) - (debug:print-info 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) + (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " rollup-status) (tests:test-set-status! test-id new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest - (if (not (equal? item-path "")) - (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)) - + ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + ;; (if (not (equal? item-path "")) + ;; (begin + ;; (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority + ;; (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))) )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no - ) + (tests:summarize-items #f run-id test-id test-name #f))) ;; don't force - just update if no (mutex-unlock! m) - ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) - ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") - ;; (sqlite3:finalize! db) - ;; (sqlite3:finalize! tdb) (if (not (vector-ref exit-info 1)) (exit 4))))))) ;; set up the very basics needed for doing anything here. (define (setup-for-run) ADDED lock-queue.scm Index: lock-queue.scm ================================================================== --- /dev/null +++ lock-queue.scm @@ -0,0 +1,134 @@ +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + +(use sqlite3 srfi-18) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit lock-queue)) +(declare (uses common)) + +;;====================================================================== +;; attempt to prevent overlapping updates of rollup files by queueing +;; update requests in an sqlite db +;;====================================================================== + +(define (lock-queue:open-db fname) + (let* ((actualfname (conc fname ".lockdb")) + (dbexists (file-exists? actualfname)) + (db (sqlite3:open-database actualfname)) + (handler (make-busy-timeout 3600))) + (if dbexists + db + (begin + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS queue ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + start_time INTEGER, + state TEXT, + CONSTRAINT queue_constraint UNIQUE (test_id));") + (sqlite3:execute + db + "CREATE TABLE IF NOT EXISTS runlocks ( + id INTEGER PRIMARY KEY, + test_id INTEGER, + run_lock TEXT, + CONSTRAINT runlock_constraint UNIQUE (run_lock));"))) + (sqlite3:set-busy-handler! db handler) + db)) + +(define (lock-queue:set-state db test-id newstate) + (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;" + newstate + test-id)) + +(define (lock-queue:any-younger? db mystart test-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (tid) + ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as + (if (not (equal? tid test-id)) + (set! res tid))) + db + "SELECT test_id FROM queue WHERE start_time > ?;" mystart) + res)) + +(define (lock-queue:get-lock db test-id) + (let ((res #f) + (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) + (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) + (let ((result + (handle-exceptions + exn + #f + (sqlite3:with-transaction + db + (lambda () + (sqlite3:for-each-row (lambda (tid lockstate) + (set! res (list tid lockstate))) + lckqry) + (if res + (if (equal? (car res) test-id) + #t ;; already have the lock + #f) + (begin + (sqlite3:execute mklckqry test-id) + ;; if no error handled then return #t for got the lock + #t))))))) + (sqlite3:finalize! lckqry) + (sqlite3:finalize! mklckqry) + result))) + +(define (lock-queue:release-lock fname test-id) + (let ((db (lock-queue:open-db fname))) + (sqlite3:execute db "DELETE FROM runlocks WHERE test_id=?;" test-id) + (sqlite3:finalize! db))) + +(define (lock-queue:steal-lock db test-id) + (sqlite3:execute db "DELETE FROM runlocks WHERE run_lock='locked';") + (lock-queue:get-lock db test-it)) + +;; returns #f if ok to skip the task +;; returns #t if ok to proceed with task +;; otherwise waits +;; +(define (lock-queue:wait-turn fname test-id) + (let ((db (lock-queue:open-db fname)) + (mystart (current-seconds))) + (sqlite3:execute + db + "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');" + test-id mystart) + (thread-sleep! 1) ;; give other tests a chance to register + (let ((result + (let loop ((younger-waiting (lock-queue:any-younger? db mystart test-id))) + (if younger-waiting + (begin + ;; no need for us to wait. mark in the lock queue db as skipping + (lock-queue:set-state db test-id "skipping") + #f) ;; let the calling process know that nothing needs to be done + (if (lock-queue:get-lock db test-id) + #t + (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock + (lock-queue:steal-lock db test-id) + (begin + (thread-sleep! 1) + (loop (lock-queue:any-younger? db mystart test-id))))))))) + (sqlite3:finalize! db) + result))) + + +;; (use trace) +;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -938,11 +938,11 @@ (if (args:get-arg "-set-toplog") ;; DO NOT run remote (tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") ;; DO NOT run remote - (tests:summarize-items db run-id test-name #t)) ;; do force here + (tests:summarize-items db run-id test-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") (if (null? remargs) (begin (debug:print 0 "ERROR: nothing specified to run!") (if db (sqlite3:finalize! db)) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -87,8 +87,31 @@ (define (mt:get-run-stats) (cdb:remote-run db:get-run-stats #f)) ;;====================================================================== -;; S T E P S +;; S T A T E A N D S T A T U S F O R T E S T S ;;====================================================================== +(define (mt:roll-up-pass-fail-counts run-id test-name item-path status) + (if (and (not (equal? item-path "")) + (member status '("PASS" "WARN" "FAIL" "WAIVED" "RUNNING" "CHECK" "SKIP"))) + (begin + (cdb:update-pass-fail-counts *runremote* run-id test-name) + (if (equal? status "RUNNING") + (cdb:top-test-set-running *runremote* run-id test-name) + (cdb:top-test-set-per-pf-counts *runremote* run-id test-name)) + #f) + #f)) + +;; speed up for common cases with a little logic +(define (mt:test-set-state-status-by-id test-id newstate newstatus newcomment) + (cond + ((and newstate newstatus newcomment) + (cdb:client-call *runremote* 'state-status-msg #t *default-numtries* newstate newstatus newcomment test-id)) + ((and newstate newstatus) + (cdb:client-call serverdat 'state-status #t *default-numtries* newstate newstatus test-id)) + (else + (if newstate (cdb:client-call *runremote* 'set-test-state #t *default-numtries* newstate test-id)) + (if newstatus (cdb:client-call *runremote* 'set-test-status #t *default-numtries* newstatus test-id)) + (if newcomment (cdb:client-call *runremote* 'set-test-comment #t *default-numtries* newcomment test-id)))) + (db:process-triggers test-id newstate newstatus)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -791,11 +791,11 @@ ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (testdat (cdb:get-test-info-by-id *runremote* test-id))) (if (not testdat) - (begin + (let loop () ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) ;; ;; (open-run-close tests:register-test db run-id test-name item-path) @@ -807,11 +807,16 @@ (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (cdb:tests-register-test *runremote* run-id test-name item-path) (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) + (set! testdat (cdb:get-test-info-by-id *runremote* test-id)) + (if (not testdat) + (begin + (debug:print-info 0 "WARNING: server is overloaded, trying again in one second") + (thread-sleep! 1) + (loop))))) (if (not testdat) ;; should NOT happen (debug:print 0 "ERROR: failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) (if (file-exists? test-path) (change-directory test-path) @@ -881,11 +886,11 @@ (configf:lookup test-conf "skip" "fileexists")) (if (file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))) (if skip-test (begin - (cdb:remote-run db:test-set-state-status-by-id #f test-id "COMPLETED" "SKIP" skip-test) + (mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test) (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test)) (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! *globalexitstatus* 1) ;; @@ -1024,21 +1029,21 @@ ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") - (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin - (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "KILLREQ" "n/a" #f) + (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin - (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "REMOVING" "LOCKED" #f) + (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) @@ -1071,11 +1076,11 @@ (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) - (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f) + (mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) ((run-wait) (debug:print-info 2 "still waiting, " (length tests) " tests still running") (thread-sleep! 10) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -15,10 +15,11 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking tcp directory-utils) (import (prefix sqlite3 sqlite3:)) (declare (unit tests)) +(declare (uses lock-queue)) (declare (uses db)) (declare (uses common)) (declare (uses items)) (declare (uses runconfig)) @@ -365,11 +366,11 @@ (db:csv->test-data #f test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path status)) + (mt:roll-up-pass-fail-counts run-id test-name item-path status)) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) @@ -378,11 +379,11 @@ (define (tests:test-set-toplog! db run-id test-name logf) (cdb:client-call *runremote* 'tests:test-set-toplog #t 2 logf run-id test-name)) -(define (tests:summarize-items db run-id test-name force) +(define (tests:summarize-items db run-id test-id test-name force) ;; if not force then only update the record if one of these is true: ;; 1. logf is "log/final.log ;; 2. logf is same as outputfilename (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) (orig-dir (current-directory)) @@ -399,12 +400,13 @@ (debug:print 4 "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) (begin - (if (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock - (print "Failed to obtain lock for " outputfilename) + (if ;; (not (obtain-dot-lock outputfilename 1 5 7)) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock + (not (lock-queue:wait-turn outputfilename test-id)) + (print "Not updating " outputfilename " as another test item has signed up for the job") (begin (print "Obtained lock for " outputfilename) (let ((oup (open-output-file outputfilename)) (counts (make-hash-table)) (statecounts (make-hash-table)) @@ -461,10 +463,11 @@ (print "" "" outtxt "
ItemStateStatusComment
") (release-dot-lock outputfilename))) (close-output-port oup) + (lock-queue:release-lock outputfilename test-id) (change-directory orig-dir) ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! db run-id test-name outputfilename) ))))))) @@ -679,42 +682,43 @@ tdb "SELECT count(id) FROM test_rundat;") res)) 0) -(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname) +(define (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname) ;; This is a good candidate for threading the requests to enable ;; transactionized write at the server (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree) - ;; (let ((db (open-db))) - ;; (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" - ;; cpuload - ;; diskfree - ;; test-id) - (if minutes - (cdb:tests-update-run-duration *runremote* test-id minutes)) - ;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) - (if (eq? num-records 0) - (cdb:tests-update-uname-host *runremote* test-id uname hostname)) - ;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id)) - ;;(sqlite3:finalize! db)) - ) - -(define (tests:set-meta-info db test-id run-id testname itemdat minutes work-area) + (if minutes + (cdb:tests-update-run-duration *runremote* test-id minutes)) + (if (and uname hostname) + (cdb:tests-update-uname-host *runremote* test-id uname hostname))) + +(define (tests:set-full-meta-info db test-id run-id minutes work-area) + ;; DOES cdb:remote-run under the hood! + (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb)) + (cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (uname (get-uname "-srvpio")) + (hostname (get-host-name))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname))) + +(define (tests:set-partial-meta-info db test-id run-id minutes work-area) ;; DOES cdb:remote-run under the hood! - (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) - (num-records (test:tdb-get-rundat-count tdb)) - (cpuload (get-cpu-load)) + (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory)))) - (if (eq? (modulo num-records 10) 0) ;; every ten records update central - (let ((uname (get-uname "-srvpio")) - (hostname (get-host-name))) - (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + ;; Update central with uname and hostname = #f + (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f))) + +(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);" cpuload diskfree minutes) (sqlite3:finalize! tdb))) - + ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== (define (test:archive db test-id) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -128,10 +128,13 @@ for fs in afs nfs zfs; do \ for dpath in none tmp; do \ (cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target $$sys/$$fs/$$dpath :runname $(RUNNAME) &);\ done;done;done +test11 : + cd fullrun;time (for a in 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 ;do (megatest -test-paths -target %/%/% > /dev/null ) & done; wait; ) + minsetup : cd ..;make && make install mkdir -p mintest/runs mintest/links cd mintest;megatest -stop-server 0 cd mintest;megatest -server - -debug $(DEBUG) > server.log 2> server.log & Index: tests/fdktestqa/fdk.config ================================================================== --- tests/fdktestqa/fdk.config +++ tests/fdktestqa/fdk.config @@ -2,11 +2,11 @@ SYSTEM TEXT RELEASE TEXT [setup] # Adjust max_concurrent_jobs to limit how much you load your machines -max_concurrent_jobs 50 +max_concurrent_jobs 150 # This is your link path, you can move it but it is generally better to keep it stable linktree #{shell readlink -f #{getenv PWD}/../simplelinks} [include testqa/configs/megatest.abc.config] Index: tests/fdktestqa/testqa/Makefile ================================================================== --- tests/fdktestqa/testqa/Makefile +++ tests/fdktestqa/testqa/Makefile @@ -14,11 +14,11 @@ bigrun : $(MEGATEST) -runtests bigrun -target a/bigrun :runname a bigrun2 : - $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a + $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a -transport http dashboard : $(DASHBOARD) -rows 20 & compile : Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,8 +1,10 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log -runqueue 2 +runqueue 20 +transport http +launchwait no [include ../fdk.config] [server] -timeout 0.05 +port 9080 Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -16,13 +16,12 @@ [tests-paths] 1 #{get misc parent}/simplerun/tests [setup] -# Set launchwait to yes to use the old launch run code that waits for the launch process to return before -# proceeding. -# launchwait yes +# Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding +launchwait no # Use http instead of direct filesystem access transport http