Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -38,23 +38,30 @@ (define *waiting-queue* (make-hash-table)) (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar +;; SERVER +(define *my-client-signature* #f) (define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port (define *runremote* #f) ;; if set up for server communication this will hold (define *last-db-access* (current-seconds)) ;; update when db is accessed via server (define *max-cache-size* 0) +(define *logged-in-clients* (make-hash-table)) +(define *client-non-blocking-mode* #f) + (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id (define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db (define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget + + ;; Debugging stuff (define *verbosity* 1) (define *logging* #f) @@ -67,10 +74,19 @@ (if res (cadr res)(if (null? default) #f (car default))))) ;;====================================================================== ;; Misc utils ;;====================================================================== + +;; one-of args defined +(define (args-defined? . param) + (let ((res #f)) + (for-each + (lambda (arg) + (if (args:get-arg arg)(set! res #t))) + param) + res)) ;; convert stuff to a number if possible (define (any->number val) (cond ((number? val) val) Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -179,11 +179,11 @@ ;; (print "On-exit called") (tasks:remove-monitor-record tdb) (sqlite3:finalize! tdb)))) (define (gui-monitor db) - (let ((keys (rdb:get-keys db)) + (let ((keys (db:get-keys db)) (tdb (tasks:open-db))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -204,21 +204,21 @@ (iup:frame #:title "Set fields" (iup:vbox (iup:hbox (iup:label "Comment:") (iup:textbox #:action (lambda (val a b) - (open-run-close db:test-set-state-status-by-id *db* test-id #f #f b) + (open-run-close db:test-set-state-status-by-id #f test-id #f #f b) (set! newcomment b)) #:value (db:test-get-comment testdat) #:expand "HORIZONTAL")) (apply iup:hbox (iup:label "STATE:" #:size "30x") (let* ((btns (map (lambda (state) (let ((btn (iup:button state #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) - (open-run-close db:test-set-state-status-by-id *db* test-id state #f #f) + (open-run-close db:test-set-state-status-by-id #f test-id state #f #f) (db:test-set-state! testdat state))))) btn)) (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "KILLED" "KILLREQ")))) (vector-set! *state-status* 0 (lambda (state color) @@ -234,11 +234,11 @@ (iup:label "STATUS:" #:size "30x") (let* ((btns (map (lambda (status) (let ((btn (iup:button status #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) - (open-run-close db:test-set-state-status-by-id *db* test-id #f status #f) + (open-run-close db:test-set-state-status-by-id #f test-id #f status #f) (db:test-set-status! testdat status))))) btn)) (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED")))) (vector-set! *state-status* 1 (lambda (status color) @@ -265,22 +265,22 @@ (if (not testdat) (begin (debug:print 0 "ERROR: No test data found for test " test-id ", exiting") (exit 1)) (let* ((run-id (if testdat (db:test-get-run_id testdat) #f)) - (keydat (if testdat (open-run-close db:get-key-val-pairs db run-id) #f)) - (rundat (if testdat (open-run-close db:get-run-info db run-id) #f)) + (keydat (if testdat (open-run-close db:get-key-val-pairs #f run-id) #f)) + (rundat (if testdat (open-run-close db:get-run-info #f run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-row rundat) (db:get-header rundat) "runname") #f)) ;(teststeps (if testdat (db:get-steps-for-test db test-id) #f)) (logfile "/this/dir/better/not/exist") (rundir logfile) (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) (testname (if testdat (db:test-get-testname testdat) "n/a")) (testmeta (if testdat - (let ((tm (open-run-close db:testmeta-get-record db testname))) + (let ((tm (open-run-close db:testmeta-get-record #f testname))) (if tm tm (make-db:testmeta))) (make-db:testmeta))) (keystring (string-intersperse (map (lambda (keyval) @@ -305,11 +305,11 @@ (refreshdat (lambda () (let* ((curr-mod-time (file-modification-time db-path)) (need-update (or (and (> curr-mod-time db-mod-time) (> (current-seconds) (+ last-update 2))) ;; every two seconds if db touched request-update)) - (newtestdat (if need-update (open-run-close db:get-test-info-by-id db test-id)))) + (newtestdat (if need-update (open-run-close db:get-test-info-by-id #f test-id)))) (cond ((and need-update newtestdat) (set! testdat newtestdat) (set! teststeps (open-run-close db:get-steps-for-test db test-id)) (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) @@ -466,11 +466,11 @@ (db:test-data-get-tol x) (db:test-data-get-status x) (db:test-data-get-units x) (db:test-data-get-type x) (db:test-data-get-comment x))) - (open-run-close db:read-test-data db test-id "%"))) + (open-run-close db:read-test-data #f test-id "%"))) "\n"))) (if (not (equal? currval newval)) (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) test-data))) ))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -76,29 +76,30 @@ (begin (print "Failed to find megatest.config, exiting") (exit 1))) (define *db* #f) ;; (open-db)) +;; (server:client-launch) ;; HACK ALERT: this is a hack, please fix. (define *read-only* (not (file-read-access? (conc *toppath* "/megatest.db")))) ;; (server:client-setup *db*) (define toplevel #f) (define dlg #f) (define max-test-num 0) -(define *keys* (open-run-close db:get-keys *db*)) +(define *keys* (open-run-close db:get-keys #f)) ;; (define *keys* (db:get-keys *db*)) (define *dbkeys* (map (lambda (x)(vector-ref x 0)) (append *keys* (list (vector "runname" "blah"))))) (define *header* #f) (define *allruns* '()) (define *buttondat* (make-hash-table)) ;; (define *alltestnamelst* '()) (define *searchpatts* (make-hash-table)) (define *num-runs* 8) -(define *tot-run-count* (open-run-close db:get-num-runs *db* "%")) +(define *tot-run-count* (open-run-close db:get-num-runs #f "%")) ;; (define *tot-run-count* (db:get-num-runs *db* "%")) (define *last-update* (current-seconds)) (define *num-tests* 15) (define *start-run-offset* 0) (define *start-test-offset* 0) @@ -162,28 +163,29 @@ (begin (debug:print-info 4 "update-rundat runnamepatt: " runnamepatt " numruns: " numruns " testnamepatt: " testnamepatt " keypatts: " keypatts) (set! *please-update-buttons* #t) (set! *last-db-update-time* modtime) (set! *delayed-update* (- *delayed-update* 1)) - (let* ((allruns (open-run-close db:get-runs *db* runnamepatt numruns ;; (+ numruns 1) ;; (/ numruns 2)) + (let* ((allruns (open-run-close db:get-runs #f runnamepatt numruns ;; (+ numruns 1) ;; (/ numruns 2)) *start-run-offset* keypatts)) (header (db:get-header allruns)) (runs (db:get-rows allruns)) (result '()) (maxtests 0) (states (hash-table-keys *state-ignore-hash*)) (statuses (hash-table-keys *status-ignore-hash*))) + ;; (thread-sleep! 0.1) ;; give some time to other threads (debug:print 6 "update-rundat, got " (length runs) " runs") (if (> (+ *last-update* 300) (current-seconds)) ;; every five minutes (begin (set! *last-update* (current-seconds)) (set! *tot-run-count* (length runs)))) ;; (rdb:get-num-runs *db* runnamepatt)))) (for-each (lambda (run) (let* ((run-id (db:get-value-by-header run header "id")) - (tests (let ((tsts (open-run-close db:get-tests-for-run *db* run-id testnamepatt states statuses))) + (tests (let ((tsts (open-run-close db:get-tests-for-run #f run-id testnamepatt states statuses))) (if *tests-sort-reverse* (reverse tsts) tsts))) - (key-vals (open-run-close db:get-key-vals *db* run-id))) + (key-vals (open-run-close db:get-key-vals #f run-id))) (if (> (length tests) maxtests) (set! maxtests (length tests))) (if (or (not *hide-empty-runs*) ;; this reduces the data burden when set (not (null? tests))) (set! result (cons (vector run tests key-vals) result))))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -70,36 +70,32 @@ db)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) - (let* ((db (if idb idb (open-db))) + (let* ((db (if idb + (if (procedure? idb) + (idb) + idb) + (open-db))) (res #f)) (set! res (apply proc db params)) (if (not idb)(sqlite3:finalize! db)) (debug:print-info 11 "open-run-close-no-exception-handling END" ) res)) (define (open-run-close-exception-handling proc idb . params) - (debug:print-info 11 "open-run-close-exception-handling START, idb=" idb ", params=" params) - (let ((runner (lambda () - (let* ((db (if idb idb (open-db))) - (res #f)) - (set! res (apply proc db params)) - (if (not idb)(sqlite3:finalize! db)) - (debug:print-info 11 "open-run-close-no-exception-handling END" ) - res)))) - (handle-exceptions - exn - (begin - (debug:print 0 "EXCEPTION: database probably overloaded?") - (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain) - (thread-sleep! (random 120)) - (debug:print-info 0 "trying db call one more time....") - (runner)) - (runner)))) + (handle-exceptions + exn + (begin + (debug:print 0 "EXCEPTION: database probably overloaded?") + (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain) + (thread-sleep! (random 120)) + (debug:print-info 0 "trying db call one more time....") + (apply open-run-close-no-exception-handling proc idb params)) + (apply open-run-close-no-exception-handling proc idb params))) (define open-run-close open-run-close-exception-handling) (define *global-delta* 0) (define *last-global-delta-printed* 0) @@ -257,10 +253,11 @@ #f))) ;; find and open the testdat.db file for an existing test (define (db:open-test-db-by-test-id db test-id) (let* ((test-path (db:test-get-rundir-from-test-id db test-id))) + (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path))) (define (db:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") (for-each @@ -776,12 +773,12 @@ " run_id=? AND testname=? AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));"))) ;;(debug:print 0 "QRY: " qry) (sqlite3:execute db qry run-id newstate newstatus testname testname))) testnames)) -(define (db:delete-tests-in-state db run-id state) - (sqlite3:execute db "DELETE FROM tests WHERE state=? AND run_id=?;" state run-id)) +(define (cdb:delete-tests-in-state zmqsocket run-id state) + (cdb:client-call zmqsocket 'delete-tests-in-state #t run-id state)) ;; speed up for common cases with a little logic (define (db:test-set-state-status-by-id db test-id newstate newstatus newcomment) (cond ((and newstate newstatus newcomment) @@ -855,11 +852,11 @@ db "SELECT id FROM tests WHERE run_id=? AND testname=? AND item_path=?;" run-id testname item-path) res)) -(define db:get-test-id db:get-test-id-cached) +(define db:get-test-id db:get-test-id-not-cached) ;; given a test-info record, patch in the latest data from the testdat.db file ;; found in the test run directory (define (db:patch-tdb-data-into-test-info db test-id res) (let ((tdb (db:open-test-db-by-test-id db test-id))) @@ -955,43 +952,32 @@ (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" comment test-id)) -;; -(define (db:test-set-rundir! db run-id test-name item-path rundir) - (sqlite3:execute - db - "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;" - rundir run-id test-name item-path)) - -(define (db:test-set-rundir-by-test-id! db test-id rundir) - (sqlite3:execute - db - "UPDATE tests SET rundir=? WHERE id=?" - rundir test-id)) - -;; +(define (cdb:test-set-rundir! zmqsocket run-id test-name item-path rundir) + (cdb:client-call zmqsocket 'test-set-rundir #t rundir run-id test-name item-path)) + +(define (cdb:test-set-rundir-by-test-id zmqsocket test-id rundir) + (cdb:client-call zmqsocket 'test-set-rundir-by-test-id #t rundir test-id)) + (define (db:test-get-rundir-from-test-id db test-id) - (let ((res (hash-table-ref/default *test-paths* test-id #f))) - (if res - res - (begin - (sqlite3:for-each-row - (lambda (tpath) - (set! res tpath)) - db - "SELECT rundir FROM tests WHERE id=?;" - test-id) - (hash-table-set! *test-paths* test-id res) - res)))) - -(define (db:test-set-log! db test-id logf) - (if (string? logf) - (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE id=?;" - logf test-id) - (debug:print 0 "ERROR: db:test-set-log! called with non-string log file name " logf))) + (let ((res #f)) ;; (hash-table-ref/default *test-paths* test-id #f))) + ;; (if res + ;; res + ;; (begin + (sqlite3:for-each-row + (lambda (tpath) + (set! res tpath)) + db + "SELECT rundir FROM tests WHERE id=?;" + test-id) + ;; (hash-table-set! *test-paths* test-id res) + res)) ;; )) + +(define (cdb:test-set-log! zmqsocket test-id logf) + (if (string? logf)(cdb:client-call zmqsocket 'test-set-log #f logf test-id))) ;;====================================================================== ;; Misc. test related queries ;;====================================================================== @@ -1091,20 +1077,20 @@ db qrystr) res)) ;;====================================================================== -;; QUEUE UP META, TEST STATUS AND STEPS +;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS ;;====================================================================== ;; db:updater is run in a thread to write out the cached data periodically -(define (db:updater) - (debug:print-info 4 "Starting cache processing") - (let loop () - (thread-sleep! 10) ;; move save time around to minimize regular collisions? - (db:write-cached-data) - (loop))) +;; (define (db:updater) +;; (debug:print-info 4 "Starting cache processing") +;; (let loop () +;; (thread-sleep! 10) ;; move save time around to minimize regular collisions? +;; (db:write-cached-data) +;; (loop))) ;; cdb:cached-access is called by the server loop to dispatch commands or queue up ;; db accesses ;; ;; params := qry-name cached? val1 val2 val3 ... @@ -1114,23 +1100,50 @@ "ERROR" (let ((qry-name (car params)) (cached? (cadr params)) (remparam (list-tail params 2))) (debug:print-info 12 "cdb:cached-access qry-name=" qry-name " params=" params) + (if (not cached?)(db:write-cached-data)) ;; Any special calls are dispatched here. ;; Remainder are put in the db queue (case qry-name ((login) ;; login checks that the megatest path matches - (if (null? remparam) - #f ;; no path - fail! - (let ((calling-path (car remparam))) + (if (< (length remparam) 2) ;; should get toppath and signature + '(#f "login failed due to missing params") ;; missing params + (let ((calling-path (car remparam)) + (client-key (cadr remparam))) (if (equal? calling-path *toppath*) - #t ;; path matches - pass! Should vet the caller at this time ... - #f)))) ;; else fail to login + (begin + (hash-table-set! *logged-in-clients* client-key (current-seconds)) + '(#t "successful login")) ;; path matches - pass! Should vet the caller at this time ... + (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*)))))) + ((logout) + (if (and (> (length remparam) 1) + (eq? *toppath* (car remparam)) + (hash-table-ref/default *logged-in-clients* (cadr remparam) #f)) + #t + #f)) + ((numclients) + (length (hash-table-keys *logged-in-clients*))) ((flush) (db:write-cached-data) #t) + ((immediate) + (db:write-cached-data) + (if (not (null? remparam)) + (apply (car remparam) (cdr remparam)) + "ERROR")) + ((killserver) + ;; (db:write-cached-data) + (debug:print-info 0 "Remotely killed server on host " (get-host-name) " pid " (current-process-id)) + (set! *time-to-exit* #t) + #t) + ((set-verbosity) + (set! *verbosity* (caddr params)) + *verbosity*) + ((get-verbosity) + *verbosity*) (else (mutex-lock! *incoming-mutex*) (set! *last-db-access* (current-seconds)) (set! *incoming-data* (cons (vector qry-name @@ -1150,19 +1163,39 @@ "WRITTEN"))))))) (define (db:obj->string obj)(with-output-to-string (lambda ()(serialize obj)))) (define (db:string->obj msg)(with-input-from-string msg (lambda ()(deserialize)))) +(define (cdb:use-non-blocking-mode proc) + (set! *client-non-blocking-mode* #t) + (let ((res (proc))) + (set! *client-non-blocking-mode* #f) + res)) + (define (cdb:client-call zmq-socket . params) (debug:print-info 11 "cdb:client-call zmq-socket=" zmq-socket " params=" params) (let ((zdat (db:obj->string params)) ;; (with-output-to-string (lambda ()(serialize params)))) (res #f)) (send-message zmq-socket zdat) - (set! res (db:string->obj (receive-message zmq-socket zdat))) + (set! res (db:string->obj (if *client-non-blocking-mode* + (receive-message* zmq-socket zdat) + (receive-message zmq-socket zdat)))) (debug:print-info 11 "zmq-socket " (car params) " res=" res) res)) +(define (cdb:set-verbosity zmq-socket val) + (cdb:client-call zmq-socket 'set-verbosity #f val)) + +(define (cdb:login zmq-socket keyval signature) + (cdb:client-call zmq-socket 'login #t keyval signature)) + +(define (cdb:logout zmq-socket keyval signature) + (cdb:client-call zmq-socket 'logout #t keyval signature)) + +(define (cdb:num-clients zmq-socket) + (cdb:client-call zmq-socket 'numclients #t)) + (define (cdb:test-set-status-state zmqsocket test-id status state msg) (if msg (cdb:client-call zmqsocket 'state-status-msg #t state status msg test-id) (cdb:client-call zmqsocket 'state-status #t state status test-id))) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) @@ -1178,33 +1211,68 @@ (list item-path "")))) (cdb:client-call zmqsocket 'register-test #t run-id test-name item-path))) (define (cdb:flush-queue zmqsocket) (cdb:client-call zmqsocket 'flush #f)) + +(define (cdb:kill-server zmqsocket) + (cdb:client-call zmqsocket 'killserver #f)) + +(define (cdb:roll-up-pass-fail-counts zmqsocket run-id test-name item-path status) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status)) + +(define (cdb:get-test-info zmqsocket run-id test-name item-path) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:get-test-info #f run-id test-name item-path)) + +(define (cdb:get-test-info-by-id zmqsocket test-id) + (cdb:client-call zmqsocket 'immediate #f open-run-close db:get-test-info-by-id #f test-id)) + +;; db should be db open proc or #f +(define (cdb:remote-run proc db . params) + (apply cdb:client-call *runremote* 'immediate #f open-run-close proc #f params)) + +(define (db:test-get-logfile-info db run-id test-name) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (path final_logf) + (set! logf final_logf) + (set! res (list path final_logf)) + (if (directory? path) + (print "Found path: " path) + (print "No such path: " path))) + db + "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" + run-id test-name) + res)) (define db:queries - '((register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") - (state-status "UPDATE tests SET state=?,status=? WHERE id=?;") - (state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") - (pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") - (test_data-pf-rollup "UPDATE tests - SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 - THEN 'FAIL' - WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND - (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') - THEN 'PASS' - ELSE status - END WHERE id=?;") - (rollup-tests-pass-fail "UPDATE tests - SET fail_count=(SELECT count(id) FROM tests WHERE - run_id=? AND testname=? AND item_path != '' AND status='FAIL'), - pass_count=(SELECT count(id) FROM tests WHERE - run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) - WHERE run_id=? AND testname=? AND item_path='';"))) - -(define db:special-queries '(rollup-tests-pass-fail)) -(define db:run-local-queries '(rollup-tests-pass-fail)) + (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") + '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") + '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") + '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") + ;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps + '(test_data-pf-rollup "UPDATE tests + SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 + THEN 'FAIL' + WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND + (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL') + THEN 'PASS' + ELSE status + END WHERE id=?;") + '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") + '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") + '(test-set-rundir "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;") + '(delete-tests-in-state "DELETE FROM tests WHERE state=? AND run_id=?;") + '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + )) + +;; do not run these as part of the transaction +(define db:special-queries '(rollup-tests-pass-fail + db:roll-up-pass-fail-counts)) + +;; not used, intended to indicate to run in calling process +(define db:run-local-queries '()) ;; rollup-tests-pass-fail)) ;; The queue is a list of vectors where the zeroth slot indicates the type of query to ;; apply and the second slot is the time of the query and the third entry is a list of ;; values to be applied ;; @@ -1217,57 +1285,91 @@ (set! data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))) (set! *incoming-data* '()) (mutex-unlock! *incoming-mutex*) (if (> (length data) 0) (debug:print-info 4 "Writing cached data " data)) - ;; prepare the needed statements + + ;; prepare the needed statements, do each only once (for-each (lambda (request-item) (let ((stmt-key (vector-ref request-item 0))) (if (not (hash-table-ref/default queries stmt-key #f)) (let ((stmt (alist-ref stmt-key db:queries))) (if stmt (hash-table-set! queries stmt-key (sqlite3:prepare db (car stmt))) - (debug:print 0 "ERROR: Missing query spec for " stmt-key "!")))))) + (if (procedure? stmt-key) + (hash-table-set! queries stmt-key #f) + (debug:print 0 "ERROR: Missing query spec for " stmt-key "!"))))))) data) + + ;; outer loop to handle special queries that cannot be handled in the + ;; transaction. (let outerloop ((special-qry #f) (stmts data)) (if special-qry + ;; handle a query that cannot be part of the grouped queries (let* ((stmt-key (vector-ref special-qry 0)) (qry (hash-table-ref queries stmt-key)) - (params (vector-ref speical-qry 2))) - (apply sqlite3:execute db qry params) + (params (vector-ref special-qry 2))) + (if (string? qry) + (apply sqlite3:execute db qry params) + (if (procedure? stmt-key) + (begin + ;; we are being handed a procedure so call it + (debug:print-info 11 "Running (apply " stmt-key " " db " " params ")") + (apply stmt-key db params)) + (debug:print 0 "ERROR: Unrecognised queued call " qry " " params))) (if (not (null? stmts)) (outerloop #f stmts))) + ;; handle normal queries - (sqlite3:with-transaction - db - (lambda () - (debug:print-info 11 "flushing " stmts " to db") - (if (not (null? stmts)) - (let innerloop ((hed (car stmts)) - (tal (cdr stmts))) - (let ((params (vector-ref hed 2)) - (stmt-key (vector-ref hed 0))) - (if (not (member stmt-key db:special-queries)) - (begin - (debug:print-info 11 "Executing " stmt-key " for " params) - (apply sqlite3:execute (hash-table-ref queries stmt-key) params) - (if (not (null? tal)) - (innerloop (car tal)(cdr tal)))) - (outerloop hed tal))))))))) + (let ((rem (sqlite3:with-transaction + db + (lambda () + (debug:print-info 11 "flushing " stmts " to db") + (if (null? stmts) + stmts + (let innerloop ((hed (car stmts)) + (tal (cdr stmts))) + (let ((params (vector-ref hed 2)) + (stmt-key (vector-ref hed 0))) + (if (or (procedure? stmt-key) + (member stmt-key db:special-queries)) + (begin + (debug:print-info 11 "Handling special statement " stmt-key) + (cons hed tal)) + (begin + (debug:print-info 11 "Executing " stmt-key " for " params) + (apply sqlite3:execute (hash-table-ref queries stmt-key) params) + (if (not (null? tal)) + (innerloop (car tal)(cdr tal)) + '())) + )))))))) + (if (not (null? rem)) + (outerloop (car rem)(cdr rem)))))) (for-each (lambda (stmt-key) (sqlite3:finalize! (hash-table-ref queries stmt-key))) (hash-table-keys queries)) (let ((cache-size (length data))) (if (> cache-size *max-cache-size*) (set! *max-cache-size* cache-size))) )) #f)) +(define (db:test-get-records-for-index-file db run-id test-name) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id itempath state status run_duration logf comment) + (set! res (cons (vector id itempath state status run_duration logf comment) res))) + db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';" + run-id test-name) + res)) + +;; Rollup the pass/fail counts from itemized tests into fail_count and pass_count (define (db:roll-up-pass-fail-counts db run-id test-name item-path status) - (cdb:flush-queue *runremote*) + ;; (cdb:flush-queue *runremote*) (if (and (not (equal? item-path "")) (or (equal? status "PASS") (equal? status "WARN") (equal? status "FAIL") (equal? status "WAIVED") @@ -1291,11 +1393,10 @@ ELSE 'COMPLETED' END, status=CASE WHEN fail_count > 0 THEN 'FAIL' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' ELSE 'UNKNOWN' END WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name)) #f) - #f)) ;;====================================================================== ;; Tests meta data ;;====================================================================== @@ -1430,12 +1531,12 @@ ;; Now rollup the counts to the central megatest.db (cdb:pass-fail-counts *runremote* test-id fail-count pass-count) ;; (sqlite3:execute db "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;" ;; fail-count pass-count test-id) - - (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least 10ms later than the set + (cdb:flush-queue *runremote*) + ;; (thread-sleep! 1) ;; play nice with the queue by ensuring the rollup is at least 10ms later than the set ;; if the test is not FAIL then set status based on the fail and pass counts. (cdb:test-rollup-test_data-pass-fail *runremote* test-id) ;; (sqlite3:execute ;; db ;;; NOTE: Should this be WARN,FAIL? A WARN is not a FAIL????? BUG FIXME @@ -1730,54 +1831,5 @@ results) ;; brutal clean up (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") - - -;;====================================================================== -;; REMOTE DB ACCESS VIA RPC -;;====================================================================== - -;; (define (rdb:test-set-status-state test-id status state msg) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 "EXCEPTION: rpc call failed?") -;; (debug:print 0 " " ((condition-property-accessor 'exn 'message) exn)) -;; (print-call-chain) -;; (cdb:test-set-status-state test-id status state msg)) -;; ((rpc:procedure 'cdb:test-set-status-state host port) test-id status state msg))) -;; (cdb:test-set-status-state test-id status state msg))) -;; -;; (define (rdb:test-rollup-test_data-pass-fail test-id) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:test-rollup-test_data-pass-fail host port) test-id)) -;; (cdb:test-rollup-test_data-pass-fail test-id))) -;; -;; (define (rdb:pass-fail-counts test-id fail-count pass-count) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:pass-fail-counts host port) test-id fail-count pass-count)) -;; (cdb:pass-fail-counts test-id fail-count pass-count))) -;; -;; ;; currently forces a flush of the queue -;; (define (rdb:tests-register-test db run-id test-name item-path) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:tests-register-test host port) db run-id test-name item-path force-write: #t)) -;; (cdb:tests-register-test db run-id test-name item-path force-write: #t))) -;; -;; (define (rdb:flush-queue) -;; (if *runremote* -;; (let ((host (vector-ref *runremote* 0)) -;; (port (vector-ref *runremote* 1))) -;; ((rpc:procedure 'cdb:flush-queue host port))) -;; (cdb:flush-queue))) -;; Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -104,17 +104,17 @@ (exit 1))) ;; Can setup as client for server mode now (server:client-setup) (change-directory *toppath*) - (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (change-directory work-area) (open-run-close set-run-config-vars #f run-id) ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) - (open-run-close set-megatest-env-vars #f run-id) + (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") @@ -188,11 +188,11 @@ ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 "script: " script) - (open-run-close db:teststep-set-status! #f test-id stepname "start" "-" #f #f) + (cdb:remote-run db:teststep-set-status! #f test-id stepname "start" "-" #f #f) ;; now launch (let ((pid (process-run script))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) @@ -206,13 +206,13 @@ (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) - (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) + (cdb:remote-run db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) (if logpro-used - (open-run-close db:test-set-log! #f test-id (conc stepname ".html"))) + (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) ((eq? (vector-ref exit-info 2) 0) 'pass) (else 'fail))) @@ -254,11 +254,11 @@ (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) (begin - (set! kill-job? (open-run-close test-get-kill-request #f test-id)) ;; run-id test-name itemdat)) + (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat)) (open-run-close test-set-meta-info #f test-id run-id test-name itemdat minutes) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) @@ -295,11 +295,11 @@ (thread-start! th1) (thread-start! th2) (thread-join! th2) (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) - (testinfo (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path))) + (testinfo (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path))) ;; Am I completed? (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) (begin (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) (tests:test-set-status! test-id @@ -386,17 +386,17 @@ ;; All log file links should be stored relative to the top of link path ;; ;; - [ - ] ;; (define (create-work-area db run-id test-id test-src-path disk-path testname itemdat) - (let* ((run-info (db:get-run-info db run-id)) + (let* ((run-info (cdb:remote-run db:get-run-info #f run-id)) (item-path (item-list->path itemdat)) (runname (db:get-value-by-header (db:get-row run-info) (db:get-header run-info) "runname")) ;; convert back to db: from rdb: - this is always run at server end - (key-vals (db:get-key-vals db run-id)) + (key-vals (cdb:remote-run db:get-key-vals #f run-id)) (target (string-intersperse key-vals "/")) (not-iterated (equal? "" item-path)) ;; all tests are found at /test-base or /test-base @@ -414,11 +414,11 @@ (lnkbase (conc linktree "/" target "/" runname)) (lnkpath (conc lnkbase "/" testname)) (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path))) ;; Update the rundir path in the test record for all - (db:test-set-rundir-by-test-id! db test-id lnkpathf) + (cdb:test-set-rundir-by-test-id *runremote* test-id lnkpathf) (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) (if (not (file-exists? linktree)) (begin (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) @@ -433,14 +433,15 @@ ;; again. ;; NB - This is not working right - some top tests are not getting the path set!!! (if (not (hash-table-ref/default *toptest-paths* testname #f)) - (let* ((testinfo (db:get-test-info-by-id db test-id)) ;; run-id testname item-path)) + (let* ((testinfo (cdb:get-test-info-by-id *runremote* test-id)) ;; run-id testname item-path)) (curr-test-path (if testinfo (db:test-get-rundir testinfo) #f))) (hash-table-set! *toptest-paths* testname curr-test-path) - (db:test-set-rundir! db run-id testname "" lnkpath) ;; toptest-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (cdb:test-set-rundir! *runremote* run-id testname "" lnkpath) ;; toptest-path) (if (or (not curr-test-path) (not (directory-exists? toptest-path))) (begin (debug:print-info 2 "Creating " toptest-path " and link " lnkpath) (create-directory toptest-path #t) @@ -538,12 +539,12 @@ (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) - (test-id (open-run-close db:get-test-id db run-id test-name item-path)) - (testinfo (open-run-close db:get-test-info-by-id db test-id)) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (testinfo (cdb:get-test-info-by-id *runremote* test-id)) (mt_target (string-intersperse (map cadr keyvallst) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host @@ -577,12 +578,12 @@ (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " ")))) ;; clean out step records from previous run if they exist - (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?") - (open-run-close db:delete-test-step-records db test-id) + ;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?") + ;; (open-run-close db:delete-test-step-records db test-id) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (cond ((and launcher hosts) ;; must be using ssh hostname (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -95,10 +95,12 @@ -env2file fname : write the environment to fname.csh and fname.sh -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname + -listservers : list the servers + -killserver host:port|pid : kill server specified by host:port or pid -repl : start a repl (useful for extending megatest) Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html @@ -153,10 +155,11 @@ ":expected" ":tol" ":units" ;; misc "-server" + "-killserver" "-extract-ods" "-pathmod" "-env2file" "-setvars" "-set-state-status" @@ -178,10 +181,11 @@ ;; misc "-archive" "-repl" "-lock" "-unlock" + "-listservers" ;; queries "-test-paths" ;; get path(s) to a test, ordered by youngest first "-runall" ;; run all tests "-remove-runs" @@ -257,14 +261,79 @@ ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== + (if (args:get-arg "-server") - (server:launch) - (server:client-launch)) + (begin + (debug:print 1 "Launching server...") + (server:launch))) +(if (or (args:get-arg "-listservers") + (args:get-arg "-killserver")) + (let ((tl (setup-for-run))) + (if tl + (let ((servers (open-run-close tasks:get-all-servers tasks:open-db)) + (fmtstr "~5a~8a~20a~5a~20a~9a~20a~5a\n") + (servers-to-kill '())) + (format #t fmtstr "Id" "Pid" "Host" "Port" "Time" "Priority" "State" "Num Clients") + (format #t fmtstr "==" "===" "====" "====" "====" "========" "=====" "===========") + (for-each + (lambda (server) + (let* ((killinfo (args:get-arg "-killserver")) + (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) + (kpid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)) + (id (vector-ref server 0)) + (pid (vector-ref server 1)) + (hostname (vector-ref server 2)) + (port (vector-ref server 3)) + (start-time (vector-ref server 4)) + (priority (vector-ref server 5)) + (state (vector-ref server 6)) + (stat-numc (server:ping hostname port)) + (status (car stat-numc)) + (numclients (cadr stat-numc)) + (killed #f) + (zmq-socket (if status (server:client-connect hostname port) #f))) + ;; no need to login as status of #t indicates we are connecting to correct + ;; server + (if (or (not status) ;; no point in keeping dead records in the db + (and khost-port ;; kill by host/port + (equal? hostname (car khost-port)) + (equal? port (string->number (cadr khost-port))))) + (begin + (open-run-close tasks:server-deregister tasks:open-db hostname port: port) + (if status ;; #t means alive + (begin + (cdb:kill-server zmq-socket) + (debug:print-info 1 "Killed server by host:port at " hostname ":" port)) + (debug:print-info 1 "Removing defunct server record for " hostname ":" port)) + (set! killed #t))) + (if (and kpid + (equal? hostname (car khost-port)) + (equal? kpid pid)) + (begin + (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid) + (set! killed #t) + (if status (cdb:kill-server zmq-socket)) + (debug:print-info 1 "Killed server by pid at " hostname ":" port))) + ;; (if zmq-socket (close-socket zmq-socket)) + (format #t fmtstr id pid hostname port start-time priority + status numclients))) + servers) + (set! *didsomething* #t)))) + ;; if not list or kill then start a client (if appropriate) + (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") + (eq? (length (hash-table-keys args:arg-hash)) 0)) + (debug:print-info 1 "Server connection not needed") + ;; ping servers only if -runall -runtests + (let ((ping (args-defined? "-runall" "-runtests" "-remove-runs" + "-set-state-status" "-rerun" "-rollup" "-lock" "-unlock" + "-set-values" "-list-runs"))) + (server:client-launch do-ping: ping)))) + ;;====================================================================== ;; Remove old run(s) ;;====================================================================== ;; since several actions can be specified on the command line the removal @@ -673,11 +742,11 @@ (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: (open-run-close db:load-test-data db test-id)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) - (open-run-close db:test-set-log! db test-id logfname))) + (cdb:test-set-log! *runremote* test-id logfname))) (if (args:get-arg "-set-toplog") (open-run-close tests:test-set-toplog! db run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") (open-run-close tests:summarize-items db run-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") @@ -716,11 +785,11 @@ (debug:print-info 2 "running \"" cmd "\"") (change-directory startingdir) (set! exitstat (system cmd)) (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) - (open-run-close db:test-set-log! db test-id htmllogfile))) + (cdb:test-set-log! *runremote* test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) (open-run-close db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) @@ -834,12 +903,13 @@ ;;====================================================================== ;; Exit and clean up ;;====================================================================== ;; this is the socket if we are a client -(if (socket? *runremote*) - (close-socket *runremote*)) +;; (if (and *runremote* +;; (socket? *runremote*)) +;; (close-socket *runremote*)) (if (not *didsomething*) (debug:print 0 help)) ;; (if *runremote* (rpc:close-all-connections!)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -71,44 +71,51 @@ ;; Awful. Please FIXME (define *env-vars-by-run-id* (make-hash-table)) (define *current-run-name* #f) -(define (set-megatest-env-vars db run-id) - (let ((keys (db:get-keys db)) +(define (db:get-run-key-val db run-id key) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;") + run-id) + res)) + +(define (db:get-run-name-from-id db run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)) + +(define (set-megatest-env-vars run-id) + (let ((keys (cdb:remote-run db:get-keys #f)) (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))) ;; get the info from the db and put it in the cache (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! *env-vars-by-run-id* run-id ht) (set! vals ht) (for-each (lambda (key) - (sqlite3:for-each-row - (lambda (val) - (hash-table-set! vals key val)) - db - (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;") - run-id)) + (hash-table-set! vals key (cdb:remote-run db:get-run-key-val #f run-id key))) keys))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " (key:get-fieldname key) " " val) (setenv (key:get-fieldname key) val))) (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (if (not *current-run-name*) - (sqlite3:for-each-row - (lambda (runname) - (set! *current-run-name* runname)) - - db - "SELECT runname FROM runs WHERE id=?;" - run-id)) - (setenv "MT_RUNNAME" *current-run-name*) + (setenv "MT_RUNNAME" (cdb:remote-run db:get-run-name-from-id #f run-id)) (setenv "MT_RUN_AREA_HOME" *toppath*) )) (define (set-item-env-vars itemdat) (for-each (lambda (item) @@ -115,15 +122,15 @@ (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) (define *last-num-running-tests* 0) -(define (runs:can-run-more-tests db test-record) +(define (runs:can-run-more-tests test-record) (let* ((tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) - (num-running (db:get-count-tests-running db)) - (num-running-in-jobgroup (db:get-count-tests-running-in-jobgroup db jobgroup)) + (num-running (cdb:remote-run db:get-count-tests-running #f)) + (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) #f))) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) @@ -189,22 +196,22 @@ ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests. ;; keyvals (define (runs:run-tests target runname test-patts user flags) (let* ((db #f) - (keys (open-run-close db:get-keys db)) + (keys (cdb:remote-run db:get-keys #f)) (keyvallst (keys:target->keyval keys target)) - (run-id (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user)) ;; test-name))) + (run-id (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause ;; keepgoing is the defacto modality now, will add hit-n-run a bit later ;; (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (test-names '()) (runconfigf (conc *toppath* "/runconfigs.config")) (required-tests '()) (test-records (make-hash-table))) - (open-run-close set-megatest-env-vars db run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (if (file-exists? runconfigf) (open-run-close setup-env-defaults db runconfigf run-id *already-seen-runconfig-info* "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) @@ -222,12 +229,12 @@ (begin ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends ;; on test A but test B reached the point on being registered as NOT_STARTED and test ;; A failed for some reason then on re-run using -keepgoing the run can never complete. - (open-run-close db:delete-tests-in-state db run-id "NOT_STARTED") - (open-run-close db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) + (cdb:delete-tests-in-state *runremote* run-id "NOT_STARTED") + (cdb:remote-run db:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))) ;; from here on out the db will be opened and closed on every call runs:run-tests-queue ;; (sqlite3:finalize! db) ;; now add non-directly referenced dependencies (i.e. waiton) (if (not (null? test-names)) @@ -371,11 +378,11 @@ (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) (cond ;; OUTER COND ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((run-limits-info (open-run-close runs:can-run-more-tests #f test-record)) ;; look at the test jobgroup and tot jobs running + (let* ((run-limits-info (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running (have-resources (car run-limits-info)) (num-running (list-ref run-limits-info 1)) (num-running-in-jobgroup (list-ref run-limits-info 2)) (max-concurrent-jobs (list-ref run-limits-info 3)) (job-group-limit (list-ref run-limits-info 4)) @@ -474,11 +481,11 @@ (loop (car tal)(cdr tal) reruns)))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (open-run-close runs:can-run-more-tests #f test-record))) + (let ((can-run-more (runs:can-run-more-tests test-record))) (if can-run-more (let* ((prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) (fails (runs:calc-fails prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met))) (debug:print-info 8 "can-run-more: " can-run-more @@ -500,11 +507,11 @@ (and (eq? testmode 'toplevel) (null? non-completed))) (let ((test-name (tests:testqueue-get-testname test-record))) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (let ((items-list (items:get-items-from-config tconfig))) (if (list? items-list) (begin (tests:testqueue-set-items! test-record items-list) (thread-sleep! *global-delta*) @@ -547,11 +554,11 @@ ;; this case should not happen, added to help catch any bugs ((and (list? items) itemdat) (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") (exit 1)) ((not (null? reruns)) - (let* ((newlst (open-run-close tests:filter-non-runnable #f run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, (junked (lset-difference equal? tal newlst))) (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) (if (< num-retries max-retries) (set! newlst (append reruns newlst))) (set! num-retries (+ num-retries 1)) @@ -598,11 +605,11 @@ (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (debug:print 2 "Attempting to launch test " test-name (if (equal? item-path "/") "/" item-path)) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (open-run-close set-megatest-env-vars db run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id) ;; these may be needed by the launching process (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) @@ -611,12 +618,12 @@ (open-run-close runs:update-test_meta db test-name test-conf))) ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique - (test-id (open-run-close db:get-test-id db run-id test-name item-path)) - (testdat (open-run-close db:get-test-info-by-id db test-id))) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (testdat (cdb:get-test-info-by-id *runremote* test-id))) (if (not testdat) (begin ;; ensure that the path exists before registering the test ;; NOPE: Cannot! Don't know yet which disk area will be assigned.... ;; (system (conc "mkdir -p " new-test-path)) @@ -630,11 +637,11 @@ (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) (open-run-close db:tests-register-test #f run-id test-name item-path) (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (set! testdat (open-run-close db:get-test-info-by-id db test-id)))) + (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) (set! test-id (db:test-get-id testdat)) (change-directory test-path) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED (if testdat Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -9,74 +9,75 @@ ;; PURPOSE. (require-extension (srfi 18) extras tcp rpc s11n) (import (prefix rpc rpc:)) -(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo zmq) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo zmq md5 message-digest) (import (prefix sqlite3 sqlite3:)) (declare (unit server)) (declare (uses common)) (declare (uses db)) (declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (include "common_records.scm") (include "db_records.scm") +(define (server:make-server-url hostport) + (if (not hostport) + #f + (conc "tcp://" (car hostport) ":" (cadr hostport)))) +(define *time-to-exit* #f) + (define (server:run hostn) (debug:print 0 "Attempting to start the server ...") - (let ((host:port (open-run-close db:get-var #f "SERVER"))) ;; do whe already have a server running? - (if host:port - (begin - (debug:print 0 "NOTE: server already running.") - (if (server:client-setup) - (begin - (debug:print-info 0 "Server is alive, not starting another") - ;;(exit) - ) - (begin - (debug:print-info 0 "Server is dead, removing flag and trying again") - (open-run-close db:del-var #f "SERVER") - (server:run hostn)))) - (let* ((zmq-socket #f) - (hostname (if (string=? "-" hostn) - (get-host-name) - hostn)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - #f))) - (if ipstr ipstr hostname)))) - (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555)) - (set! *cache-on* #t) - - ;; what to do when we quit - ;; - (on-exit (lambda () - (open-run-close db:del-var #f "SERVER") - (let loop () - (let ((queue-len 0)) - (thread-sleep! (random 5)) - (mutex-lock! *incoming-mutex*) - (set! queue-len (length *incoming-data*)) - (mutex-unlock! *incoming-mutex*) - (if (> queue-len 0) - (begin - (debug:print-info 0 "Queue not flushed, waiting ...") - (loop))))))) - - ;; The heavy lifting - ;; - (let loop () - (let* ((rawmsg (receive-message zmq-socket)) - (params (db:string->obj rawmsg)) ;; (with-input-from-string rawmsg (lambda ()(deserialize)))) - (res #f)) - (debug:print-info 12 "server=> received params=" params) - (set! res (cdb:cached-access params)) - (debug:print-info 12 "server=> processed res=" res) - (send-message zmq-socket (db:obj->string res)) - (loop))))))) + (if (not *toppath*)(setup-for-run)) + (let* ((zmq-socket #f) + (hostname (if (string=? "-" hostn) + (get-host-name) + hostn)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + #f))) + (if ipstr ipstr hostname)))) + (set! zmq-socket (server:find-free-port-and-open ipaddrstr zmq-socket 5555 0)) + (set! *cache-on* #t) + + ;; what to do when we quit + ;; + (on-exit (lambda () + (open-run-close tasks:server-deregister-self tasks:open-db) + (let loop () + (let ((queue-len 0)) + (thread-sleep! (random 5)) + (mutex-lock! *incoming-mutex*) + (set! queue-len (length *incoming-data*)) + (mutex-unlock! *incoming-mutex*) + (if (> queue-len 0) + (begin + (debug:print-info 0 "Queue not flushed, waiting ...") + (loop))))))) + + ;; The heavy lifting + ;; + (let loop () + (let* ((rawmsg (receive-message* zmq-socket)) + (params (db:string->obj rawmsg)) ;; (with-input-from-string rawmsg (lambda ()(deserialize)))) + (res #f)) + (debug:print-info 12 "server=> received params=" params) + (set! res (cdb:cached-access params)) + (debug:print-info 12 "server=> processed res=" res) + (send-message zmq-socket (db:obj->string res)) + (if (not *time-to-exit*) + (loop) + (begin + (db:write-cached-data) + (open-run-close tasks:server-deregister-self tasks:open-db) + (exit) + )))))) ;; run server:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (server:keep-running) @@ -83,96 +84,183 @@ ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) (thread-sleep! 1) ;; no need to do this very often (db:write-cached-data) - (if (< count 100) - (loop 0) + ;; (print "Server running, count is " count) + (if (< count 10) + (loop (+ count 1)) (let ((numrunning (open-run-close db:get-count-tests-running #f))) (if (or (> numrunning 0) (> (+ *last-db-access* 60)(current-seconds))) (begin (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) - (loop (+ count 1))) + (loop 0))) (begin - (debug:print-info 0 "Starting to shutdown the server side") + (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) - (open-run-close db:del-var #f "SERVER") - (thread-sleep! 10) + (set! *time-to-exit* #t) + (open-run-close tasks:server-deregister-self tasks:open-db) + (thread-sleep! 5) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") - ;; (exit))) - )))))) + (exit)))))) -(define (server:find-free-port-and-open host s port) +(define (server:find-free-port-and-open host s port #!key (trynum 50)) (let ((s (if s s (make-socket 'rep))) (p (if (number? port) port 5555))) (handle-exceptions exn (begin (debug:print 0 "Failed to bind to port " p ", trying next port") (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (server:find-free-port-and-open host s (+ p 1))) + (if (> trynum 0) + (server:find-free-port-and-open host s (+ p 1) trynum: (- trynum 1)) + (debug:print-info 0 "Tried ports from " (- p trynum) " to " p + " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))) (let ((zmq-url (conc "tcp://" host ":" p))) (print "Trying to start server on " zmq-url) (bind-socket s zmq-url) (set! *runremote* #f) (debug:print 0 "Server started on " zmq-url) - (open-run-close db:set-var #f "SERVER" zmq-url) + (open-run-close tasks:server-register tasks:open-db (current-process-id) host p 0 'live) s)))) -(define (server:client-setup) - (let* ((hostinfo (open-run-close db:get-var #f "SERVER")) - (zmq-socket (make-socket 'req))) - (if hostinfo - (begin - (debug:print-info 2 "Setting up to connect to " hostinfo) - (handle-exceptions - exn - (begin - (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) - (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 " perhaps jobs killed with -9? Removing server records") - (open-run-close db:del-var #f "SERVER") - (exit) - #f) - (let ((connect-ok #f)) - (connect-socket zmq-socket hostinfo) - (set! connect-ok (cdb:client-call zmq-socket 'login #t *toppath*)) - (if connect-ok - (begin - (debug:print-info 2 "Logged in and connected to " hostinfo) - (set! *runremote* zmq-socket) - #t) - (begin - (debug:print-info 2 "Failed to login or connect to " hostinfo) - (set! *runremote* #f) - #f))))) - (begin - (debug:print-info 2 "No server available, attempting to start one...") - (system (conc "megatest -server - " (if (args:get-arg "-debug") - (conc "-debug " (args:get-arg "-debug")) - "") - " &")) - (sleep 5) - (server:client-setup))))) +(define (server:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (argv))))))) + +(define (server:get-client-signature) + (if *my-client-signature* *my-client-signature* + (let ((sig (server:mk-signature))) + (set! *my-client-signature* sig) + *my-client-signature*))) + +;; +(define (server:client-connect host port #!key (context #f)) + (debug:print 3 "client-connect " host ":" port) + (let ((connect-ok #f) + (zmq-socket (if context + (make-socket 'req context) + (make-socket 'req))) + (conurl (server:make-server-url (list host port)))) + (if (socket? zmq-socket) + (begin + (connect-socket zmq-socket conurl) + zmq-socket) + #f))) + + +(define (server:client-login zmq-socket) + (cdb:login zmq-socket *toppath* (server:get-client-signature))) + +(define (server:client-logout zmq-socket) + (let ((ok (and (socket? zmq-socket) + (cdb:logout zmq-socket *toppath* (server:get-client-signature))))) + ;; (close-socket zmq-socket) + ok)) + +;; Do all the connection work, start a server if not already running +(define (server:client-setup #!key (numtries 10)(do-ping #f)) + (if (not *toppath*)(setup-for-run)) + (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db do-ping: do-ping))) + (if hostinfo + (let ((host (car hostinfo)) + (port (cadr hostinfo)) + (zsocket (caddr hostinfo))) + ;; (set! *runremote* zsocket)) + (let* ((host (car hostinfo)) + (port (cadr hostinfo))) + (debug:print-info 2 "Setting up to connect to " hostinfo) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " perhaps jobs killed with -9? Removing server records") + (open-run-close tasks:server-deregister tasks:open-db host port: port) + #f) + (let* ((zmq-socket (server:client-connect host port)) + (login-res (server:client-login zmq-socket)) + (connect-ok (if (null? login-res) #f (car login-res))) + (conurl (server:make-server-url hostinfo))) + (if connect-ok + (begin + (debug:print-info 2 "Logged in and connected to " conurl) + (set! *runremote* zmq-socket) + #t) + (begin + (debug:print-info 2 "Failed to login or connect to " conurl) + (set! *runremote* #f) + #f)))))) + (if (> numtries 0) + (let ((exe (car (argv)))) + (debug:print-info 1 "No server available, attempting to start one...") + (process-run exe (list "-server" "-" "-debug" (conc *verbosity*))) + (sleep 5) + (server:client-setup numtries: (- numtries 1) do-ping: do-ping)) + (debug:print-info 1 "Too many retries, giving up"))))) (define (server:launch) (let* ((toppath (setup-for-run))) (debug:print-info 0 "Starting the standalone server") - (if *toppath* - (let* ((th2 (make-thread (lambda () - (server:run (args:get-arg "-server"))))) - (th3 (make-thread (lambda () - (server:keep-running))))) - (thread-start! th3) - (thread-start! th2) - (thread-join! th3) - (set! *didsomething* #t)) - (debug:print 0 "ERROR: Failed to setup for megatest")))) - -(define (server:client-launch) - (if (server:client-setup) + (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db))) + (if hostinfo + (debug:print-info 1 "NOT starting new server, one is already running on " (car hostinfo) ":" (cadr hostinfo)) + (if *toppath* + (let* ((th2 (make-thread (lambda () + (server:run (args:get-arg "-server"))))) + (th3 (make-thread (lambda () + (server:keep-running))))) + (thread-start! th2) + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th3)) + (debug:print 0 "ERROR: Failed to setup for megatest")))))) + +(define (server:client-launch #!key (do-ping #f)) + (if (server:client-setup do-ping: do-ping) (debug:print-info 0 "connected as client") (begin (debug:print 0 "ERROR: Failed to connect as client") (exit)))) + +;; ping a server and return number of clients or #f (if no response) +(define (server:ping host port #!key (secs 10)(return-socket #f)) + (cdb:use-non-blocking-mode + (lambda () + (let* ((res #f) + (th1 (make-thread + (lambda () + (let* ((zmq-context (make-context 1)) + (zmq-socket (server:client-connect host port context: zmq-context))) + (if zmq-socket + (if (server:client-login zmq-socket) + (let ((numclients (cdb:num-clients zmq-socket))) + (if (not return-socket) + (begin + (server:client-logout zmq-socket) + (close-socket zmq-socket))) + (set! res (list #t numclients (if return-socket zmq-socket #f)))) + (begin + ;; (close-socket zmq-socket) + (set! res (list #f "CAN'T LOGIN" #f)))) + (set! res (list #f "CAN'T CONNECT" #f))))))) + (th2 (make-thread + (lambda () + (let loop ((count 1)) + (debug:print-info 1 "Ping " count " server on " host " at port " port) + (thread-sleep! 2) + (if (< count (/ secs 2)) + (loop (+ count 1)))) + ;; (thread-terminate! th1) + (set! res (list #f "TIMED OUT" #f)))))) + (thread-start! th2) + (thread-start! th1) + (handle-exceptions + exn + (set! res (list #f "TIMED OUT" #f)) + (thread-join! th1 secs)) + res)))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -23,16 +23,16 @@ ;;====================================================================== (define (tasks:open-db) (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) - (tdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) - (sqlite3:set-busy-handler! tdb handler) + (sqlite3:set-busy-handler! mdb handler) (if (not exists) (begin - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, state TEXT DEFAULT 'new', target TEXT DEFAULT '', name TEXT DEFAULT '', @@ -40,19 +40,127 @@ item TEXT DEFAULT '', keylock TEXT, params TEXT, creation_time TIMESTAMP, execution_time TIMESTAMP);") - (sqlite3:execute tdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, pid INTEGER, start_time TIMESTAMP, last_update TIMESTAMP, hostname TEXT, username TEXT, - CONSTRAINT monitors_constraint UNIQUE (pid,hostname));"))) - tdb)) + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + hostname TEXT, + port INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + CONSTRAINT servers_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + )) + mdb)) +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; state: 'live, 'shutting-down, 'dead +(define (tasks:server-register mdb pid hostname port priority state) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO servers (pid,hostname,port,start_time,priority,state) VALUES(?,?,?,strftime('%s','now'),?,?);" + pid hostname port priority (conc state))) + +(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)) + (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid) + (if pid + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" hostname pid) + (if port + (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port) + (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) + +(define (tasks:server-deregister-self mdb) + (tasks:server-deregister mdb (get-host-name) pid: (current-process-id))) + +(define (tasks:server-get-server-id mdb) + ;; dunno yet + 0) + +(define (tasks:client-register mdb pid hostname cmdline) + (sqlite3:execute + mdb + "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));") + (tasks:server-get-server-id mdb) + pid hostname cmdline) + +(define (tasks:client-logout mdb pid hostname cmdline) + (sqlite3:execute + mdb + "UPDATE clients SET logout_time=strftime('%s','now') WHERE pid=? AND hostname=? AND cmdline=?;" + pid hostname cmdline)) + +(define (tasks:get-logged-in-clients mdb server-id) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id server-id pid hostname cmdline login-time logout-time) + (set! res (cons (vector id server-id pid hostname cmdline login-time lougout-time) res))) + mdb + "SELECT id,server_id,pid,hostname,cmdline,login_time,logout_time FROM clients WHERE server_id=?;" + server-id))) + +(define (tasks:have-clients? mdb server-id) + (null? (tasks:get-logged-in-clients mdb server-id))) + +;; ping each server in the db and return first found that responds. +;; remove any others. will not necessarily remove all! +(define (tasks:get-best-server mdb #!key (do-ping #f)) + (let ((res '()) + (best #f)) + (sqlite3:for-each-row + (lambda (id hostname port) + (set! res (cons (list hostname port) res)) + (debug:print-info 1 "Found " hostname ":" port)) + mdb + "SELECT id,hostname,port FROM servers WHERE state='live' ORDER BY start_time DESC LIMIT 1;") + ;; (print "res=" res) + (if (null? res) #f + (let loop ((hed (car res)) + (tal (cdr res))) + ;; (print "hed=" hed ", tal=" tal) + (let* ((host (car hed)) + (port (cadr hed)) + (ping-res (if do-ping (server:ping host port return-socket: #f) '(#t "NO PING" #f))) + (alive (car ping-res)) + (reason (cadr ping-res)) + (zsocket (caddr ping-res))) + (if alive (list host port zsocket) + ;; remove defunct server from table + (begin + (open-run-close tasks:server-deregister tasks:open-db host port: port) + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))))) + +(define (tasks:get-all-servers mdb) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id pid hostname port start-time priority state) + (set! res (cons (vector id pid hostname port start-time priority state) res))) + mdb + "SELECT id,pid,hostname,port,start_time,priority,state FROM servers ORDER BY start_time DESC;") + res)) + ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== @@ -65,32 +173,32 @@ ;;====================================================================== ;; Task Monitors ;;====================================================================== -(define (tasks:register-monitor db tdb) +(define (tasks:register-monitor db mdb) (let* ((pid (current-process-id)) (hostname (get-host-name)) (userinfo (user-information (current-user-id))) (username (car userinfo))) (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) - (sqlite3:execute tdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" pid hostname username))) -(define (tasks:get-num-alive-monitors tdb) +(define (tasks:get-num-alive-monitors mdb) (let ((res 0)) (sqlite3:for-each-row (lambda (count) (set! res count)) - tdb + mdb "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" (car (user-information (current-user-id)))) res)) ;; register a task -(define (tasks:add tdb action owner target runname test item params) - (sqlite3:execute tdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) +(define (tasks:add mdb action owner target runname test item params) + (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,test,item,params,creation_time,execution_time) VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" action owner target runname @@ -105,28 +213,28 @@ (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) (cdr keys))) tmp)) ;; for use from the gui -(define (tasks:add-from-params tdb action keys key-params var-params) +(define (tasks:add-from-params mdb action keys key-params var-params) (let ((target (keys:key-vals-hash->target keys key-params)) (owner (car (user-information (current-user-id)))) (runname (hash-table-ref/default var-params "runname" #f)) (testpatts (hash-table-ref/default var-params "testpatts" "%")) (itempatts (hash-table-ref/default var-params "itempatts" "%")) (params (hash-table-ref/default var-params "params" ""))) - (tasks:add tdb action owner target runname testpatts itempatts params))) + (tasks:add mdb action owner target runname testpatts itempatts params))) ;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old ;; -(define (tasks:snag-a-task tdb) +(define (tasks:snag-a-task mdb) (let ((res #f) (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) ;; first randomly set a new to pid-hostname-hostname (sqlite3:execute - tdb + mdb "UPDATE tasks_queue SET keylock=? WHERE id IN (SELECT id FROM tasks_queue WHERE state='new' OR (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR state='reset' @@ -133,92 +241,92 @@ ORDER BY RANDOM() LIMIT 1);" keytxt) (sqlite3:for-each-row (lambda (id . rem) (set! res (apply vector id rem))) - tdb + mdb "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) (if res ;; yep, have work to be done (begin - (sqlite3:execute tdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (sqlite3:execute mdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" (tasks:task-get-id res)) res) #f))) -(define (tasks:reset-stuck-tasks tdb) +(define (tasks:reset-stuck-tasks mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id delta) (set! res (cons id res))) - tdb + mdb "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") (sqlite3:execute - tdb + mdb (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")))) ;; return all tasks in the tasks_queue table ;; -(define (tasks:get-tasks tdb types states) +(define (tasks:get-tasks mdb types states) (let ((res '())) (sqlite3:for-each-row (lambda (id . rem) (set! res (cons (apply vector id rem) res))) - tdb + mdb (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue " ;; WHERE ;; state IN " statesstr " AND ;; action IN " actionsstr " ORDER BY creation_time DESC;")) res)) ;; remove tasks given by a string of numbers comma separated -(define (tasks:remove-queue-entries tdb task-ids) - (sqlite3:execute tdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) +(define (tasks:remove-queue-entries mdb task-ids) + (sqlite3:execute mdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))) ;; -(define (tasks:start-monitor db tdb) - (if (> (tasks:get-num-alive-monitors tdb) 2) ;; have two running, no need for more +(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more (debug:print-info 1 "Not starting monitor, already have more than two running") (let* ((megatestdb (conc *toppath* "/megatest.db")) (monitordbf (conc *toppath* "/monitor.db")) (last-db-update 0)) ;; (file-modification-time megatestdb))) - (task:register-monitor tdb) + (task:register-monitor mdb) (let loop ((count 0) (next-touch 0)) ;; next-touch is the time where we need to update last_update ;; if the db has been modified we'd best look at the task queue (let ((modtime (file-modification-time megatestdbpath ))) (if (> modtime last-db-update) - (tasks:process-queue db tdb last-db-update megatestdb next-touch)) + (tasks:process-queue db mdb last-db-update megatestdb next-touch)) ;; WARNING: Possible race conditon here!! ;; should this update be immediately after the task-get-action call above? (if (> (current-seconds) next-touch) (begin - (tasks:monitors-update tdb) + (tasks:monitors-update mdb) (loop (+ count 1)(+ (current-seconds) 240))) (loop (+ count 1) next-touch))))))) -(define (tasks:process-queue db tdb) - (let* ((task (tasks:snag-a-task tdb)) +(define (tasks:process-queue db mdb) + (let* ((task (tasks:snag-a-task mdb)) (action (if task (tasks:task-get-action task) #f))) (if action (print "tasks:process-queue task: " task)) (if action (case (string->symbol action) - ((run) (tasks:start-run db tdb task)) - ((remove) (tasks:remove-runs db tdb task)) - ((lock) (tasks:lock-runs db tdb task)) + ((run) (tasks:start-run db mdb task)) + ((remove) (tasks:remove-runs db mdb task)) + ((lock) (tasks:lock-runs db mdb task)) ;; ((monitor) (tasks:start-monitor db task)) - ((rollup) (tasks:rollup-runs db tdb task)) - ((updatemeta)(tasks:update-meta db tdb task)) - ((kill) (tasks:kill-monitors db tdb task)))))) + ((rollup) (tasks:rollup-runs db mdb task)) + ((updatemeta)(tasks:update-meta db mdb task)) + ((kill) (tasks:kill-monitors db mdb task)))))) -(define (tasks:get-monitors tdb) +(define (tasks:get-monitors mdb) (let ((res '())) (sqlite3:for-each-row (lambda (a . rem) (set! res (cons (apply vector a rem) res))) - tdb + mdb "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") (reverse res) )) (define (tasks:tasks->text tasks) @@ -253,31 +361,31 @@ monitors) "\n")))) ;; update the last_update field with the current time and ;; if any monitors appear dead, remove them -(define (tasks:monitors-update tdb) - (sqlite3:execute tdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name)) (let ((deadlist '())) (sqlite3:for-each-row (lambda (id pid host last-update delta) (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") (set! deadlist (cons id deadlist))) - tdb + mdb "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") - (sqlite3:execute tdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) ) -(define (tasks:remove-monitor-record tdb) - (sqlite3:execute tdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" (current-process-id) (get-host-name))) -(define (tasks:set-state tdb task-id state) - (sqlite3:execute tdb "UPDATE tasks_queue SET state=? WHERE id=?;" +(define (tasks:set-state mdb task-id state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) ;;====================================================================== ;; The routines to process tasks @@ -284,11 +392,11 @@ ;;====================================================================== ;; NOTE: It might be good to add one more layer of checking to ensure ;; that no task gets run in parallel. -(define (tasks:start-run db tdb task) +(define (tasks:start-run db mdb task) (let ((flags (make-hash-table))) (hash-table-set! flags "-rerun" "NOT_STARTED") (if (not (string=? (tasks:task-get-params task) "")) (hash-table-set! flags "-setvars" (tasks:task-get-params task))) (print "Starting run " task) @@ -298,13 +406,13 @@ (tasks:task-get-name task) (tasks:task-get-test task) (tasks:task-get-item task) (tasks:task-get-owner task) flags) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) -(define (tasks:rollup-runs db tdb task) +(define (tasks:rollup-runs db mdb task) (let* ((flags (make-hash-table)) (keys (db:get-keys db)) (keyvallst (keys:target->keyval keys (tasks:task-get-target task)))) ;; (hash-table-set! flags "-rerun" "NOT_STARTED") (print "Starting rollup " task) @@ -312,6 +420,6 @@ (runs:rollup-run db keys keyvallst (tasks:task-get-name task) (tasks:task-get-owner task)) - (tasks:set-state tdb (tasks:task-get-id task) "waiting"))) + (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -108,11 +108,11 @@ #f)) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found (define (test:get-previous-test-run-record db run-id test-name item-path) - (let* ((keys (db:get-keys db)) + (let* ((keys (cdb:remote-run db:get-keys #f)) (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) (keyvals #f)) ;; first look up the key values from the run selected by run-id (sqlite3:for-each-row @@ -132,11 +132,11 @@ ;; if found then return that matching test record (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) #f (let loop ((hed (car prev-run-ids)) (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path)'() '()))) + (let ((results (cdb:remote-run db:get-tests-for-run #f hed (conc test-name "/" item-path)'() '()))) (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) (if (and (null? results) (not (null? tal))) (loop (car tal)(cdr tal)) (if (null? results) #f @@ -144,11 +144,11 @@ ;; get the previous records for when these tests were run where all keys match but runname ;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests ;; can use wildcards. Also can likely be factored in with get test paths? (define (test:get-matching-previous-test-run-records db run-id test-name item-path) - (let* ((keys (db:get-keys db)) + (let* ((keys (cdb:remote-run db:get-keys #f)) (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) (keyvals #f) (tests-hash (make-hash-table))) ;; first look up the key values from the run selected by run-id @@ -170,11 +170,11 @@ (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) '() ;; no previous runs? return null (let loop ((hed (car prev-run-ids)) (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path) '() '()))) + (let ((results (cdb:remote-run db:get-tests-for-run #f hed (conc test-name "/" item-path) '() '()))) (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path " results: " (intersperse results "\n")) ;; Keep only the youngest of any test/item combination (for-each (lambda (testdat) @@ -194,11 +194,11 @@ (define (tests:test-set-status! test-id state status comment dat) (debug:print-info 4 "tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat) (let* ((db #f) (real-status status) (otherdat (if dat dat (make-hash-table))) - (testdat (open-run-close db:get-test-info-by-id db test-id)) + (testdat (cdb:get-test-info-by-id *runremote* test-id)) (run-id (db:test-get-run_id testdat)) (test-name (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) ;; before proceeding we must find out if the previous test (where all keys matched except runname) ;; was WAIVED if this test is FAIL @@ -257,48 +257,45 @@ expected "," tol "," units "," dcomment ",," ;; extra comma for status type ))) - (open-run-close db:csv->test-data db test-id + (cdb:remote-run db:csv->test-data #f test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest - (open-run-close db:roll-up-pass-fail-counts db run-id test-name item-path status) + (cdb:remote-run db:roll-up-pass-fail-counts #f run-id test-name item-path status) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) - (open-run-close db:test-set-comment db test-id cmt))) + (cdb:remote-run db:test-set-comment #f test-id cmt))) )) + (define (tests:test-set-toplog! db run-id test-name logf) - (sqlite3:execute db "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';" - logf run-id test-name)) + (cdb:client-call *runremote* 'tests:test-set-toplog #t logf run-id test-name)) (define (tests:summarize-items db run-id test-name force) ;; if not force then only update the record if one of these is true: ;; 1. logf is "log/final.log ;; 2. logf is same as outputfilename - (let ((outputfilename (conc "megatest-rollup-" test-name ".html")) - (orig-dir (current-directory)) - (logf #f)) + (let* ((outputfilename (conc "megatest-rollup-" test-name ".html")) + (orig-dir (current-directory)) + (logf-info (cdb:remote-run db:test-get-logfile-info #f run-id test-name)) + (logf (if logf-info (cadr logf-info) #f)) + (path (if logf-info (car logf-info) #f))) ;; This query finds the path and changes the directory to it for the test - (sqlite3:for-each-row - (lambda (path final_logf) - (set! logf final_logf) - (if (directory? path) - (begin - (print "Found path: " path) - (change-directory path)) - ;; (set! outputfilename (conc path "/" outputfilename))) - (print "No such path: " path))) - db - "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" - run-id test-name) - (print "summarize-items with logf " logf) + (set! logf (car logf-info)) + (if (directory? path) + (begin + (print "Found path: " path) + (change-directory path)) + ;; (set! outputfilename (conc path "/" outputfilename))) + (print "No such path: " path)) + (debug:print 1 "summarize-items with logf " logf) (if (or (equal? logf "logs/final.log") (equal? logf outputfilename) force) (begin (if (obtain-dot-lock outputfilename 1 20 30) ;; retry every second for 20 seconds, call it dead after 30 seconds and steal the lock @@ -306,33 +303,38 @@ (print "Failed to obtain lock for " outputfilename)) (let ((oup (open-output-file outputfilename)) (counts (make-hash-table)) (statecounts (make-hash-table)) (outtxt "") - (tot 0)) + (tot 0) + (testdat (cdb:remote-run db:test-get-records-for-index-file run-id test-name))) (with-output-to-port oup (lambda () (set! outtxt (conc outtxt "Summary: " test-name "

Summary for " test-name "

")) - (sqlite3:for-each-row - (lambda (id itempath state status run_duration logf comment) - (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) - (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) - (set! outtxt (conc outtxt "" - " " itempath "" - "" state "" - "" status "" - "" (if (equal? comment "") - " " - comment) "" - ""))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE run_id=? AND testname=? AND item_path != '';" - run-id test-name) - + (for-each + (lambda (testrecord) + (let ((id (vector-ref testrecord 0)) + (itempath (vector-ref testrecord 1)) + (state (vector-ref testrecord 2)) + (status (vector-ref testrecord 3)) + (run_duration (vector-ref testrecord 4)) + (logf (vector-ref testrecord 5)) + (comment (vector-ref testrecord 6))) + (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0))) + (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0))) + (set! outtxt (conc outtxt "" + " " itempath "" + "" state "" + "" status "" + "" (if (equal? comment "") + " " + comment) "" + "")))) + testdat) (print "
") ;; Print out stats for status (set! tot 0) (print "") (for-each (lambda (state) @@ -356,10 +358,11 @@ "" outtxt "

State stats

ItemStateStatusComment
") (release-dot-lock outputfilename))) (close-output-port oup) (change-directory orig-dir) + ;; NB// tests:test-set-toplog! is remote internal... (tests:test-set-toplog! db run-id test-name outputfilename) ))))) (define (get-all-legal-tests) (let* ((tests (glob (conc *toppath* "/tests/*"))) @@ -423,22 +426,22 @@ #t ;; if a is a higher priority than b then we are good to go #f)))))))) ;; for each test: ;; -(define (tests:filter-non-runnable db run-id testkeynames testrecordshash) +(define (tests:filter-non-runnable run-id testkeynames testrecordshash) (let ((runnables '())) (for-each (lambda (testkeyname) (let* ((test-record (hash-table-ref testrecordshash testkeyname)) (test-name (tests:testqueue-get-testname test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (item-path (tests:testqueue-get-item_path test-record)) (waitons (tests:testqueue-get-waitons test-record)) (keep-test #t) - (test-id (db:get-test-id db run-id test-name item-path)) - (tdat (db:get-test-info-by-id db test-id))) + (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + (tdat (cdb:get-test-info-by-id *runremote* test-id))) (if tdat (begin ;; Look at the test state and status (if (or (member (db:test-get-status tdat) '("PASS" "WARN" "WAIVED" "CHECK")) @@ -449,12 +452,12 @@ ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test ;; from the runnable list (if keep-test (for-each (lambda (waiton) ;; for now we are waiting only on the parent test - (let* ((parent-test-id (db:get-test-id db run-id waiton "")) - (wtdat (db:get-test-info-by-id db test-id))) + (let* ((parent-test-id (cdb:remote-run db:get-test-id #f run-id waiton "")) + (wtdat (cdb:get-test-info-by-id *runremote* test-id))) (if (or (member (db:test-get-status wtdat) '("FAIL" "KILLED")) (member (db:test-get-state wtdat) '("INCOMPETE"))) (set! keep-test #f)))) ;; no point in running this one again @@ -467,13 +470,13 @@ ;; test steps ;;====================================================================== ;; teststep-set-status! used to be here -(define (test-get-kill-request db test-id) ;; run-id test-name itemdat) +(define (test-get-kill-request test-id) ;; run-id test-name itemdat) (let* (;; (item-path (item-list->path itemdat)) - (testdat (db:get-test-info-by-id db test-id))) ;; run-id test-name item-path))) + (testdat (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path))) (equal? (test:get-state testdat) "KILLREQ"))) (define (test:tdb-get-rundat-count tdb) (if tdb (let ((res 0)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -18,59 +18,58 @@ NEWTARGET = "-target $(OS)/$(FS)/$(VER)" TARGET = "-target ubuntu/nfs/none" all : test1 test2 test3 test4 test5 +test0 : cleanprep + cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)& + test1 : cleanprep rm -f simplerun/megatest.db rm -rf simplelinks/ simpleruns/ mkdir -p simplelinks simpleruns cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm cd simplerun;echo '(load "../tests.scm")' | $(MEGATEST) -repl -debug $(DEBUG) test2 : fullprep - cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) $(SERVER) -debug $(DEBUG) $(LOGGING) + cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) cd fullrun;megatest -runall -target ubunut/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/ai -target ubunut/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests runfirst/%,%/ai -target ubunut/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/winter -target ubunut/nfs/none :runname $(RUNNAME)_03 -debug $(DEBUG) sleep 40;cd fullrun;megatest -target ubuntu/nfs/none :runname $(RUNNAME) -set-state-status COMPLETED,FORCED :state COMPLETED :status PASS -testpatt ez_p%s,runfirst/ -debug $(DEBUG) $(LOGGING) test3 : fullprep - cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b $(SERVER) -debug 10 + cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10 test4 : fullprep - cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) -debug $(DEBUG) & - cd fullrun;sleep 5;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server test5 : fullprep - cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) & - cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & + cd fullrun;sleep 0;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & - # cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & + cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & test6: fullprep cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 cleanprep : ../*.scm Makefile */*.config - # if [ -e fullrun/megatest.db ]; then sqlite3 fullrun/megatest.db "delete from metadat where var='SERVER';";fi mkdir -p /tmp/mt_runs /tmp/mt_links cd ..;make install - rm -f fullrun/logging.db + rm -f */logging.db */monitor.db touch cleanprep fullprep : cleanprep - cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) & - sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% + cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% cd fullrun;$(BINPATH)/dboard -rows 15 & dashboard : cleanprep cd fullrun && $(BINPATH)/dashboard -rows 25 & @@ -80,10 +79,13 @@ clean : rm cleanprep kill : killall -v mtest main.sh dboard || true - rm -f fullrun/megatest.db fullrun/logging.db || true - killall -v -9 mtest dboard || true + rm -f */megatest.db */logging.db */monitor.db || true + killall -v mtest dboard || true + +hardkill : kill + sleep 5;killall -v mtest main.sh dboard -9 runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -18,12 +18,12 @@ # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally maxretries 20 [validvalues] -state start end -status pass fail n/a 0 1 running +state start end 0 1 - 2 +status pass fail n/a 0 1 running - 2 # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section [env-override] SPECIAL_ENV_VARS overide them here - should be seen at launch and in the runs Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -1,7 +1,22 @@ +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + (require-extension test) (require-extension regex) +(require-extension srfi-18) +(import srfi-18) +(require-extension zmq) +(import zmq) (define test-work-dir (current-directory)) ;; read in all the _record files (let ((files (glob "*_records.scm"))) @@ -8,10 +23,12 @@ (for-each (lambda (file) (print "Loading " file) (load file)) files)) + +(define *runremote* #f) ;;====================================================================== ;; P R O C E S S E S ;;====================================================================== @@ -54,11 +71,45 @@ (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname GLOB '' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,/b%")) (test #f "(testname GLOB 'a' AND item_path GLOB 'b') OR (testname LIKE 'a%' AND item_path LIKE '%') OR (testname LIKE '%' AND item_path LIKE 'b%')" (tests:match->sqlqry "a/b,a%,%/b%")) -;; (exit) +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(test "setup for run" #t (begin (setup-for-run) + (string? (getenv "MT_RUN_AREA_HOME")))) + +(test "server-register, get-best-server" '("bob" 1234) (let ((res #f)) + (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live) + (set! res (open-run-close tasks:get-best-server tasks:open-db)) + res)) +(test "de-register server" #t (let ((res #f)) + (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) + (list? (open-run-close tasks:get-best-server tasks:open-db)))) + +(define hostinfo #f) +(test #f #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db))) + (set! hostinfo dat) + (and (string? (car dat)) + (number? (cadr dat))))) + +(test #f #t (let ((zmq-socket (apply server:client-connect hostinfo))) + (set! *runremote* zmq-socket) + (socket? *runremote*))) + +(test #f #t (let ((res (server:client-login *runremote*))) + (car res))) + +(test #f #t (socket? *runremote*)) + +;; (test #f #t (server:client-setup)) + +(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*))) + +(test #f #t (open-run-close tasks:get-best-server tasks:open-db)) ;;====================================================================== ;; C O N F I G F I L E S ;;====================================================================== @@ -78,12 +129,10 @@ (define header (list "col1" "col2" "col3" "col4")) (test "Get row by header" "blah" (db:get-value-by-header row header "col4")) ;; (define *toppath* "tests") (define *db* #f) -(test "setup for run" #t (begin (setup-for-run) - (string? (getenv "MT_RUN_AREA_HOME")))) (test "open-db" #t (begin (set! *db* (open-db)) (if *db* #t #f))) ;; quit wasting time, I'm changing *db* to db @@ -107,24 +156,21 @@ (test "write env files" "nada.csh" (begin (save-environment-as-files "nada") (and (file-exists? "nada.sh") (file-exists? "nada.csh")))) + +(test #f #t (cdb:client-call *runremote* 'immediate #f (lambda ()(display "Got here eh!?") #t))) + +;; (set! *verbosity* 20) +(test #f *verbosity* (cdb:set-verbosity *runremote* *verbosity*)) +(test #f #f (cdb:roll-up-pass-fail-counts *runremote* 1 "test1" "" "PASS")) +;; (set! *verbosity* 1) +;; (cdb:set-verbosity *runremote* *verbosity*) (test "get all legal tests" (list "test1" "test2") (sort (get-all-legal-tests) string<=?)) -(test "register-test, test info" "NOT_STARTED" - (begin - (cdb:tests-register-test *remoterun* 1 "nada" "") - ;; (rdb:flush-queue) - (vector-ref (db:get-test-info *db* 1 "nada" "") 3))) - -(test #f "NOT_STARTED" - (begin - (rdb:tests-register-test #f 1 "nada" "") - ;; (rdb:flush-queue) - (vector-ref (open-run-close db:get-test-info #f 1 "nada" "") 3))) (test "get-keys" "SYSTEM" (vector-ref (car (db:get-keys *db*)) 0));; (key:get-fieldname (car (sort (db-get-keys *db*)(lambda (a b)(string>=? (vector-ref a 0)(vector-ref b 0))))))) (define remargs (args:get-args '("bar" "foo" ":runname" "bob" ":SYSTEM" "ubuntu" ":RELEASE" "v1.2" ":datapath" "blah/foo" "nada") @@ -138,10 +184,16 @@ '(("SYSTEM" "key1")("RELEASE" "key2")) "myrun" "new" "n/a" "bob"))) + +(test #f "CACHED" (cdb:tests-register-test *runremote* 1 "nada" "")) +(test #f 1 (cdb:remote-run db:get-test-id #f 1 "nada" "")) +(test #f "NOT_STARTED" (vector-ref (open-run-close db:get-test-info #f 1 "nada" "") 3)) +(test #f "NOT_STARTED" (vector-ref (cdb:get-test-info *runremote* 1 "nada" "") 3)) + (define keys (db:get-keys *db*)) ;;====================================================================== ;; D B ;;====================================================================== @@ -233,46 +285,43 @@ (test "Add a step" #t (begin (db:teststep-set-status! db 2 "step1" "start" 0 "This is a comment" "mylogfile.html") (sleep 2) (db:teststep-set-status! db 2 "step1" "end" "pass" "This is a different comment" "finallogfile.html") - (set! test-id (db:test-get-id (car (db:get-tests-for-run db 1 "test1" '() '())))) + (set! test-id (db:test-get-id (car (cdb:remote-run db:get-tests-for-run #f 1 "test1" '() '())))) (number? test-id))) -(test "Get rundir" #t (let ((rundir (db:test-get-rundir-from-test-id db test-id))) - (print "Rundir" rundir) +(test "Get rundir" #t (let ((rundir (cdb:remote-run db:test-get-rundir-from-test-id #f test-id))) + (print "Rundir " rundir) + (system (conc "mkdir -p " rundir)) (string? rundir))) -(test "Create a test db" "../simpleruns/key1/key2/myrun/test1/testdat.db" (let ((tdb (db:open-test-db-by-test-id db test-id))) - (sqlite3#finalize! tdb) - (file-exists? "../simpleruns/key1/key2/myrun/test1/testdat.db"))) -(test "Get steps for test" #t (> (length (db:get-steps-for-test db test-id)) 0)) +(test #f #t (sqlite3#database? (open-test-db "./"))) +(test "Create a test db" "../simpleruns/key1/key2/myrun/test1/testdat.db" + (let ((tdb (open-run-close db:open-test-db-by-test-id db test-id))) + (if tdb (sqlite3#finalize! tdb)) + (file-exists? "../simpleruns/key1/key2/myrun/test1/testdat.db"))) + +(test "Get steps for test" #t (let ((steps (cdb:remote-run db:get-steps-for-test #f test-id))) + (print steps) + (> (length steps) 0))) (test "Get nice table for steps" "2.0s" (begin - (vector-ref (hash-table-ref (db:get-steps-table db test-id) "step1") 4))) + (vector-ref (hash-table-ref (open-run-close db:get-steps-table #f test-id) "step1") 4))) ;; (exit) ;;====================================================================== ;; R E M O T E C A L L S ;;====================================================================== -;; start a server process -(set! *verbosity* 10) -;; (define server-pid (process-run "../../bin/megatest" (list "-server" "-" "-debug" (conc *verbosity*)))) -;; (sleep 2) - -(define th1 (make-thread server:launch)) -(thread-start! th1) - (define start-wait (current-seconds)) -(server:client-setup) (print "Starting intensive cache and rpc test") (for-each (lambda (params) - ;;; (rdb:tests-register-test #f 1 (conc "test" (random 20)) "") - (apply cdb:test-set-status-state *remoterun* test-id params) - (rdb:pass-fail-counts test-id (random 100) (random 100)) - (rdb:test-rollup-test_data-pass-fail test-id) + (cdb:tests-register-test *runremote* 1 (conc "test" (random 20)) "") + (apply cdb:test-set-status-state *runremote* test-id params) + (cdb:pass-fail-counts *runremote* test-id (random 100) (random 100)) + (cdb:test-rollup-test_data-pass-fail *runremote* test-id) (thread-sleep! 0.01)) ;; cache ordering granularity is at the second level. Should really be at the ms level '(("COMPLETED" "PASS" #f) ("NOT_STARTED" "FAIL" "Just testing") ("NOT_STARTED" "FAIL" "Just testing") ("NOT_STARTED" "FAIL" "Just testing") @@ -311,20 +360,21 @@ ("NOT_STARTED" "FAIL" "Just testing") ("KILLED" "UNKNOWN" "More testing") ("KILLED" "UNKNOWN" "More testing") )) ;; now set all tests to completed -(rdb:flush-queue) -(let ((tests (open-run-close db:get-tests-for-run #f 1 "%" '() '()))) +(cdb:flush-queue *runremote*) +(let ((tests (cdb:remote-run db:get-tests-for-run #f 1 "%" '() '()))) (print "Setting " (length tests) " to COMPLETED/PASS") (for-each (lambda (test) - (rdb:test-set-status-state (db:test-get-id test) "COMPLETED" "PASS" "Forced pass")) + (cdb:test-set-status-state *runremote* (db:test-get-id test) "COMPLETED" "PASS" "Forced pass")) tests)) (print "Waiting for server to be done, should be about 20 seconds") -(process-wait server-pid) +(cdb:kill-server *runremote*) +;; (process-wait server-pid) (test "Server wait time" #t (let ((run-delta (- (current-seconds) start-wait))) (print "Server ran for " run-delta " seconds") (> run-delta 20))) (test "Rollup the run(s)" #t (begin @@ -332,8 +382,10 @@ #t)) (hash-table-set! args:arg-hash ":runname" "%") (test "Remove the rollup run" #t (begin (operate-on 'remove-runs))) + +(thread-join! th1 th2 th3) ;; ADD ME!!!! (db:get-prereqs-not-met *db* 1 '("runfirst") "" mode: 'normal) ;; ADD ME!!!! (rdb:get-tests-for-run *db* 1 "runfirst" #f '() '()) Index: testzmq/hwclient.scm ================================================================== --- testzmq/hwclient.scm +++ testzmq/hwclient.scm @@ -1,6 +1,6 @@ -(use zmq posix) +(use zmq posix srfi-18) (define s (make-socket 'req)) (connect-socket s "tcp://*:5563") (define myname (cadr (argv))) Index: testzmq/hwserver.scm ================================================================== --- testzmq/hwserver.scm +++ testzmq/hwserver.scm @@ -1,15 +1,28 @@ (use zmq srfi-18 posix) -(define s (make-socket 'rep)) -(bind-socket s "tcp://*:5563") - -(print "Start server...") -(let loop () - (let* ((msg (receive-message s)) - (name (caddr (string-split msg " "))) - (resp (conc "World " name))) - (print "Received request: [" msg "]") - (thread-sleep! 0.0001) - (print "Sending response \"" resp "\"") - (send-message s resp) - (loop))) +(define th1 (make-thread + (lambda () + (let ((s (make-socket 'rep))) + (bind-socket s "tcp://*:5563") + (print "Start server...") + (let loop () + (let* ((msg (receive-message s)) + (name (caddr (string-split msg " "))) + (resp (conc "World " name))) + (print "Received request: [" msg "]") + (thread-sleep! 0.0001) + (print "Sending response \"" resp "\"") + (send-message s resp) + (loop))))))) +(define th2 (make-thread + (lambda () + (let loop ((count 0)) + (print "count is " count) + (thread-sleep! 0.1) + (if (< count 10000) + (loop (+ count 1))))))) + +(thread-start! th1) +(thread-start! th2) + +(thread-join! th1) Index: testzmq/hwtest.sh ================================================================== --- testzmq/hwtest.sh +++ testzmq/hwtest.sh @@ -2,11 +2,12 @@ echo Compiling hwclient and hwserver csc hwclient.scm csc hwserver.scm -./hwserver & +./hwserver > hwserver.log & + sleep 1 for x in a b c d e f g h i j k l m n o p q r s t u v w x y z;do ./hwclient $x & done Index: utils/installall.sh ================================================================== --- utils/installall.sh +++ utils/installall.sh @@ -79,11 +79,11 @@ make PLATFORM=linux PREFIX=$PREFIX install cd $BUILDHOME fi # Some eggs are quoted since they are reserved to Bash -for f in matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json ; do +(??)for f in matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json md5; do if ! [[ -e $PREFIX/lib/chicken/6/$f.so ]];then chicken-install $PROX $f # chicken-install -deploy -prefix $DEPLOYTARG $PROX $f else echo Skipping install of egg $f as it is already installed