Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -72,11 +72,11 @@ tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm -client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm +rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm common_records.scm : altdb.scm vg.o dashboard.o : vg_records.scm dcommon.o : run_records.scm # Temporary while transitioning to new routine # runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -1,13 +1,19 @@ +===================================================================== +NOTES from looking at branch v1.62-rpc +===================================================================== + +*last-db-access* or *db-last-access* ==> which is it to be? +seen in singletest: ERROR: Unrecognised arguments: :first_err This is the first error ====================================================================== New way of launching needed to accomodate different target hosttypes for items ====================================================================== [flavors] -general ssh #{getbgesthost general} +general ssh #{getbesthost general} nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo [hosts] general cubian xena Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -17,10 +17,11 @@ ;; allow these queries through without starting a server ;; (define api:read-only-queries '(get-key-val-pairs get-keys + get-key-vals test-toplevel-num-items get-test-info-by-id test-get-rundir-from-test-id get-count-tests-running-for-testname get-count-tests-running @@ -34,11 +35,14 @@ test-get-paths-matching-keynames-target-new get-prereqs-not-met get-count-tests-running-for-run-id get-run-info get-run-status - register-run + get-run-stats + get-targets + get-target + ;; register-run get-tests-for-run get-test-id get-tests-for-runs-mindata get-run-name-from-id get-runs @@ -49,10 +53,11 @@ get-runs-by-patt get-steps-data get-steps-for-test read-test-data login + tasks-get-last testmeta-get-record have-incompletes? synchash-get )) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -46,17 +46,23 @@ (define (client:connect iface port) (case (server:get-transport) ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) - (else (rpc:client-connect iface port)))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (5)") + (exit)))) (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) (case (server:get-transport) - ((rpc) (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) + ((rpc) (let ((res (client:setup-rpc run-id remaining-tries: remaining-tries))) + (remote-conndat-set! *runremote* res) + res)) ((http)(client:setup-http run-id remaining-tries: remaining-tries failed-connects: failed-connects)) - (else (rpc-transport:client-setup run-id remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (6)") + (exit)))) ;; (client:setup-rpc run-id)))) ;; (define (client:login-no-auto-setup server-info run-id) ;; (case (server:get-transport) ;; ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) ;; ((http) (rmt:login-no-auto-client-setup server-info run-id)) @@ -152,10 +158,30 @@ ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; + +(define (client:setup-rpc run-id #!key (remaining-tries 10) (failed-connects 0)) + (debug:print-info 2 *default-log-port* "client:setup-rpc remaining-tries=" remaining-tries) + (let* ((server-dat (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)) + (num-available (tasks:num-in-available-state (db:delay-if-busy (tasks:open-db)) run-id))) + (cond + ((<= remaining-tries 0) + (debug:print-error 0 *default-log-port* "failed to start or connect to server for run-id " run-id) + (exit 1)) + (server-dat + (debug:print-info 4 *default-log-port* "client:setup-rpc server-dat=" server-dat ", remaining-tries=" remaining-tries) + + (rpc-transport:client-setup run-id server-dat remaining-tries: remaining-tries)) + (else + (if (< num-available 2) + (server:try-running run-id)) + (thread-sleep! (+ 2 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. + (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + + (define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (let* ((tdbdat (tasks:open-db))) (if (<= remaining-tries 0) (begin @@ -165,33 +191,23 @@ (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (hostname (tasks:hostinfo-get-hostname server-dat)) (port (tasks:hostinfo-get-port server-dat)) - (start-res (case *transport-type* - ((http)(http-transport:client-connect iface port)) - ;;((nmsg)(nmsg-transport:client-connect hostname port)) - )) - (ping-res (case *transport-type* - ((http)(rmt:login-no-auto-client-setup start-res run-id)) - ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) - ;; (if logininfo - ;; (car (vector-ref logininfo 1)) - ;; #f))) - - ))) + (start-res (http-transport:client-connect iface port)) + (ping-res (rmt:login-no-auto-client-setup start-res))) (if (and start-res ping-res) (begin - (hash-table-set! *runremote* run-id start-res) + (remote-conndat-set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case *transport-type* ((http)(http-transport:close-connections run-id))) - (hash-table-delete! *runremote* run-id) + (remote-conndat-set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) @@ -209,11 +225,11 @@ (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) -;; keep this as a function to ease future +;; keep this as a function to ease future ;; this is unused, not porting for rpc -BB (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; ;; client:signal-handler Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -7,11 +7,11 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== -(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo typed-records) +(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils) (require-extension regex posix) (require-extension (srfi 18) extras tcp rpc) (import (prefix sqlite3 sqlite3:)) @@ -18,10 +18,12 @@ (import (prefix base64 base64:)) (declare (unit common)) (include "common_records.scm") +(include "thunk-utils.scm") + ;; (require-library margs) ;; (include "margs.scm") ;; (define old-exit exit) @@ -43,16 +45,16 @@ (define home (getenv "HOME")) (define user (getenv "USER")) ;; GLOBAL GLETCHES -(define *contexts* (make-hash-table)) - -;; Common data structure for +;; CONTEXTS (defstruct cxt (taskdb #f) (cmutex (make-mutex))) +(define *contexts* (make-hash-table)) +(define *context-mutex* (make-mutex)) ;; safe method for accessing a context given a toppath ;; (define (common:with-cxt toppath proc) (mutex-lock! *context-mutex*) @@ -60,13 +62,24 @@ (if (not cxt) (set! cxt (let ((x (make-cxt)))(hash-table-set! *contexts* toppath x) x))) (let ((cxt-mutex (cxt-mutex cxt))) (mutex-unlock! *context-mutex*) (mutex-lock! cxt-mutex) - (let ((res (proc cxt))) - (mutex-unlock! cxt-mutex) - res)))) + ;; here we guard proc with exception handler so + ;; no matter how proc succeeds or fails, + ;; the cxt-mutex will be unlocked afterward. + (let* ((EXCEPTION-SYMBOL (gensym)) ;; use a generated symbol + (guarded-proc ;; to avoid collision + (lambda args + (let* ((res (condition-case + (apply proc args) + [x () (cons EXCEPTION-SYMBOL x)]))) + (mutex-unlock! cxt-mutex) + (if (and (pair? res) (eq? (car res) EXCEPTION)) + (abort (cdr res)) + res))))) + (guarded-proc cxt))))) (define *db-keys* #f) (define *configinfo* #f) ;; raw results from setup, includes toppath and table from megatest.config (define *runconfigdat* #f) ;; run configs data @@ -73,64 +86,85 @@ (define *configdat* #f) ;; megatest.config data (define *configstatus* #f) ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done (define *toppath* #f) (define *already-seen-runconfig-info* #f) -(define *waiting-queue* (make-hash-table)) (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar -(define *write-frequency* (make-hash-table)) ;; run-id => (vector (current-seconds) 0)) (define *alt-log-file* #f) ;; used by -log (define *common:denoise* (make-hash-table)) ;; for low noise printing (define *default-log-port* (current-error-port)) +(define *time-zero* (current-seconds)) ;; for the watchdog ;; DATABASE -(define *dbstruct-db* #f) +(define *dbstruct-db* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. +;; db stats (define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > (define *db-stats-mutex* (make-mutex)) -(define *db-sync-mutex* (make-mutex)) -(define *db-multi-sync-mutex* (make-mutex)) -(define *db-local-sync* (make-hash-table)) ;; used to record last touch of db -(define *megatest-db* #f) -(define *last-db-access* (current-seconds)) ;; update when db is accessed via server +;; db access +(define *db-last-access* (current-seconds)) ;; last db access, used in server (define *db-write-access* #t) -(define *inmemdb* #f) +;; db sync +(define *db-last-write* 0) ;; used to record last touch of db +(define *db-last-sync* 0) ;; last time the sync to megatest.db happened +(define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another +(define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* and *db-last-write* +;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) +(define *db-cache-path* #f) ;; SERVER (define *my-client-signature* #f) -(define *transport-type* 'http) -(define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg -(define *runremote* (make-hash-table)) ;; if set up for server communication this will hold +(define *transport-type* #f) ;; override with [server] transport http|rpc|nmsg + +(define *DEFAULT-TRANSPORT* "http") +(define (common:set-transport-type) + (set! *transport-type* + (string->symbol + (or + (args:get-arg "-transport") + (configf:lookup *configdat* "server" "transport") + *DEFAULT-TRANSPORT*))) + *transport-type*) + +(define *runremote* #f) ;; if set up for server communication this will hold (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) -(define *client-non-blocking-mode* #f) (define *server-id* #f) (define *server-info* #f) (define *time-to-exit* #f) -(define *received-response* #f) -(define *default-numtries* 10) (define *server-run* #t) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) +(define *home-host* #f) +(define *total-non-write-delay* 0) +(define *heartbeat-mutex* (make-mutex)) +;; client +(define *rmt-mutex* (make-mutex)) ;; remote access calls mutex + +;; RPC transport +(define *rpc:listener* #f) + +;; KEY info (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id (define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db (define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget +(define *launch-setup-mutex* (make-mutex)) ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set +(define *homehost-mutex* (make-mutex)) -;; Awful. Please FIXME +;; cache environment vars for each run here (define *env-vars-by-run-id* (make-hash-table)) -(define *current-run-name* #f) ;; Testconfig and runconfig caches. (define *testconfigs* (make-hash-table)) ;; test-name => testconfig (define *runconfigs* (make-hash-table)) ;; target => runconfig @@ -188,46 +222,74 @@ (common:version-signature)))) ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; -(define (common:cleanup-db) +(define (common:cleanup-db dbstruct) (db:multi-db-sync - #f ;; do all run-ids + dbstruct ;; 'new2old 'killservers 'dejunk ;; 'adj-testids ;; 'old2new - 'new2old) + 'new2old + 'schema) (if (common:version-changed?) (common:set-last-run-version))) + +;; Rotate logs, logic: +;; if > 500k and older than 1 week: +;; remove previous compressed log and compress this log +;; WARNING: This proc operates assuming that it is in the directory above the +;; logs directory you wish to log-rotate. +;; +(define (common:rotate-logs) + (if (not (directory-exists? "logs"))(create-directory "logs")) + (directory-fold + (lambda (file rem) + (if (and (string-match "^.*.log" file) + (> (file-size (conc "logs/" file)) 200000)) + (let ((gzfile (conc "logs/" file ".gz"))) + (if (file-exists? gzfile) + (begin + (debug:print-info 0 *default-log-port* "removing " gzfile) + (delete-file gzfile))) + (debug:print-info 0 *default-log-port* "compressing " file) + (system (conc "gzip logs/" file))))) + '() + "logs")) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; (define (common:exit-on-version-changed) (if (common:version-changed?) - (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))) - (debug:print 0 *default-log-port* - "WARNING: Version mismatch!\n" - " expected: " (common:version-signature) "\n" - " got: " (common:get-last-run-version)) - (if (and (file-exists? mtconf) - (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db - (begin - (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") - (handle-exceptions - exn - (begin - (debug:print 0 *default-log-port* "Failed to switch versions.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (print-call-chain (current-error-port)) - (exit 1)) - (common:cleanup-db))) - (begin - (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") - (exit 1)))))) + (if (common:on-homehost?) + (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")) + (dbstruct (db:setup))) + (debug:print 0 *default-log-port* + "WARNING: Version mismatch!\n" + " expected: " (common:version-signature) "\n" + " got: " (common:get-last-run-version)) + (if (and (file-exists? mtconf) + (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db + (begin + (debug:print 0 *default-log-port* " I see you are the owner of megatest.config, attempting to cleanup and reset to new version") + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "Failed to switch versions.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (print-call-chain (current-error-port)) + (exit 1)) + (common:cleanup-db dbstruct))) + (begin + (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"") + (exit 1)))) + (begin + (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") + (exit 1))))) ;;====================================================================== ;; S P A R S E A R R A Y S ;;====================================================================== @@ -345,36 +407,110 @@ ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== (define *common:std-states* - '((0 "COMPLETED") - (1 "NOT_STARTED") - (2 "RUNNING") - (3 "REMOTEHOSTSTART") - (4 "LAUNCHED") - (5 "KILLED") - (6 "KILLREQ") - (7 "STUCK") - (8 "ARCHIVED"))) + '((0 "ARCHIVED") + (1 "STUCK") + (2 "KILLREQ") + (3 "KILLED") + (4 "NOT_STARTED") + (5 "COMPLETED") + (6 "LAUNCHED") + (7 "REMOTEHOSTSTART") + (8 "RUNNING") + )) (define *common:std-statuses* - '((0 "PASS") - (1 "WARN") - (2 "FAIL") + '(;; (0 "DELETED") + (1 "n/a") + (2 "PASS") (3 "CHECK") - (4 "n/a") - (5 "WAIVED") - (6 "SKIP") - (7 "DELETED") - (8 "STUCK/DEAD") + (4 "SKIP") + (5 "WARN") + (6 "WAIVED") + (7 "STUCK/DEAD") + (8 "FAIL") (9 "ABORT"))) -;; These are stopping conditions that prevent a test from being run -(define *common:cant-run-states-sym* - '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE ABORT ARCHIVED)) +(define *common:ended-states* ;; states which indicate the test is stopped and will not proceed + '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE")) +(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked + '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD")) + +(define *common:running-states* ;; test is either running or can be run + '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED")) + +(define *common:cant-run-states* ;; These are stopping conditions that prevent a test from being run + '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED")) + +(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead + '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP")) + +(define (common:special-sort items order comp) + (let ((items-order (map reverse order)) + (acomp (or comp >))) + (sort items + (lambda (a b) + (let ((a-num (cadr (or (assoc a items-order) '(0 0)))) + (b-num (cadr (or (assoc b items-order) '(0 0))))) + (acomp a-num b-num)))))) + +;; ;; given a toplevel with currstate, currstatus apply state and status +;; ;; => (newstate . newstatus) +;; (define (common:apply-state-status currstate currstatus state status) +;; (let* ((cstate (string->symbol (string-downcase currstate))) +;; (cstatus (string->symbol (string-downcase currstatus))) +;; (sstate (string->symbol (string-downcase state))) +;; (sstatus (string->symbol (string-downcase status))) +;; (nstate #f) +;; (nstatus #f)) +;; (set! nstate +;; (case cstate +;; ((completed not_started killed killreq stuck archived) +;; (case sstate ;; completed -> sstate +;; ((completed killed killreq stuck archived) completed) +;; ((running remotehoststart launched) running) +;; (else unknown-error-1))) +;; ((running remotehoststart launched) +;; (case sstate +;; ((completed killed killreq stuck archived) #f) ;; need to look at all items +;; ((running remotehoststart launched) running) +;; (else unknown-error-2))) +;; (else unknown-error-3))) +;; (set! nstatus +;; (case sstatus +;; ((pass) +;; (case nstate +;; ((pass n/a deleted) pass) +;; ((warn) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((skip) skip) +;; ((stuck/dead) stuck) +;; ((abort) abort) +;; (else unknown-error-4))) +;; ((warn) +;; (case nstate +;; ((pass warn n/a skip deleted) warn) +;; ((fail) fail) +;; ((check) check) +;; ((waived) waived) +;; ((stuck/dead) stuck) +;; (else unknown-error-5))) +;; ((fail) +;; (case nstate +;; ((pass warn fail check n/a waived skip deleted stuck/dead stuck) fail) +;; ((abort) abort) +;; (else unknown-error-6))) +;; (else unknown-error-7))) +;; (cons +;; (if nstate (symbol->string nstate) nstate) +;; (if nstatus (symbol->string nstatus) nstatus)))) + ;;====================================================================== ;; D E B U G G I N G S T U F F ;;====================================================================== (define *verbosity* 1) @@ -393,61 +529,124 @@ (if *toppath* (pathname-file *toppath*) (pathname-file (current-directory))))) (define (common:get-db-tmp-area) - (create-directory (conc "/tmp/" (current-user-name) - "/megatest/" - (common:get-testsuite-name) "/" - (string-translate *toppath* "/" "_")) #t)) + (if *db-cache-path* + *db-cache-path* + (let ((dbpath (create-directory (conc "/tmp/" (current-user-name) + "/megatest_localdb/" + (common:get-testsuite-name) "/" + (string-translate *toppath* "/" ".")) #t))) + (set! *db-cache-path* dbpath) + dbpath))) + +(define (common:get-area-path-signature) + (message-digest-string (md5-primitive) *toppath*)) ;;====================================================================== ;; E X I T H A N D L I N G ;;====================================================================== -(define (common:legacy-sync-recommended) - (or (args:get-arg "-runtests") - (args:get-arg "-run") - (args:get-arg "-server") - ;; (args:get-arg "-set-run-status") - (args:get-arg "-remove-runs") - ;; (args:get-arg "-get-run-status") - )) - -(define (common:legacy-sync-required) - (configf:lookup *configdat* "setup" "megatest-db")) +(define (common:run-sync?) + (let ((ohh (common:on-homehost?)) + (srv (args:get-arg "-server"))) + ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv) + (and (common:on-homehost?) + (args:get-arg "-server")))) + +;;;; run-ids +;; if #f use *db-local-sync* : or 'local-sync-flags +;; if #t use timestamps : or 'timestamps +(define (common:sync-to-megatest.db dbstruct) + (let ((start-time (current-seconds)) + (res (db:multi-db-sync dbstruct 'new2old))) + (let ((sync-time (- (current-seconds) start-time))) + (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds") + (if (common:low-noise-print 30 "sync new to old") + (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds"))) + res)) + +;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp +;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server) +;; +(define (common:watchdog) + (thread-sleep! 0.05) ;; delay for startup + (let ((legacy-sync (common:run-sync?)) + (debug-mode (debug:debug-mode 1)) + (last-time (current-seconds))) + (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync) + (if legacy-sync + (let ((dbstruct (db:setup))) + (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") + (let loop () + ;; sync for filesystem local db writes + ;; + (mutex-lock! *db-multi-sync-mutex*) + (let* ((need-sync (>= *db-last-write* *db-last-sync*)) ;; no sync since last write + (sync-in-progress *db-sync-in-progress*) + (should-sync (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum + (will-sync (and (or need-sync should-sync) + (not sync-in-progress))) + (start-time (current-seconds))) + ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) + (if will-sync (set! *db-sync-in-progress* #t)) + (mutex-unlock! *db-multi-sync-mutex*) + (if will-sync + (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive + (if (> res 0) ;; some records were transferred, keep the db alive + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *db-last-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*) + (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))) + (if will-sync + (begin + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-sync-in-progress* #f) + (set! *db-last-sync* start-time) + (mutex-unlock! *db-multi-sync-mutex*))) + (if (and debug-mode + (> (- start-time last-time) 60)) + (begin + (set! last-time start-time) + (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) + + ;; keep going unless time to exit + ;; + (if (not *time-to-exit*) + (let delay-loop ((count 0)) + (if (and (not *time-to-exit*) + (< count 4)) ;; was 11, changing to 4. + (begin + (thread-sleep! 1) + (delay-loop (+ count 1)))) + (loop))) + (if (common:low-noise-print 30) + (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*))))))) (define (std-exit-procedure) + (let ((no-hurry (if *time-to-exit* ;; hurry up #f (begin (set! *time-to-exit* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds - (let ((run-ids (hash-table-keys *db-local-sync*))) - (if (and (not (null? run-ids)) - (or (common:legacy-sync-recommended) - (configf:lookup *configdat* "setup" "megatest-db"))) - (if no-hurry (db:multi-db-sync run-ids 'new2old)))) - (if *dbstruct-db* (db:close-all *dbstruct-db*)) - (if *inmemdb* (db:close-all *inmemdb*)) - (if (and *megatest-db* - (sqlite3:database? *megatest-db*)) - (begin - (sqlite3:interrupt! *megatest-db*) - (sqlite3:finalize! *megatest-db* #t) - (set! *megatest-db* #f))) + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated (if *task-db* (let ((db (cdr *task-db*))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) - (vector-set! *task-db* 0 #f))))) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) (close-output-port *default-log-port*) (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) (th2 (make-thread (lambda () (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") (if no-hurry @@ -454,10 +653,19 @@ (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff (thread-sleep! 2)) (debug:print 4 *default-log-port* " ... done") ) "clean exit"))) + + ;; let's try to clean up open sockets + (if *runremote* + (case (remote-transport *runremote*) + ((http) #t) + ((rpc) (rpc:close-all-connections!)) + (else + (debug:print-info 0 *default-log-port* "Transport "(remote-transport *runremote*)" not supported")))) + (thread-start! th1) (thread-start! th2) (thread-join! th1)))) (define (std-signal-handler signum) @@ -555,10 +763,31 @@ (pathname-directory (pathname-directory (pathname-directory exe-path)))) #f))) +;; return first path that can be created or already exists and is writable +;; +(define (common:get-create-writeable-dir dirs) + (if (null? dirs) + #f + (let loop ((hed (car dirs)) + (tal (cdr dirs))) + (let ((res (or (and (directory? hed) + (file-write-access? hed) + hed) + (handle-exceptions + exn + #f + (create-directory hed #t))))) + (if (and (string? res) + (directory? res)) + res + (if (null? tal) + #f + (loop (car tal)(cdr tal)))))))) + ;;====================================================================== ;; T A R G E T S , S T A T E , S T A T U S , ;; R U N N A M E A N D T E S T P A T T ;;====================================================================== @@ -619,10 +848,60 @@ (if target (begin (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements") #f) #f)))) + +;; logic for getting homehost. Returns (host . at-home) +;; IF *toppath* is not set, wait up to five seconds trying every two seconds +;; (this is to accomodate the watchdog) +;; +(define (common:get-homehost #!key (trynum 5)) + ;; called often especially at start up. use mutex to eliminate collisions + (mutex-lock! *homehost-mutex*) + (cond + (*home-host* + (mutex-unlock! *homehost-mutex*) + *home-host*) + ((not *toppath*) + (mutex-unlock! *homehost-mutex*) + (launch:setup) ;; safely mutexed now + (if (> trynum 0) + (begin + (thread-sleep! 2) + (common:get-homehost trynum: (- trynum 1))) + #f)) + (else + (let* ((currhost (get-host-name)) + (bestadrs (server:get-best-guess-address currhost)) + ;; first look in config, then look in file .homehost, create it if not found + (homehost (or (configf:lookup *configdat* "server" "homehost" ) + (let ((hhf (conc *toppath* "/.homehost"))) + (if (file-exists? hhf) + (with-input-from-file hhf read-line) + (if (file-write-access? *toppath*) + (begin + (with-output-to-file hhf + (lambda () + (print bestadrs))) + (begin + (mutex-unlock! *homehost-mutex*) + (car (common:get-homehost)))) + #f))))) + (at-home (or (equal? homehost currhost) + (equal? homehost bestadrs)))) + (set! *home-host* (cons homehost at-home)) + (mutex-unlock! *homehost-mutex*) + *home-host*)))) + +;; am I on the homehost? +;; +(define (common:on-homehost?) + (let ((hh (common:get-homehost))) + (if hh + (cdr hh) + #f))) ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== @@ -642,10 +921,11 @@ (if (null? tala) ;; we are done talb (loop (car tala) (cdr tala) (car talb) + (cdr talb))) #f))))) ;; Needed for long lists to be sorted where (apply max ... ) dies ;; @@ -827,10 +1107,64 @@ (with-input-from-pipe (conc "ssh " remote-host " cat /proc/loadavg") (lambda ()(list (read)(read)(read))))) (with-input-from-file "/proc/loadavg" (lambda ()(list (read)(read)(read)))))) + +;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads +;; returns list (normalized-proc-load normalized-core-load 1m 5m 15m ncores nthreads) +;; +(define (common:get-normalized-cpu-load remote-host) + (let ((data (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end") + read-lines) + (append + (with-input-from-file "/proc/loadavg" + read-lines) + (with-input-from-file "/proc/cpuinfo" + read-lines) + (list "end")))) + (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) + (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) + (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) + (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) + (max-num (lambda (p n)(max (string->number p) n)))) + ;; (print "data=" data) + (if (null? data) ;; something went wrong + #f + (let loop ((hed (car data)) + (tal (cdr data)) + (loads #f) + (proc-num 0) ;; processor includes threads + (phys-num 0) ;; physical chip on motherboard + (core-num 0)) ;; core + ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) + (if (null? tal) ;; have all our data, calculate normalized load and return result + (let* ((act-proc (+ proc-num 1)) + (act-phys (+ phys-num 1)) + (act-core (+ core-num 1)) + (adj-proc-load (/ (car loads) act-proc)) + (adj-core-load (/ (car loads) act-core))) + (append (list (cons 'adj-proc-load adj-proc-load) + (cons 'adj-core-load adj-core-load)) + (list (cons '1m-load (car loads)) + (cons '5m-load (cadr loads)) + (cons '15m-load (caddr loads))) + (list (cons 'proc act-proc) + (cons 'core act-core) + (cons 'phys act-phys)))) + (regex-case + hed + (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) + (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) + (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) + (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) + (else + (begin + ;; (print "NO MATCH: " hed) + (loop (car tal)(cdr tal) loads proc-num phys-num core-num))))))))) (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (first (car loadavg)) (next (cadr loadavg)) @@ -929,30 +1263,36 @@ (if (number? newval) (set! freespc newval)))))) (car df-results)) freespc)) -;; check space in dbdir -;; returns: ok/not dbspace required-space -;; -(define (common:check-db-dir-space) - (let* ((dbdir (db:get-dbdir)) - (dbspace (if (directory? dbdir) - (get-df dbdir) - 0)) - (required (string->number - (or (configf:lookup *configdat* "setup" "dbdir-space-required") - "100000")))) +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) (list (> dbspace required) dbspace required - dbdir))) + dirpath))) +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "100000"))) + (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + ;; check available space in dbdir, exit if insufficient ;; (define (common:check-db-dir-and-exit-if-insufficient) - (let* ((spacedat (common:check-db-dir-space)) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now (is-ok (car spacedat)) (dbspace (cadr spacedat)) (required (caddr spacedat)) (dbdir (cadddr spacedat))) (if (not is-ok) Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -43,10 +43,19 @@ (lambda () (print ((condition-property-accessor 'exn 'message) exn)) (print "Callback error in " procname) (print "Full condition info:\n" (condition->list exn))))) (proc))) + +;; Need a mutex protected way to get and set values +;; or use (define-simple-syntax ?? +;; +(define-inline (with-mutex mtx accessor record . val) + (mutex-lock! mtx) + (let ((res (apply accessor record val))) + (mutex-unlock! mtx) + res)) ;; this was cached based on results from profiling but it turned out the profiling ;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching ;; in for now but can probably take it out later. ;; Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -17,12 +17,12 @@ (require-library iup) (import (prefix iup iup:)) (use canvas-draw) -(use sqlite3 srfi-1 posix regex regex-case srfi-69) -(import (prefix sqlite3 sqlite3:)) +(use srfi-1 posix regex regex-case srfi-69) +(use (prefix sqlite3 sqlite3:)) (declare (unit dashboard-tests)) (declare (uses common)) (declare (uses db)) (declare (uses gutils)) @@ -158,11 +158,11 @@ ;;====================================================================== ;; Run info panel ;;====================================================================== (define (run-info-panel db keydat testdat runname) (let* ((run-id (db:test-get-run_id testdat)) - (rundat (db:get-run-info db run-id)) + (rundat (rmt:get-run-info run-id)) (header (db:get-header rundat)) (event_time (db:get-value-by-header (db:get-rows rundat) (db:get-header rundat) "event_time"))) (iup:frame @@ -286,11 +286,12 @@ (iup:label "STATE:" #:size "30x") (let* ((btns (map (lambda (state) (let ((btn (iup:button state #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) - (rmt:test-set-state-status-by-id run-id test-id state #f #f) + ;; (rmt:test-set-state-status-by-id run-id test-id state #f #f) + (rmt:roll-up-pass-fail-counts run-id test-id #f state #f #f) ;; test-name passed in as test-id is respected (db:test-set-state! testdat state))))) btn)) (map cadr *common:std-states*)))) ;; (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "KILLED" "KILLREQ")))) (vector-set! *state-status* 0 (lambda (state color) @@ -319,11 +320,12 @@ (iup:attribute-set! wtxtbox "VALUE" c) (if (not *dashboard-comment-share-slot*) (set! *dashboard-comment-share-slot* wtxtbox))) )))) (begin - (rmt:test-set-state-status-by-id run-id test-id #f status #f) + ;; (rmt:test-set-state-status-by-id run-id test-id #f status #f) + (rmt:roll-up-pass-fail-counts run-id test-id #f #f status #f) ;; test-name passed in as test-id is respected (db:test-set-status! testdat status)))))))) btn)) (map cadr *common:std-statuses*)))) ;; (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) (vector-set! *state-status* 1 (lambda (status color) @@ -415,13 +417,13 @@ ;;====================================================================== ;; ;;====================================================================== (define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) - (let* ((db-path (db:dbfile-path run-id)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) - (dbstruct (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") - local: #t)) + (let* ((db-path (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/db/" run-id ".db")) + (dbstruct #f) ;; NOT ACTUALLY USED (db:setup)) ;; (make-dbr:dbstruct path: (db:dbfile-path #f) ;; (configf:lookup *configdat* "setup" "linktree") + ;; local: #t)) (testdat (rmt:get-test-info-by-id run-id test-id)) ;; (db:get-test-info-by-id dbstruct run-id test-id)) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t)) (if (not testdat) @@ -513,11 +515,11 @@ ;; NOTE: BUG HIDER, try to eliminate this exception handler (handle-exceptions exn (debug:print-info 0 *default-log-port* "test db access issue in examine test for run-id " run-id ", test-id " test-id ": " ((condition-property-accessor 'exn 'message) exn)) (rmt:get-test-info-by-id run-id test-id ))))) - ;; (debug:print-info 0 *default-log-port* "need-update= " need-update " curr-mod-time = " curr-mod-time) + ;; (print "INFO: need-update= " need-update " curr-mod-time = " curr-mod-time) (cond ((and need-update newtestdat) (set! testdat newtestdat) (set! teststeps (tests:get-compressed-steps run-id test-id)) (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -53,10 +53,11 @@ Usage: dashboard [options] -h : this help -test run-id,test-id : control test identified by testid -skip-version-check : skip the version check + -use-db-cache : access database via cache Misc -rows R : set number of rows -cols C : set number of columns ")) @@ -81,12 +82,13 @@ "-use-server" "-guimonitor" "-main" "-v" "-q" - "-use-local" + "-use-db-cache" "-skip-version-check" + "-repl" ) args:arg-hash 0)) (if (not (null? remargs)) @@ -104,10 +106,19 @@ (if (not (launch:setup)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) +;; create a watch dog to move changes from lt/.db/*.db to megatest.db +;; +(if (file-write-access? (conc *toppath* "/megatest.db")) + (thread-start! (make-thread common:watchdog "Watchdog thread")) + (if (not (args:get-arg "-use-db-cache")) + (begin + (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") + (hash-table-set! args:arg-hash "-use-db-cache" #t)))) + ;; data common to all tabs goes here ;; (defstruct dboard:commondat ((curr-tab-num 0) : number) please-update @@ -116,11 +127,10 @@ updaters updating uidat ;; needs to move to tabdat at some time hide-not-hide-tabs ) - (define (dboard:commondat-make) (make-dboard:commondat curr-tab-num: 0 tabdats: (make-hash-table) @@ -248,10 +258,11 @@ ((status-ignore-hash (make-hash-table)) : hash-table) ;; hash of STATUS => #t/#f (target #f) (test-patts #f) ;; db info to file the .db files for the area + (access-mode (db:get-access-mode)) ;; use cached db or not (dbdir #f) (dbfpath #f) (dbkeys #f) ((last-db-update (make-hash-table)) : hash-table) ;; last db file timestamp (monitor-db-path #f) ;; where to find monitor.db @@ -261,11 +272,11 @@ ((num-tests 10) : number) ;; total number of tests to show (used in the old runs display) ;; runs tree ((path-run-ids (make-hash-table)) : hash-table) ;; path (target / runname) => id (runs-tree #f) - ((runs-tree-ht (make-hash-table)) : hash-table) ;; track which targests added to tree (merge functionality with path-run-ids?) + ((runs-tree-ht (make-hash-table)) : hash-table) ;; track which targets added to tree (merge functionality with path-run-ids?) ;; tab data ((view-changed #t) : boolean) ((xadj 0) : number) ;; x slider number (if using canvas) ((yadj 0) : number) ;; y slider number (if using canvas) @@ -297,18 +308,18 @@ (dboard:setup-tabdat dat) (dboard:setup-num-rows dat) dat)) (define (dboard:setup-tabdat tabdat) - (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) - (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path 0)) + (dboard:tabdat-dbdir-set! tabdat (db:dbfile-path)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) + (dboard:tabdat-dbfpath-set! tabdat (db:dbfile-path)) (dboard:tabdat-monitor-db-path-set! tabdat (conc (dboard:tabdat-dbdir tabdat) "/monitor.db")) ;; HACK ALERT: this is a hack, please fix. (dboard:tabdat-ro-set! tabdat (not (file-read-access? (dboard:tabdat-dbfpath tabdat)))) - (dboard:tabdat-keys-set! tabdat (rmt:get-keys)) + (dboard:tabdat-keys-set! tabdat (db:dispatch-query (db:get-access-mode) rmt:get-keys db:get-keys)) (dboard:tabdat-dbkeys-set! tabdat (append (dboard:tabdat-keys tabdat) (list "runname"))) (dboard:tabdat-tot-runs-set! tabdat (rmt:get-num-runs "%")) ) ;; RADT => Matrix defstruct addition @@ -479,21 +490,22 @@ ;; gets all the tests for run-id that match testnamepatt and key-vals, merges them ;; ;; NOTE: Yes, this is used ;; (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals) - (let* ((num-to-get + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (num-to-get (let ((num-tests-from-config (configf:lookup *configdat* "setup" "num-tests-to-get"))) (if num-tests-from-config (begin (BB> "override num-tests 100 -> "num-tests-from-config) (string->number num-tests-from-config)) 100))) (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) - (do-not-use-db-file-timestamps (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab - (do-not-use-query-timestamps (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab + (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab + (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab (sort-info (get-curr-sort)) (sort-by (vector-ref sort-info 1)) (sort-order (vector-ref sort-info 2)) (bubble-type (if (member sort-order '(testname)) 'testname @@ -516,29 +528,30 @@ (db-pth (conc db-dir "/" run-id ".db"))) (dboard:rundat-db-path-set! run-dat db-pth) db-pth))) (tmptests (if (or do-not-use-db-file-timestamps (>= (common:lazy-modification-time db-path) last-update)) - (rmt:get-tests-for-run run-id testnamepatt states statuses ;; run-id testpatt states statuses - (dboard:rundat-run-data-offset run-dat) - num-to-get - (dboard:tabdat-hide-not-hide tabdat) ;; no-in - sort-by ;; sort-by - sort-order ;; sort-order - #f ;; 'shortlist ;; qrytype - (if (dboard:tabdat-filters-changed tabdat) - 0 - last-update) ;; last-update - *dashboard-mode*) ;; use dashboard mode + (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run + run-id testnamepatt states statuses ;; run-id testpatt states statuses + (dboard:rundat-run-data-offset run-dat) + num-to-get + (dboard:tabdat-hide-not-hide tabdat) ;; no-in + sort-by ;; sort-by + sort-order ;; sort-order + #f ;; 'shortlist ;; qrytype + (if (dboard:tabdat-filters-changed tabdat) + 0 + last-update) ;; last-update + *dashboard-mode*) ;; use dashboard mode '())) (use-new (dboard:tabdat-hide-not-hide tabdat)) (tests-ht (if (dboard:tabdat-filters-changed tabdat) (let ((ht (make-hash-table))) (dboard:rundat-tests-set! run-dat ht) ht) - (dboard:rundat-tests run-dat))) - (start-time (current-seconds))) + (dboard:rundat-tests run-dat)))) + ;;(start-time (current-seconds))) ;; to limit the amount of data transferred each cycle use limit of num-to-get and offset (dboard:rundat-run-data-offset-set! run-dat (if (< (length tmptests) num-to-get) @@ -592,15 +605,18 @@ ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; (define (update-rundat tabdat runnamepatt numruns testnamepatt keypatts) - (let* ((keys (rmt:get-keys)) - (last-runs-update (dboard:tabdat-last-runs-update tabdat)) - (allruns (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (keys (db:dispatch-query access-mode rmt:get-keys db:get-keys)) + (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2)) + (allruns (db:dispatch-query access-mode rmt:get-runs db:get-runs + runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f)) - (allruns-tree (rmt:get-runs-by-patt keys "%" #f #f #f #f last-runs-update));;'("id" "runname") + (allruns-tree (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt + keys "%" #f #f #f #f last-runs-update));;'("id" "runname") (header (db:get-header allruns)) (runs (db:get-rows allruns)) ;; RA => Filtered as per runpatt selected (runs-tree (db:get-rows allruns-tree)) ;; RA => Returns complete list of runs (start-time (current-seconds)) (runs-hash (let ((ht (make-hash-table))) @@ -662,12 +678,88 @@ (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) (dboard:tabdat-filters-changed-set! tabdat #f) (dboard:update-tree tabdat runs-hash header tb))) - - +;; this calls dboard:get-tests-for-run-duplicate for each run +;; +;; create a virtual table of all the tests +;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) +;; +(define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (keys (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys))) + (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2)) + (allruns (db:dispatch-query access-mode rmt:get-runs db:get-runs + runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) + ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f)) + (allruns-tree (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt + keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname") + (header (db:get-header allruns)) + (runs (db:get-rows allruns)) ;; RA => Filtered as per runpatt selected + (runs-tree (db:get-rows allruns-tree)) ;; RA => Returns complete list of runs + (start-time (current-seconds)) + (runs-hash (let ((ht (make-hash-table))) + (for-each (lambda (run) + (hash-table-set! ht (db:get-value-by-header run header "id") run)) + runs-tree) ;; (vector-ref runs-dat 1)) + ht)) + (tb (dboard:tabdat-runs-tree tabdat))) + (dboard:tabdat-last-runs-update-set! tabdat (- (current-seconds) 2)) + (dboard:tabdat-header-set! tabdat header) + ;; + ;; trim runs to only those that are changing often here + ;; + (if (null? runs) + (begin + (dboard:tabdat-allruns-set! tabdat '()) + (dboard:tabdat-all-test-names-set! tabdat '()) + (dboard:tabdat-item-test-names-set! tabdat '()) + (hash-table-clear! (dboard:tabdat-allruns-by-id tabdat))) + (let loop ((run (car runs)) + (tal (cdr runs)) + (res '()) + (maxtests 0)) + (let* ((run-id (db:get-value-by-header run header "id")) + (run-struct (hash-table-ref/default (dboard:tabdat-allruns-by-id tabdat) run-id #f)) + ;; (last-update (if run-struct (dboard:rundat-last-update run-struct) 0)) + (key-vals (rmt:get-key-vals run-id)) + (tests-ht (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)) + ;; GET RID OF dboard:get-tests-dat - it is superceded by dboard:get-tests-for-run-duplicate + ;; dboard:get-tests-for-run-duplicate - returns a hash table + ;; (dboard:get-tests-dat tabdat run-id last-update)) + (all-test-ids (hash-table-keys tests-ht)) + (num-tests (length all-test-ids))) + ;; (print "run-struct: " run-struct) + ;; NOTE: bubble-up also sets the global (dboard:tabdat-item-test-names tabdat) + ;; (tests (bubble-up tmptests priority: bubble-type)) + ;; NOTE: 11/01/2013 This routine is *NOT* getting called excessively. + ;; (debug:print 0 *default-log-port* "Getting data for run " run-id " with key-vals=" key-vals) + ;; Not sure this is needed? + (let* ((newmaxtests (max num-tests maxtests)) + ;; (last-update (- (current-seconds) 10)) + (run-struct (or run-struct + (dboard:rundat-make-init + run: run + tests: tests-ht + key-vals: key-vals))) + (new-res (if (null? all-test-ids) res (cons run-struct res))) + (elapsed-time (- (current-seconds) start-time))) + (if (null? all-test-ids) + (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) + (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) + (if (or (null? tal) + (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update + (begin + (if (> elapsed-time 2)(print "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")) + (dboard:tabdat-allruns-set! tabdat new-res) + maxtests) + (if (> (dboard:rundat-run-data-offset run-struct) 0) + (loop run tal new-res newmaxtests) ;; not done getting data for this run + (loop (car tal)(cdr tal) new-res newmaxtests))))))) + (dboard:tabdat-filters-changed-set! tabdat #f) + (dboard:update-tree tabdat runs-hash header tb))) (define *collapsed* (make-hash-table)) (define (toggle-hide lnum uidat) ; fulltestname) (let* ((btn (vector-ref (dboard:uidat-get-lftcol uidat) lnum)) @@ -1476,11 +1568,13 @@ (if (not (null? path)) (hash-table-ref/default (dboard:tabdat-path-run-ids tabdat) path #f) #f)) (define (dboard:get-tests-dat tabdat run-id last-update) - (let* ((tdat (if run-id (rmt:get-tests-for-run run-id + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (tdat (if run-id (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run + run-id (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)) ;; '() (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)) ;; '() #f #f ;; offset limit (dboard:tabdat-hide-not-hide tabdat) ;; not-in @@ -1507,20 +1601,22 @@ (if (and res (> (length res) 1)) (cadr res) #f))) (define (dboard:update-tree tabdat runs-hash runs-header tb) - (let* ((run-ids (sort (filter number? (hash-table-keys runs-hash)) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (run-ids (sort (filter number? (hash-table-keys runs-hash)) (lambda (a b) (let* ((record-a (hash-table-ref runs-hash a)) (record-b (hash-table-ref runs-hash b)) (time-a (db:get-value-by-header record-a runs-header "event_time")) (time-b (db:get-value-by-header record-b runs-header "event_time"))) (< time-a time-b))))) (changed #f) (last-runs-update (dboard:tabdat-last-runs-update tabdat)) - (runs-dat (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update))) + (runs-dat (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt + (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update))) (dboard:tabdat-last-runs-update-set! tabdat (- (current-seconds) 2)) (for-each (lambda (run-id) (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) (key-vals (map (lambda (key)(db:get-value-by-header run-record runs-header key)) (dboard:tabdat-keys tabdat))) @@ -1585,12 +1681,14 @@ hide-clean: hide-clean) #f))) (define (dashboard:get-runs-hash tabdat) - (let* ((last-runs-update 0);;(dboard:tabdat-last-runs-update tabdat)) - (runs-dat (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (last-runs-update 0);;(dboard:tabdat-last-runs-update tabdat)) + (runs-dat (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt + (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) (runs-header (vector-ref runs-dat 0)) ;; 0 is header, 1 is list of records (runs (vector-ref runs-dat 1)) (run-id (dboard:tabdat-curr-run-id tabdat)) (runs-hash (let ((ht (make-hash-table))) (for-each (lambda (run) @@ -1598,15 +1696,17 @@ runs) ht))) runs-hash)) (define (dashboard:runs-summary-updater commondat tabdat tb cell-lookup run-matrix) - (if (dashboard:database-changed? commondat tabdat context-key: 'runs-summary-rundat) - (dashboard:do-update-rundat tabdat)) + ;; (if (dashboard:database-changed? commondat tabdat context-key: 'runs-summary-rundat) + (dashboard:do-update-rundat tabdat) ;; ) (dboard:runs-summary-control-panel-updater tabdat) (let* ((last-runs-update (dboard:tabdat-last-runs-update tabdat)) - (runs-dat (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) + (runs-dat (db:dispatch-query (dboard:tabdat-access-mode tabdat) + rmt:get-runs-by-patt db:get-runs-by-patt + (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) (runs-header (vector-ref runs-dat 0)) ;; 0 is header, 1 is list of records (runs (vector-ref runs-dat 1)) (run-id (dboard:tabdat-curr-run-id tabdat)) (runs-hash (dashboard:get-runs-hash tabdat)) ;; (runs-hash (let ((ht (make-hash-table))) @@ -2034,11 +2134,10 @@ (mark-for-update tabdat) (update-search commondat tabdat "test-name" val)) "make-controls"))) (iup:hbox (iup:button "Quit" #:action (lambda (obj) - ;; (if (dboard:tabdat-dblocal tabdat) (db:close-all (dboard:tabdat-dblocal tabdat))) (exit)) #:expand "NO" #:size "40x15") (iup:button "Refresh" #:action (lambda (obj) (mark-for-update tabdat)) #:expand "NO" #:size "40x15") @@ -2567,29 +2666,29 @@ (define *last-recalc-ended-time* 0) (define (dashboard:recalc modtime please-update-buttons last-db-update-time) (or please-update-buttons - (and (> (current-milliseconds)(+ *last-recalc-ended-time* 150)) - (> modtime last-db-update-time) + (and ;; (> (current-milliseconds)(+ *last-recalc-ended-time* 150)) ;; can't use this - it needs to be tab specific + (> modtime (- last-db-update-time 3)) ;; add three seconds of margin (> (current-seconds)(+ last-db-update-time 1))))) ;; (define *monitor-db-path* #f) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. (tasks:open-db) -(define (dashboard:get-youngest-run-db-mod-time tabdat) +(define (dashboard:get-youngest-run-db-mod-time dbdir) (handle-exceptions exn (begin (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn)) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) - (file-modification-time filen)) - (glob (conc (dboard:tabdat-dbdir tabdat) "/*.db")))))) + (file-modification-time filen)) + (glob (conc dbdir "/*.db*")))))) (define (dashboard:monitor-changed? commondat tabdat) (let* ((run-update-time (current-seconds)) (monitor-db-path (dboard:tabdat-monitor-db-path tabdat)) (monitor-modtime (if (and monitor-db-path (file-exists? monitor-db-path)) @@ -2607,19 +2706,23 @@ (hash-table-ref/default (dboard:tabdat-last-db-update tabdat) context 0)) (define (dboard:set-last-db-update! tabdat context newtime) (hash-table-set! (dboard:tabdat-last-db-update tabdat) context newtime)) +;; DOES NOT WORK RELIABLY WITH /tmp WAL mode files. Timestamps only change when the db +;; is closed (I think). If db dir starts with /tmp always return true +;; (define (dashboard:database-changed? commondat tabdat #!key (context-key 'default)) (let* ((run-update-time (current-seconds)) - (modtime (dashboard:get-youngest-run-db-mod-time tabdat)) ;; NOTE: ensure this is tabdat!! + (dbdir (dboard:tabdat-dbdir tabdat)) + (modtime (dashboard:get-youngest-run-db-mod-time dbdir)) (recalc (dashboard:recalc modtime (dboard:commondat-please-update commondat) - (dboard:get-last-db-update tabdat context-key)))) - ;; (dboard:tabdat-last-db-update tabdat)))) + (dboard:get-last-db-update tabdat context-key)))) + ;; (dboard:tabdat-last-db-update tabdat)))) (if recalc - (dboard:set-last-db-update! tabdat context-key run-update-time)) + (dboard:set-last-db-update! tabdat context-key run-update-time)) (dboard:commondat-please-update-set! commondat #f) recalc)) ;; point inside line ;; @@ -2713,12 +2816,15 @@ (db:test-get-event_time (hash-table-ref testsdat (car b)))))))))) ;; run times tab data updater ;; (define (dashboard:run-times-tab-run-data-updater commondat tabdat tab-num) - (let* ((last-runs-update (dboard:tabdat-last-runs-update tabdat)) - (runs-dat (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (last-runs-update (dboard:tabdat-last-runs-update tabdat)) + (runs-dat (db:dispatch-query access-mode + rmt:get-runs-by-patt db:get-runs-by-patt + (dboard:tabdat-keys tabdat) "%" #f #f #f #f last-runs-update)) (runs-header (vector-ref runs-dat 0)) ;; 0 is header, 1 is list of records (runs-hash (let ((ht (make-hash-table))) (for-each (lambda (run) (hash-table-set! ht (db:get-value-by-header run runs-header "id") run)) (vector-ref runs-dat 1)) @@ -3256,15 +3362,16 @@ ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) - (update-rundat + (dboard:update-rundat tabdat (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") (dboard:tabdat-numruns tabdat) (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") + ;; generate key patterns from the target stored in tabdat (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) (let ((fres (if (dboard:tabdat-target tabdat) (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) (map (lambda (k v)(list k v)) dbkeys ptparts)) (let ((res '())) @@ -3282,13 +3389,14 @@ (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num)) (dbkeys (dboard:tabdat-dbkeys tabdat))) ;;(print "RA => calling runs-tab-updater with commondat " commondat " tab-num " tab-num) ;;(tabdat-values tabdat) ;;RA added ;; (pp (dboard:tabdat->alist tabdat)) + ;; (if (dashboard:database-changed? commondat tabdat context-key: 'runs-rundat) (dashboard:do-update-rundat tabdat) (let ((uidat (dboard:commondat-uidat commondat))) - ;;(print "RA => Calling update-buttons with tabdat : " tabdat " uidat " uidat) + ;;(print "RA => Calling update-buttons with tabdat : " tabdat " uidat " uidat) (update-buttons tabdat uidat (dboard:tabdat-numruns tabdat) (dboard:tabdat-num-tests tabdat))) )) "dashboard:runs-tab-updater")) ;;====================================================================== @@ -3319,11 +3427,11 @@ (debug:print 3 *default-log-port* "INFO: tried to open test with invalid run-id,test-id. " (args:get-arg "-test")) (exit 1))))) ;; ((args:get-arg "-guimonitor") ;; (gui-monitor (dboard:tabdat-dblocal tabdat))) (else - (dboard:commondat-uidat-set! commondat (make-dashboard-buttons commondat)) ;; (dboard:tabdat-dblocal data) + (dboard:commondat-uidat-set! commondat (make-dashboard-buttons commondat)) (dboard:commondat-curr-tab-num-set! commondat 0) (dboard:commondat-add-updater commondat (lambda () (dashboard:runs-tab-updater commondat 1)) @@ -3357,7 +3465,9 @@ ;; ease debugging by loading ~/.dashboardrc (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) (if (file-exists? debugcontrolf) (load debugcontrolf))) -(main) +(if (args:get-arg "-repl") + (repl) + (main)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -38,24 +38,29 @@ ;;====================================================================== ;; R E C O R D S ;;====================================================================== +;; each db entry is a pair ( db . dbfilepath ) +;; I propose this record evolves into the area record +;; (defstruct dbr:dbstruct - main - strdb - ((path #f) : string) - ((local #f) : boolean) - rundb - inmem - mtime - rtime - stime - inuse - refdb - ((locdbs (make-hash-table)) : hash-table) - olddb) + (tmpdb #f) + (mtdb #f) + (refndb #f) + (homehost #f) ;; not used yet + (on-homehost #f) ;; not used yet + ) ;; goal is to converge on one struct for an area but for now it is too confusing + + +;; record for keeping state,status and count for doing roll-ups in +;; iterated tests +;; +(defstruct dbr:counts + (state #f) + (status #f) + (count 0)) ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== @@ -86,22 +91,15 @@ ;; if #f => get main db ;; if db already open - return inmem ;; if db not open, open inmem, rundb and sync then return inmem ;; inuse gets set automatically for rundb's ;; -(define (db:get-db dbstruct run-id) - (if (sqlite3:database? dbstruct) ;; pass sqlite3 databases on through - dbstruct - (begin - (let ((dbdat (if (or (not run-id) - (eq? run-id 0)) - (db:open-main dbstruct) - (db:open-rundb dbstruct run-id) - ))) - dbdat)))) - -;; legacy handling of structure for managing db's. Refactor this into dbr:? +(define (db:get-db dbstruct . blah) ;; run-id) + (or (dbr:dbstruct-tmpdb dbstruct) + (db:open-db dbstruct))) + +;; ;; legacy handling of structure for managing db's. Refactor this into dbr:? (define (db:dbdat-get-db dbdat) (if (pair? dbdat) (car dbdat) dbdat)) @@ -113,35 +111,38 @@ ;; mod-read: ;; 'mod modified data ;; 'read read data ;; Locks the mutex and depending on 'mod or 'read passed, sets the last timestamp in dbstruct ;; -(define (db:done-with dbstruct run-id mod-read) - (if (not (sqlite3:database? dbstruct)) - (begin - (mutex-lock! *rundb-mutex*) - (if (eq? mod-read 'mod) - (dbr:dbstruct-mtime-set! dbstruct (current-milliseconds)) - (dbr:dbstruct-rtime-set! dbstruct (current-milliseconds))) - (dbr:dbstruct-inuse-set! dbstruct #f) - (mutex-unlock! *rundb-mutex*)))) - -;; (db:with-db dbstruct run-id sqlite3:exec "select blah from blaz;") +;; (define (db:done-with dbstruct run-id mod-read) +;; (if (not (sqlite3:database? dbstruct)) +;; (begin +;; (mutex-lock! *rundb-mutex*) +;; (if (eq? mod-read 'mod) +;; (dbr:dbstruct-mtime-set! dbstruct (current-milliseconds)) +;; (dbr:dbstruct-rtime-set! dbstruct (current-milliseconds))) +;; (dbr:dbstruct-inuse-set! dbstruct #f) +;; (mutex-unlock! *rundb-mutex*)))) + +;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) (let* ((dbdat (if (dbr:dbstruct? dbstruct) (db:get-db dbstruct run-id) - dbstruct)) ;; cheat, allow for passing in a dbdat + (begin + (print-call-chain) + (print "db:with-db called with dbdat instead of dbstruct, FIXME!!") + dbstruct))) ;; cheat, allow for passing in a dbdat (db (db:dbdat-get-db dbdat))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "sqlite3 issue in db:with-db, dbstruct=" dbstruct ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn)) (print-call-chain (current-error-port))) (let ((res (apply proc db params))) - (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) + ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) res)))) ;;====================================================================== ;; K E E P F I L E D B I N dbstruct ;;====================================================================== @@ -169,56 +170,52 @@ ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. ;; -(define (db:dbfile-path run-id) - (let* ((dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) - (fname (if run-id - (if (eq? run-id 0) "main.db" (conc run-id ".db")) - #f))) +(define (db:dbfile-path . junk) ;; run-id) + (let* ((dbdir (common:get-db-tmp-area))) ;; (db:get-dbdir)) +;; (fname (if run-id +;; (if (eq? run-id 0) "main.db" (conc run-id ".db")) +;; #f))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) (exit 1)) (if (not (directory? dbdir))(create-directory dbdir #t))) - (if fname - (conc dbdir "/" fname) - dbdir))) + dbdir)) ;; (if fname +;; (conc dbdir "/" fname) +;; dbdir))) ;; Returns the database location as specified in config file ;; -(define (db:get-dbdir) - (or (configf:lookup *configdat* "setup" "dbdir") - (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))) +;; (define db:get-dbdir common:get-db-tmp-area) +;; (or (configf:lookup *configdat* "setup" "dbdir") +;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))) (define (db:set-sync db) (let ((syncprag (configf:lookup *configdat* "setup" "sychronous"))) - (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) + (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) ;; open an sql database inside a file lock ;; returns: db existed-prior-to-opening ;; RA => Returns a db handler; sets the lock if opened in writable mode ;; (define (db:lock-create-open fname initproc) - ;; (if (file-exists? fname) - ;; (let ((db (sqlite3:open-database fname))) - ;; (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) - ;; (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - ;; db) - (let* ((parent-dir (pathname-directory fname)) + (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local (dir-writable (file-write-access? parent-dir)) (file-exists (file-exists? fname)) (file-write (if file-exists (file-write-access? fname) dir-writable ))) (if file-write ;; dir-writable (let (;; (lock (obtain-dot-lock fname 1 5 10)) (db (sqlite3:open-database fname))) (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + ;; (db:set-sync db) + (sqlite3:execute db "PRAGMA synchronous = 0;") (if (not file-exists) (begin (if (string-match "^/tmp/.*" fname) ;; this is a file in /tmp (sqlite3:execute db "PRAGMA journal_mode=WAL;") (print "Creating " fname " in NON-WAL mode.")) @@ -227,124 +224,97 @@ db) (begin (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname) (sqlite3:open-database fname))))) ;; ) -;; This routine creates the db. It is only called if the db is not already opened -;; -(define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) - (let* ((local #t) ;; (dbr:dbstruct-local dbstruct)) - (rdb (if local - (dbr:dbstruct-localdb dbstruct run-id) - (dbr:dbstruct-inmem dbstruct)))) ;; (dbr:dbstruct-runrec dbstruct run-id 'inmem))) - (if (or rdb - do-not-open) - rdb - (begin - (mutex-lock! *rundb-mutex*) - (let* (;; (fname (if (or (not run-id)(eq? run-id 0)) "main.db" (conc run-id ".db"))) - (dbpath (db:dbfile-path run-id)) ;; (conc toppath "/db/" run-id ".db")) - (dbexists (file-exists? dbpath)) - (tmppath (common:get-db-tmp-area)) - (inmem #f) ;; (if local #f (db:lock-create-open (conc tmppath "/" fname) db:initialize-run-id-db))) ;; (db:open-inmem-db))) - (refdb #f) ;; (if local #f (db:open-inmem-db))) - (db (db:lock-create-open dbpath ;; this is the database physically on disk - (lambda (db) - (handle-exceptions - exn - (begin - ;; (release-dot-lock dbpath) - (if (> attemptnum 2) - (debug:print-error 0 *default-log-port* "tried twice, cannot create/initialize db for run-id " run-id ", at path " dbpath) - (db:open-rundb dbstruct run-id attemptnum (+ attemptnum 1)))) - (db:initialize-run-id-db db) - (sqlite3:execute - db - "INSERT OR IGNORE INTO tests (id,run_id,testname,event_time,item_path,state,status) VALUES (?,?,'bogustest',strftime('%s','now'),'nowherepath','DELETED','n/a');" - (* run-id 30000) ;; allow for up to 30k tests per run - run-id) - ;; do a dummy query to test that the table exists and the db is truly readable - (sqlite3:execute db "SELECT * FROM tests WHERE id=?;" (* run-id 30000)) - )))) ;; add strings db to rundb, not in use yet - ;; )) ;; (sqlite3:open-database dbpath)) - (olddb (if *megatest-db* - *megatest-db* - (let ((db (db:open-megatest-db))) - (set! *megatest-db* db) - db))) - (write-access (file-write-access? dbpath)) - ;; (handler (make-busy-timeout 136000)) - ) - (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) ;; only unset so other db's also can use this control - (dbr:dbstruct-rundb-set! dbstruct (cons db dbpath)) - (dbr:dbstruct-inuse-set! dbstruct #t) - (dbr:dbstruct-olddb-set! dbstruct olddb) - ;; (dbr:dbstruct-run-id-set! dbstruct run-id) - (mutex-unlock! *rundb-mutex*) - (if local - (begin - (dbr:dbstruct-localdb-set! dbstruct run-id db) ;; (dbr:dbstruct-inmem-set! dbstruct db) ;; direct access ... - db) - (begin - (dbr:dbstruct-inmem-set! dbstruct inmem) - ;; dec 14, 2014 - keep deleted records available. hunch is that they are needed for id placeholders - ;; (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';") ;; they just slow us down in this context - (db:sync-tables db:sync-tests-only db inmem) - (db:delay-if-busy refdb) ;; dbpath: (db:dbdat-get-path refdb)) ;; What does delaying here achieve? - (dbr:dbstruct-refdb-set! dbstruct refdb) - (db:sync-tables db:sync-tests-only inmem refdb) ;; use inmem as the reference, don't read again from db - ;; sync once more to deal with delays? - ;; (db:sync-tables db:sync-tests-only db inmem) - ;; (db:sync-tables db:sync-tests-only inmem refdb) - inmem))))))) - -;; This routine creates the db if not already present. It is only called if the db is not already ls opened -;; -(define (db:open-main dbstruct) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) - (let ((mdb (dbr:dbstruct-main dbstruct))) ;; RA => Returns the first reference in dbstruct - (if mdb - mdb - (begin - (mutex-lock! *rundb-mutex*) - (let* ((dbpath (db:dbfile-path 0)) - (dbexists (file-exists? dbpath)) - (db (db:lock-create-open dbpath db:initialize-main-db)) - (olddb (db:open-megatest-db)) - (write-access (file-write-access? dbpath)) - (dbdat (cons db dbpath))) - (if (and dbexists (not write-access)) - (set! *db-write-access* #f)) - (dbr:dbstruct-main-set! dbstruct dbdat) - (dbr:dbstruct-olddb-set! dbstruct olddb) ;; olddb is already a (cons db path) - (mutex-unlock! *rundb-mutex*) - (if (and (not dbexists) - *db-write-access*) ;; did not have a prior db and do have write access - (db:multi-db-sync #f 'old2new)) ;; migrate data from megatest.db automatically - dbdat))))) +;; ;; This routine creates the db. It is only called if the db is not already opened +;; ;; +;; (define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) +;; (let* ((dbfile (db:dbfile-path run-id)) ;; (conc toppath "/db/" run-id ".db")) +;; (dbexists (file-exists? dbfile)) +;; (db (db:lock-create-open dbfile (lambda (db) +;; (handle-exceptions +;; exn +;; (begin +;; ;; (release-dot-lock dbpath) +;; (if (> attemptnum 2) +;; (debug:print-error 0 *default-log-port* "tried twice, cannot create/initialize db for run-id " run-id ", at path " dbpath) +;; (db:open-rundb dbstruct run-id attemptnum (+ attemptnum 1)))) +;; (db:initialize-run-id-db db) +;; (sqlite3:execute +;; db +;; "INSERT OR IGNORE INTO tests (id,run_id,testname,event_time,item_path,state,status) VALUES (?,?,'bogustest',strftime('%s','now'),'nowherepath','DELETED','n/a');" +;; (* run-id 30000) ;; allow for up to 30k tests per run +;; run-id) +;; ;; do a dummy query to test that the table exists and the db is truly readable +;; (sqlite3:execute db "SELECT * FROM tests WHERE id=?;" (* run-id 30000)) +;; )))) ;; add strings db to rundb, not in use yet +;; (olddb (if *megatest-db* +;; *megatest-db* +;; (let ((db (db:open-megatest-db))) +;; (set! *megatest-db* db) +;; db))) +;; (write-access (file-write-access? dbfile))) +;; (if (and dbexists (not write-access)) +;; (set! *db-write-access* #f)) ;; only unset so other db's also can use this control +;; (dbr:dbstruct-rundb-set! dbstruct (cons db dbfile)) +;; (dbr:dbstruct-inuse-set! dbstruct #t) +;; (dbr:dbstruct-olddb-set! dbstruct olddb) +;; ;;; (mutex-unlock! *rundb-mutex*) ;;; why did we need a mutex on opening db's? +;; (db:sync-tables db:sync-tests-only *megatest-db* db) +;; db)) + +;; This routine creates the db if not already present. It is only called if the db is not already opened +;; +(define (db:open-db dbstruct #!key (areapath #f)) + (let ((tmpdb (dbr:dbstruct-tmpdb dbstruct))) ;; RA => Returns the first reference in dbstruct + (if tmpdb + tmpdb + ;; (mutex-lock! *rundb-mutex*) + (let* ((dbpath (db:dbfile-path)) ;; 0)) + (dbexists (file-exists? dbpath)) + (dbfexists (file-exists? (conc dbpath "/megatest.db"))) + (tmpdb (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db)) + (mtdb (db:open-megatest-db)) + (refndb (db:open-megatest-db path: dbpath name: "megatest_ref.db")) + (write-access (file-write-access? dbpath))) + (if (and dbexists (not write-access)) + (set! *db-write-access* #f)) + (dbr:dbstruct-mtdb-set! dbstruct mtdb) + (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) ;; olddb is already a (cons db path) + (dbr:dbstruct-refndb-set! dbstruct refndb) + ;; (mutex-unlock! *rundb-mutex*) + (if (and (not dbfexists) + write-access) ;; *db-write-access*) ;; did not have a prior db and do have write access + (begin + (debug:print 0 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data from " (db:dbdat-get-path mtdb)) + (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)) + (debug:print 0 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists, not propogating data from " (db:dbdat-get-path mtdb))) + ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically + tmpdb)))) ;; Make the dbstruct, setup up auxillary db's and call for main db at least once ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; -(define (db:setup run-id #!key (local #f)) - (let* ((dbdir (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) - (dbstruct (make-dbr:dbstruct path: dbdir local: local))) - dbstruct)) - -;; open the local db for direct access (no server) -;; -(define (db:open-local-db-handle) +(define (db:setup #!key (areapath #f)) (or *dbstruct-db* - (let ((dbstruct (db:setup #f local: #t))) - (set! *dbstruct-db* dbstruct) - dbstruct))) - -;; Open the classic megatest.db file in toppath + (if (common:on-homehost?) + (let* ((dbstruct (make-dbr:dbstruct))) + (db:open-db dbstruct areapath: areapath) + (set! *dbstruct-db* dbstruct) + dbstruct) + (begin + (debug:print 0 *default-log-port* "ERROR: attempt to open database when not on homehost. Exiting. Homehost: " (common:get-homehost)) + (exit 1))))) + +;; Open the classic megatest.db file (defaults to open in toppath) ;; -(define (db:open-megatest-db) - (let* ((dbpath (conc *toppath* "/megatest.db")) +;; NOTE: returns a dbdat not a dbstruct! +;; +(define (db:open-megatest-db #!key (path #f)(name #f)) + (let* ((dbpath (conc (or path *toppath*) "/" (or name "megatest.db"))) (dbexists (file-exists? dbpath)) (db (db:lock-create-open dbpath (lambda (db) (db:initialize-main-db db) (db:initialize-run-id-db db)))) @@ -354,94 +324,47 @@ (cons db dbpath))) ;; sync run to disk if touched ;; (define (db:sync-touched dbstruct run-id #!key (force-sync #f)) - (let ((mtime (dbr:dbstruct-mtime dbstruct)) - (stime (dbr:dbstruct-stime dbstruct)) - (rundb (dbr:dbstruct-rundb dbstruct)) - (inmem (dbr:dbstruct-inmem dbstruct)) - (maindb (dbr:dbstruct-main dbstruct)) - (refdb (dbr:dbstruct-refdb dbstruct)) - (olddb (dbr:dbstruct-olddb dbstruct)) - ;; (runid (dbr:dbstruct-run-id dbstruct)) - ) + (let ((tmpdb (dbr:dbstruct-tmpdb dbstruct)) + (mtdb (dbr:dbstruct-mtdb dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (start-t (current-seconds))) (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id) - ;; (mutex-lock! *http-mutex*) - (if (eq? run-id 0) - ;; runid equal to 0 is main.db - (if maindb - (if (or (not (number? mtime)) - (not (number? stime)) - (> mtime stime) - force-sync) - (begin - (db:delay-if-busy maindb) - (db:delay-if-busy olddb) - (let ((num-synced (db:sync-tables (db:sync-main-list maindb) maindb olddb))) - (dbr:dbstruct-stime-set! dbstruct (current-milliseconds)) - num-synced) - 0)) - (begin - ;; this can occur when using local access (i.e. not in a server) - ;; need a flag to turn it off. - ;; - (debug:print 3 *default-log-port* "WARNING: call to sync main.db to megatest.db but main not initialized") - 0)) - ;; any other runid is a run - (if (or (not (number? mtime)) - (not (number? stime)) - (> mtime stime) - force-sync) - (begin - (db:delay-if-busy rundb) - (db:delay-if-busy olddb) - (dbr:dbstruct-stime-set! dbstruct (current-milliseconds)) - (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb))) - ;; (mutex-unlock! *http-mutex*) - num-synced) - (begin - ;; (mutex-unlock! *http-mutex*) - 0)))))) - -(define (db:close-main dbstruct) - (let ((maindb (dbr:dbstruct-main dbstruct))) - (if maindb - (begin - (sqlite3:finalize! (db:dbdat-get-db maindb)) - (dbr:dbstruct-main-set! dbstruct #f))))) - -(define (db:close-run-db dbstruct run-id) - (let ((rdb (db:open-rundb dbstruct run-id do-not-open: #t))) - (if (and rdb - (sqlite3:database? rdb)) - (begin - (sqlite3:finalize! rdb) - (dbr:dbstruct-localdb-set! dbstruct run-id #f) - (dbr:dbstruct-inmem-set! dbstruct #f))))) + (mutex-lock! *db-multi-sync-mutex*) + (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update"))) + (mutex-unlock! *db-multi-sync-mutex*) + (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb)) + (mutex-lock! *db-multi-sync-mutex*) + (set! *db-last-sync* start-t) + (mutex-unlock! *db-multi-sync-mutex*))) ;; close all opened run-id dbs (define (db:close-all dbstruct) - ;; finalize main.db - (db:sync-touched dbstruct 0 force-sync: #t) - ;;(common:db-block-further-queries) - ;; (mutex-lock! *db-sync-mutex*) ;; with this perhaps it isn't necessary to use the block-further-queries mechanism? - - (db:close-main dbstruct) - - (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) - (if (hash-table? locdbs) - (for-each (lambda (run-id) - (db:close-run-db dbstruct run-id)) - (hash-table-keys locdbs))))) - -(define (db:open-inmem-db) - (let* ((db (sqlite3:open-database ":memory:")) - (handler (make-busy-timeout 3600))) - (sqlite3:set-busy-handler! db handler) - (db:initialize-run-id-db db) - (cons db #f))) + (if (dbr:dbstruct? dbstruct) + (begin + ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. + (let ((tdb (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct))) + (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb dbstruct))) + (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct)))) + (if tdb (sqlite3:finalize! tdb)) + (if mdb (sqlite3:finalize! mdb)) + (if rdb (sqlite3:finalize! rdb)))))) + +;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) +;; (if (hash-table? locdbs) +;; (for-each (lambda (run-id) +;; (db:close-run-db dbstruct run-id)) +;; (hash-table-keys locdbs))))) + +;; (define (db:open-inmem-db) +;; (let* ((db (sqlite3:open-database ":memory:")) +;; (handler (make-busy-timeout 3600))) +;; (sqlite3:set-busy-handler! db handler) +;; (db:initialize-run-id-db db) +;; (cons db #f))) ;; just tests, test_steps and test_data tables (define db:sync-tests-only (list ;; (list "strs" @@ -491,12 +414,12 @@ '("status" #f) '("type" #f)))) ;; needs db to get keys, this is for syncing all tables ;; -(define (db:sync-main-list db) - (let ((keys (db:get-keys db))) +(define (db:sync-main-list dbstruct) + (let ((keys (db:get-keys dbstruct))) (list (list "keys" '("id" #f) '("fieldname" #f) '("fieldtype" #f)) @@ -515,10 +438,14 @@ '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) '("jobgroup" #f))))) + +(define (db:sync-all-tables-list dbstruct) + (append (db:sync-main-list dbstruct) + db:sync-tests-only)) ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) (let* ((dbpath (db:dbdat-get-path dbdat)) @@ -593,16 +520,18 @@ #t)))))) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; -(define (db:sync-tables tbls fromdb todb . slave-dbs) - (mutex-lock! *db-sync-mutex*) +;; if last-update specified ("field-name" . time-in-seconds) +;; then sync only records where field-name >= time-in-seconds +;; IFF field-name exists +;; +(define (db:sync-tables tbls last-update fromdb todb . slave-dbs) (handle-exceptions exn (begin - (mutex-unlock! *db-sync-mutex*) (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) @@ -615,19 +544,11 @@ (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.") (exit))))) (cons todb slave-dbs)) 0) -;; (if *server-run* ;; we are inside a server, throw a sync-failed error -;; (signal (make-composite-condition -;; (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))) -;; 0)) ;; return zero for num synced - - ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. - ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") - ;; (portlogger:open-run-close portlogger:set-port port "released") - ;; (exit 1))) + ;; this is the work to be done (cond ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing") -1) ((not todb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing") -2) ((not (sqlite3:database? (db:dbdat-get-db fromdb))) (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb) -3) @@ -641,15 +562,26 @@ (tot-count 0)) (for-each ;; table (lambda (tabledat) (let* ((tablename (car tabledat)) (fields (cdr tabledat)) + (use-last-update (if last-update + (if (pair? last-update) + (member (car last-update) ;; last-update field name + (map car fields)) + (begin + (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair, received: " last-update) ;; found in fields + #f)) + #f)) (num-fields (length fields)) (field->num (make-hash-table)) (num->field (apply vector (map car fields))) (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") - " FROM " tablename ";")) + " FROM " tablename (if use-last-update ;; apply last-update criteria + (conc " " (car last-update) ">=" (cdr last-update)) + "") + ";")) (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) (fromdat '()) (fromdats '()) (totrecords 0) @@ -722,25 +654,24 @@ fromdats) (sqlite3:finalize! stmth))) (append (list todb) slave-dbs)))) tbls) (let* ((runtime (- (current-milliseconds) start-time)) - (should-print (common:low-noise-print 120 "db sync" (> runtime 500)))) ;; low and high sync times treated as separate. + (should-print (or (debug:debug-mode 12) + (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms")) (for-each (lambda (dat) (let ((tblname (car dat)) (count (cdr dat))) (set! tot-count (+ tot-count count)) (if (> count 0) (if should-print (debug:print 0 *default-log-port* (format #f " ~10a ~5a" tblname count)))))) (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) - tot-count))) - (mutex-unlock! *db-sync-mutex*))) + tot-count))))) - -(define (db:patch-schema-rundb run-id frundb) +(define (db:patch-schema-rundb frundb) ;; ;; remove this some time after September 2016 (added in version v1.6031 ;; (for-each (lambda (table-name) @@ -764,19 +695,19 @@ WHERE id=old.id; END;")) ) '("tests" "test_steps" "test_data"))) -(define (db:patch-schema-maindb run-id maindb) +(define (db:patch-schema-maindb maindb) ;; ;; remove all these some time after september 2016 (added in v1.6031 ;; (handle-exceptions exn (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 *default-log-port* "Column last_update already added to runs table") - (db:general-sqlite-error-dump exn "alter table runs ..." run-id "none")) + (db:general-sqlite-error-dump exn "alter table runs ..." #f "none")) (sqlite3:execute maindb "ALTER TABLE runs ADD COLUMN last_update INTEGER DEFAULT 0")) ;; these schema changes don't need exception handling (sqlite3:execute @@ -798,147 +729,232 @@ FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;")) + +(define *global-db-store* (make-hash-table)) + +(define (db:get-access-mode) + (if (args:get-arg "-use-db-cache") 'cached 'rmt)) + +;; Add db direct +;; +(define (db:dispatch-query access-mode rmt-cmd db-cmd . params) + (if (eq? access-mode 'cached) + (print "not doing cached calls right now")) +;; (apply db:call-with-cached-db db-cmd params) + (apply rmt-cmd params)) +;;) + +;; return the target db handle so it can be used +;; +(define (db:cache-for-read-only source target #!key (use-last-update #f)) + (if (and (hash-table-ref/default *global-db-store* target #f) + (>= (file-modification-time target)(file-modification-time source))) + (hash-table-ref *global-db-store* target) + (let* ((toppath (launch:setup)) + (targ-db-last-mod (if (file-exists? target) + (file-modification-time target) + 0)) + (cache-db (or (hash-table-ref/default *global-db-store* target #f) + (db:open-megatest-db path: target))) + (source-db (db:open-megatest-db path: source)) + (curr-time (current-seconds)) + (res '()) + (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f))) + (db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db) + (db:sync-tables db:sync-tests-only last-update source-db cache-db) + (hash-table-set! *global-db-store* target cache-db) + cache-db))) + +;; call a proc with a cached db +;; +(define (db:call-with-cached-db proc . params) + ;; first cache the db in /tmp + (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name))) + (fname (conc (common:get-area-path-signature) ".db")) + (cache-dir (common:get-create-writeable-dir + (list (conc "/tmp/" (current-user-name) "/" cname-part) + (conc "/tmp/" (current-user-name) "-" cname-part) + (conc "/tmp/" (current-user-name) "_" cname-part)))) + (megatest-db (conc *toppath* "/megatest.db"))) + ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir) + (if (not cache-dir) + (begin + (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db") + (exit 1)) + (let* ((th1 (make-thread + (lambda () + (if (and (file-exists? megatest-db) + (file-write-access? megatest-db)) + (begin + (common:sync-to-megatest.db 'timestamps) ;; internally mutexes on *db-local-sync* + (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) + "call-with-cached-db sync-to-megatest.db")) + (cache-db (db:cache-for-read-only + megatest-db + (conc cache-dir "/" fname) + use-last-update: #t))) + (thread-start! th1) + (apply proc cache-db params) + )))) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records ;; 'adj-testids - move test-ids into correct ranges ;; 'old2new - sync megatest.db records to .db/{main,1,2 ...}.db ;; 'new2old - sync .db/{main,1,2,3 ...}.db to megatest.db ;; 'closeall - close all opened dbs +;; 'schema - attempt to apply schema changes ;; ;; run-ids: '(1 2 3 ...) or #f (for all) ;; -(define (db:multi-db-sync run-ids . options) - (let* ((toppath (launch:setup)) - (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) - (mtdb (if toppath (db:open-megatest-db))) - (allow-cleanup (if run-ids #f #t)) - (run-ids (if run-ids - run-ids - (if toppath (begin - (db:delay-if-busy mtdb) - (db:get-all-run-ids mtdb))))) - (tdbdat (tasks:open-db)) - (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) - - ;; kill servers - (if (member 'killservers options) - (for-each - (lambda (server) - (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration") - (tasks:kill-server (vector-ref server 2)(vector-ref server 1))) - servers)) - - ;; clear out junk records - ;; - (if (member 'dejunk options) - (begin - (db:delay-if-busy mtdb) - (db:clean-up mtdb))) - - ;; adjust test-ids to fit into proper range - ;; - (if (member 'adj-testids options) - (begin - (db:delay-if-busy mtdb) - (db:prep-megatest.db-for-migration mtdb))) - - ;; sync runs, test_meta etc. - ;; - (if (member 'old2new options) - (begin - (db:sync-tables (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) - (for-each - (lambda (run-id) - (db:delay-if-busy mtdb) - (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)) - (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) - (debug:print 0 *default-log-port* "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db") - (db:replace-test-records dbstruct run-id testrecs) - (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct))))) - run-ids))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - (if (member 'new2old options) - (let* ((maindb (make-dbr:dbstruct path: toppath local: #t)) - (src-run-ids (if run-ids run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0))))) - (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <)) - (count 1) - (total (length all-run-ids)) - (dead-runs '())) - ;; first fix schema if needed - (map - (lambda (th) - (thread-join! th)) - (map - (lambda (run-id) - (thread-start! - (make-thread - (lambda () - (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) - (frundb (db:dbdat-get-db (db:get-db fromdb run-id)))) - (if (eq? run-id 0) - (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) - (db:patch-schema-maindb run-id maindb)) - (db:patch-schema-rundb run-id frundb))) - (set! count (+ count 1)) - (debug:print 0 *default-log-port* "Finished patching schema for " (if (eq? run-id 0) " main.db " (conc run-id ".db")) ", " count " of " total))))) - all-run-ids)) - ;; Then sync and fix db's - (set! count 0) - (process-fork - (lambda () - (map - (lambda (th) - (thread-join! th)) - (map - (lambda (run-id) - (thread-start! - (make-thread - (lambda () - (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) - (frundb (db:dbdat-get-db (db:get-db fromdb run-id)))) - (if (eq? run-id 0) - (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) - (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb) - (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f)))) - (begin - ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db - (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb) - (db:clean-up-rundb (db:get-db fromdb run-id))))) - (set! count (+ count 1)) - (debug:print 0 *default-log-port* "Finished clean up of " - (if (eq? run-id 0) - " main.db " (conc run-id ".db")) ", " count " of " total))))) - all-run-ids)))) - - ;; removed deleted runs - (let ((dbdir (tasks:get-task-db-path))) - (for-each (lambda (run-id) - (let ((fullname (conc dbdir "/" run-id ".db"))) - (if (file-exists? fullname) - (begin - (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname) - (delete-file fullname))))) - dead-runs)))) - - ;; (db:close-all dbstruct) - ;; (sqlite3:finalize! mdb) - )) +(define (db:multi-db-sync dbstruct . options) + (if (not (launch:setup)) + (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (dbr:dbstruct-tmpdb dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (tdbdat (tasks:open-db)) + (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + ;; kill servers + (if (member 'killservers options) + (for-each + (lambda (server) + (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration") + (tasks:kill-server (vector-ref server 2)(vector-ref server 1))) + servers)) + + ;; clear out junk records + ;; + (if (member 'dejunk options) + (begin + (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (db:clean-up mtdb) + (db:clean-up tmpdb) + (db:clean-up refndb))) + + ;; adjust test-ids to fit into proper range + ;; + ;; (if (member 'adj-testids options) + ;; (begin + ;; (db:delay-if-busy mtdb) + ;; (db:prep-megatest.db-for-migration mtdb))) + + ;; sync runs, test_meta etc. + ;; + (if (member 'old2new options) + ;; (begin + (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)) + ;; (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) +;; (for-each +;; (lambda (run-id) +;; (db:delay-if-busy mtdb) +;; (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) +;; ;; (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) +;; (debug:print 0 *default-log-port* "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db") +;; (db:replace-test-records dbstruct run-id testrecs) +;; (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct))))) +;; run-ids))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + (if (member 'new2old options) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + + (if (member 'fixschema options) + (begin + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + ;; (let* ((maindb (make-dbr:dbstruct path: toppath local: #t)) + ;; (src-run-ids (if run-ids run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0))))) + ;; (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <)) + ;; (count 1) + ;; (total (length all-run-ids)) + ;; (dead-runs '())) + ;; ;; first fix schema if needed + ;; (map + ;; (lambda (th) + ;; (thread-join! th)) + ;; (map + ;; (lambda (run-id) + ;; (thread-start! + ;; (make-thread + ;; (lambda () + ;; (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) +;; (if (member 'schema options) + ;; (if (eq? run-id 0) + ;; (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) + ;; (db:patch-schema-maindb run-id maindb)) + ;; (db:patch-schema-rundb run-id frundb))) + ;; (set! count (+ count 1)) + ;; (debug:print 0 *default-log-port* "Finished patching schema for " (if (eq? run-id 0) " main.db " (conc run-id ".db")) ", " count " of " total))))) + ;; all-run-ids)) + ;; ;; Then sync and fix db's + ;; (set! count 0) + ;; (process-fork + ;; (lambda () + ;; (map + ;; (lambda (th) + ;; (thread-join! th)) + ;; (map + ;; (lambda (run-id) + ;; (thread-start! + ;; (make-thread + ;; (lambda () + ;; (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)) + ;; (frundb (db:dbdat-get-db (db:get-db fromdb run-id)))) + ;; (if (eq? run-id 0) + ;; (let ((maindb (db:dbdat-get-db (db:get-db fromdb #f)))) +;; (db:sync-tables (db:sync-main-list dbstruct) #f (db:get-db fromdb #f) mtdb) + ;; (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f)))) + ;; (begin + ;; ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db +;; (db:sync-tables db:sync-tests-only #f (db:get-db fromdb run-id) mtdb) + ;; (db:clean-up-rundb (db:get-db fromdb run-id))))) + ;; (set! count (+ count 1)) + ;; (debug:print 0 *default-log-port* "Finished clean up of " + ;; (if (eq? run-id 0) + ;; " main.db " (conc run-id ".db")) ", " count " of " total))))) + ;; all-run-ids)))) + + ;; removed deleted runs +;; (let ((dbdir (tasks:get-task-db-path))) +;; (for-each (lambda (run-id) +;; (let ((fullname (conc dbdir "/" run-id ".db"))) +;; (if (file-exists? fullname) +;; (begin +;; (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname) +;; (delete-file fullname))))) +;; dead-runs)))) +;; + ;; (db:close-all dbstruct) + ;; (sqlite3:finalize! mdb) + data-synced))) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) + (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") + (exit) (if (or *db-write-access* - (not (member proc *db:all-write-procs*))) + (not #t)) ;; was: (member proc * db:all-write-procs *))) (let* ((db (cond ((pair? idb) (db:dbdat-get-db idb)) ((sqlite3:database? idb) idb) ((not idb) (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore")) ((procedure? idb) (idb)) @@ -1316,11 +1332,11 @@ ;;====================================================================== ;; L O G G I N G D B ;;====================================================================== -(define (open-logging-db) ;; (conc *toppath* "/megatest.db") (car *configinfo*))) +(define (open-logging-db) (let* ((dbpath (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname) (dbexists (file-exists? dbpath)) (db (sqlite3:open-database dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) @@ -1372,11 +1388,11 @@ ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? @@ -1388,11 +1404,11 @@ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" run-id deadtime) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? @@ -1436,11 +1452,11 @@ ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? @@ -1452,11 +1468,11 @@ "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" run-id deadtime) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? @@ -1468,11 +1484,11 @@ (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (let* (;; (min-incompleted (filter (lambda (x) ;; (let* ((testpath (cadr x)) ;; (tdatpath (conc testpath "/testdat.db")) ;; (dbexists (file-exists? tdatpath))) ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete @@ -1483,24 +1499,27 @@ (if (> (length all-ids) 0) (begin (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (sqlite3:execute db - (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" + (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" (string-intersperse (map conc all-ids) ",") - ");"))))) + ");") + run-id)))) ;; Now do rollups for the toplevel tests ;; - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (for-each (lambda (toptest) (let ((test-name (list-ref toptest 3))) ;; (run-id (list-ref toptest 5))) (db:top-test-set-per-pf-counts dbstruct run-id test-name))) toplevels))) +;; BUG: Probably broken - does not explicitly use run-id in the query +;; (define (db:top-test-set-per-pf-counts dbstruct run-id test-name) (db:general-call (db:get-db dbstruct run-id) 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) ;; Clean out old junk and vacuum the database @@ -1531,11 +1550,11 @@ ;; delete all runs that are state='deleted' "DELETE FROM runs WHERE state='deleted';" ;; delete empty runs "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id);" )))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) @@ -1545,11 +1564,11 @@ (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;"))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: @@ -1572,11 +1591,11 @@ ;; delete all tests that belong to runs that are 'deleted' ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") ;; delete all tests that are 'DELETED' "DELETE FROM tests WHERE state='DELETED';" )))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) @@ -1586,11 +1605,11 @@ (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;"))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: @@ -1619,11 +1638,11 @@ (sqlite3:for-each-row (lambda (run-id) (set! dead-runs (cons run-id dead-runs))) db "SELECT id FROM runs WHERE state='deleted';") - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 *default-log-port* "Records count before clean: " tot)) @@ -1633,11 +1652,11 @@ (debug:print-info 0 *default-log-port* "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;") dead-runs)) ;;====================================================================== ;; M E T A G E T A N D S E T V A R S @@ -1791,23 +1810,23 @@ (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (let ((res #f)) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") allvals) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (apply sqlite3:for-each-row (lambda (id) (set! res id)) db (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) ;(debug:print 4 *default-log-port* "qry: " qry) qry) qryvals) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res) res) (begin (debug:print-error 0 *default-log-port* "Called without all necessary keys") #f)))) @@ -1856,11 +1875,11 @@ (vector header res))) ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db) ;; (define (db:get-changed-run-ids since-time) - (let* ((dbdir (db:dbfile-path #f)) ;; (configf:lookup *configdat* "setup" "dbdir")) + (let* ((dbdir (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir")) (alldbs (glob (conc dbdir "/[0-9]*.db"))) (changed (filter (lambda (dbfile) (> (file-modification-time dbfile) since-time)) alldbs))) (delete-duplicates @@ -1995,11 +2014,11 @@ (totals (make-hash-table)) (curr (make-hash-table)) (res '()) (runs-info '())) ;; First get all the runname/run-ids - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (run-id runname) (set! runs-info (cons (list run-id runname) runs-info))) db "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats @@ -2092,11 +2111,11 @@ (remfields (list "id" "runname" "state" "status" "owner" "event_time")) (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (a . x) (set! res (apply vector a x))) db (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") @@ -2120,16 +2139,19 @@ ;; First set any related tests to DELETED (let* ((rdbdat (db:get-db dbstruct run-id)) (rdb (db:dbdat-get-db rdbdat)) (dbdat (db:get-db dbstruct #f)) (db (db:dbdat-get-db dbdat))) - (db:delay-if-busy rdbdat) - (sqlite3:execute rdb "UPDATE tests SET state='DELETED',comment='';") - (sqlite3:execute rdb "DELETE FROM test_steps;") - (sqlite3:execute rdb "DELETE FROM test_data;") - (db:delay-if-busy dbdat) - (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))) + ;; (db:delay-if-busy rdbdat) + (sqlite3:with-transaction + db + (lambda () + (sqlite3:execute rdb "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute rdb "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id) + (sqlite3:execute rdb "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id) + ;; (db:delay-if-busy dbdat) + (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))))) (define (db:update-run-event_time dbstruct run-id) (db:with-db dbstruct #f @@ -2153,11 +2175,11 @@ (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id))))) (define (db:set-run-status dbstruct run-id status msg) (let* ((dbdat (db:get-db dbstruct #f)) (db (db:dbdat-get-db dbdat))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (if msg (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))) (define (db:get-run-status dbstruct run-id) @@ -2187,11 +2209,11 @@ (dbdat (db:get-db dbstruct #f)) (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (key-val) (set! res (cons (list key key-val) res))) db qry run-id))) keys) @@ -2204,11 +2226,11 @@ (dbdat (db:get-db dbstruct #f)) (db (db:dbdat-get-db dbdat))) (for-each (lambda (key) (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (key-val) (set! res (cons key-val res))) db qry run-id))) keys) @@ -2413,22 +2435,25 @@ (db:general-call dbdat 'delete-test-step-records (list test-id)) ;; (db:delay-if-busy) (db:general-call dbdat 'delete-test-data-records (list test-id)) (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))) +;; (define (db:delete-old-deleted-test-records dbstruct) - (let ((run-ids (db:get-all-run-ids dbstruct)) + (let (;; (run-ids (db:get-all-run-ids dbstruct)) (targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past - (for-each - (lambda (run-id) - (db:with-db - dbstruct - run-id - #t - (lambda (db) - (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_time=) (if (>= value expected) "pass" "fail")) ((<=) (if (<= value expected) "pass" "fail")) (else (conc "ERROR: bad tol comparator " tol)))))) (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" test-id category variable value expected tol units (if comment comment "") status type))) csvlist))) ;; This routine moved from tdb.scm, tdb:read-test-data @@ -2998,11 +3023,11 @@ ;; (define (db:read-test-data dbstruct run-id test-id categorypatt) (let* ((dbdat (db:get-db dbstruct run-id)) (db (db:dbdat-get-db dbdat)) (res '())) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) db "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) @@ -3092,52 +3117,146 @@ (base64:base64-decode (string-substitute (regexp "_") "=" msg #t))) (lambda ()(deserialize))) (begin - (debug:print-error 0 *default-log-port* "reception failed. Received " msg " but cannot translate it.") + (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.") + (print-call-chain (current-error-port)) msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) ;; rpc + +;; This is to be the big daddy call (define (db:test-set-status-state dbstruct run-id test-id status state msg) (let ((dbdat (db:get-db dbstruct run-id))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbdat 'set-test-start-time (list test-id))) - (if msg - (db:general-call dbdat 'state-status-msg (list state status msg test-id)) - (db:general-call dbdat 'state-status (list state status test-id))) - (mt:process-triggers run-id test-id state status))) + ;; (if msg + ;; (db:general-call dbdat 'state-status-msg (list state status msg test-id)) + ;; (db:general-call dbdat 'state-status (list state status test-id))) + (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg) + ;; process the test_data table + (if (and test-id state status (equal? status "AUTO")) + (db:test-data-rollup dbstruct run-id test-id status)) + (mt:process-triggers run-id test-id state status))) + +;; state is the priority rollup of all states +;; status is the priority rollup of all completed statesfu +;; +;; if test-name is an integer work off that instead of test-name test-path +;; +(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + ;; establish info on incoming test followed by info on top level test + (let* ((db (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct))) + (testdat (if (number? test-name) + (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id + (db:get-test-info dbstruct run-id test-name item-path))) + (test-id (db:test-get-id testdat)) + (test-name (if (number? test-name) + (db:test-get-testname testdat) + test-name)) + (item-path (db:test-get-item-path testdat)) + (tl-testdat (db:get-test-info dbstruct run-id test-name "")) + (tl-test-id (db:test-get-id tl-testdat))) + (sqlite3:with-transaction + db + (lambda () + (db:test-set-state-status-by-id dbstruct run-id test-id state status comment) + (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item + (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path)) ;; item-path is used to exclude current state/status of THIS test + (running (length (filter (lambda (x) + (member (dbr:counts-state x) *common:running-states*)) + state-status-counts))) + (bad-not-started (length (filter (lambda (x) + (and (equal? (dbr:counts-state x) "NOT_STARTED") + (not (member (dbr:counts-status x) + *common:not-started-ok-statuses*)))) + state-status-counts))) + (all-curr-states (common:special-sort ;; worst -> best (sort of) + (delete-duplicates + (cons state (map dbr:counts-state state-status-counts))) + *common:std-states* >)) + (all-curr-statuses (common:special-sort ;; worst -> best + (delete-duplicates + (cons status (map dbr:counts-status state-status-counts))) + *common:std-statuses* >)) + (newstate (if (> running 0) + "RUNNING" + (if (> bad-not-started 0) + "COMPLETED" + (car all-curr-states)))) + (newstatus (if (> bad-not-started 0) + "CHECK" + (car all-curr-statuses)))) + ;; (print "Setting toplevel to: " newstate "/" newstatus) + (db:test-set-state-status-by-id dbstruct run-id tl-test-id newstate newstatus #f))))))) + +(define db:roll-up-pass-fail-counts db:set-state-status-and-roll-up-items) ;; call with state = #f to roll up with out accounting for state/status of this item ;; -(define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status) - (if (not (equal? item-path "")) - (let ((dbdat (db:get-db dbstruct run-id))) - ;; (db (db:dbdat-get-db dbdat))) - (db:general-call dbdat 'update-pass-fail-counts (list test-name test-name test-name)) - (db:top-test-set-per-pf-counts dbstruct run-id test-name)))) - -;; (case (string->symbol status) -;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) - -;; (if (or (not state) -;; (not (equal? item-path ""))) -;; ;; just do a rollup -;; (begin -;; (db:top-test-set-per-pf-counts dbdat run-id test-name) -;; #f) -;; (begin -;; ;; NOTE: No else clause needed for this case -;; (case (string->symbol status) -;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) -;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) -;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) -;; #f) -;; ))) +;; (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status) +;; (if (not (equal? item-path "")) ;; if asked to do this for a specific item then do an incremental update +;; (let* ((dbdat (db:get-db dbstruct run-id)) +;; (toptestdat (db:get-test-info dbstruct run-id test-name item-path)) +;; (currtopstate (db:test-get-state toptestdat)) +;; (currtopstatus (db:test-get-status toptestdat)) +;; (nextss (common:apply-state-status currtopstate currtopstatus state status)) +;; (newtopstate (car nextss)) ;; #f or a symbol +;; (newtopstatus (cdr nextss))) ;; #f or a symbol +;; (if (not newtopstate) ;; need to calculate it +;; +;; ;; We rely on the toplevel to track status as state varies. I.e. preserve an ABORT +;; +;; +;; ;; (db (db:dbdat-get-db dbdat))) +;; (db:general-call dbdat 'update-pass-fail-counts (list test-name test-name test-name)) +;; (db:top-test-set-per-pf-counts dbstruct run-id test-name)))) +;; +;; ;; (case (string->symbol status) +;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) +;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) +;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) +;; +;; ;; (if (or (not state) +;; ;; (not (equal? item-path ""))) +;; ;; ;; just do a rollup +;; ;; (begin +;; ;; (db:top-test-set-per-pf-counts dbdat run-id test-name) +;; ;; #f) +;; ;; (begin +;; ;; ;; NOTE: No else clause needed for this case +;; ;; (case (string->symbol status) +;; ;; ((RUNNING) (db:general-call dbdat 'top-test-set-running (list test-name))) +;; ;; ((LAUNCHED) (db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))) +;; ;; ((ABORT INCOMPLETE) (db:general-call dbdat 'top-test-set (list status test-name)))) +;; ;; #f) +;; ;; ))) + +(define (db:get-all-state-status-counts-for-test db run-id test-name item-path) + (sqlite3:map-row + (lambda (state status count) + (make-dbr:counts state: state status: status count: count)) + db + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + run-id test-name item-path)) + + +(define (db:get-all-item-states db run-id test-name) + (sqlite3:map-row + (lambda (a) a) + db + "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?" + run-id test-name)) + +(define (db:get-all-item-statuses db run-id test-name) + (sqlite3:map-row + (lambda (a) a) + db + "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?" + run-id test-name)) (define (db:test-get-logfile-info dbstruct run-id test-name) (db:with-db dbstruct run-id @@ -3152,12 +3271,12 @@ (set! res (list path final_logf)) (if (directory? path) (debug:print 2 *default-log-port* "Found path: " path) (debug:print 2 *default-log-port* "No such path: " path))) ;; ) db - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='';" - test-name) + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" + test-name run-id) res)))) ;;====================================================================== ;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S ;;====================================================================== @@ -3186,25 +3305,25 @@ ELSE status END WHERE id=?;") ;; DONE '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") ;; DONE ;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") ;; DONE ;; '(test-set-rundir "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE - '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=?;") + '(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;") ;; BROKEN!!! NEEDS run-id '(delete-tests-in-state ;; "DELETE FROM tests WHERE state=?;") ;; DONE "UPDATE tests SET state='DELETED' WHERE state=?") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") ;; DONE + '(update-test-rundat "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);") '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") ;; stuff for roll-up-pass-fail-counts '(update-pass-fail-counts "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')), pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED')) - WHERE testname=? AND item_path='';") ;; DONE - '(top-test-set "UPDATE tests SET state=? WHERE testname=? AND item_path='';") ;; DONE - '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='';") ;; DONE + WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id + '(top-test-set-running "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE ;; BROKEN!!! NEEDS run-id ;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this: ;; ;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status; @@ -3290,11 +3409,11 @@ AND item_path != '' AND state = 'COMPLETED' AND status = 'PASS') > 0 THEN 'PASS' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' ELSE 'UNKNOWN' END - WHERE testname=? AND item_path='';") ;; DONE + WHERE testname=? AND item_path='';") ;; DONE ;; BROKEN!!! NEEDS run-id ;; STEPS '(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;") '(delete-test-data-records "UPDATE test_data SET status='DELETED' WHERE test_id=?;") ;; using status since no state field )) @@ -3312,16 +3431,16 @@ sync set-verbosity killserver )) -(define (db:login dbstruct calling-path calling-version run-id client-signature) +(define (db:login dbstruct calling-path calling-version client-signature) (cond ((not (equal? calling-path *toppath*)) (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) - ((not (equal? *run-id* run-id)) - (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) + ;; ((not (equal? *run-id* run-id)) + ;; (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) ((not (equal? megatest-version calling-version)) (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version)) (else (hash-table-set! *logged-in-clients* client-signature (current-seconds)) '(#t "successful login")))) @@ -3330,11 +3449,11 @@ (let ((query (let ((q (alist-ref (if (string? stmtname) (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (apply sqlite3:execute (db:dbdat-get-db dbdat) query params) #t)) ;; get a summary of state and status counts to calculate a rollup ;; @@ -3382,17 +3501,17 @@ ;; Run this remotely!! ;; (define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path) (let* ((dbdat (db:get-db dbstruct #f)) (db (db:dbdat-get-db dbdat)) - (keys (db:get-keys db)) + (keys (db:get-keys dbstruct)) (selstr (string-intersperse keys ",")) (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) (keyvals #f) (tests-hash (make-hash-table))) ;; first look up the key values from the run selected by run-id - (db:delay-if-busy dbdat) + ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (a . b) (set! keyvals (cons a b))) db (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id) DELETED debugger.scm Index: debugger.scm ================================================================== --- debugger.scm +++ /dev/null @@ -1,73 +0,0 @@ -(use iup) - -(define *debugger-control* #f) -(define *debugger-rownum* 0) -(define *debugger-matrix* #f) -(define *debugger* #f) - -(define (debugger) - (if (not *debugger*) - (set! *debugger* - (thread-start! - (make-thread - (lambda () - (show - (dialog - (let ((pause #f) - (mtrx (matrix - #:expand "YES" - #:numlin 30 - #:numcol 3 - #:numlin-visible 20 - #:numcol-visible 2 - #:alignment1 "ALEFT" - ))) - (set! pause (button "Pause" - #:action (lambda (obj) - (set! *debugger-control* (not *debugger-control*)) - (attribute-set! pause "BGCOLOR" (if *debugger-control* - "200 0 0" - "0 0 200"))))) - (set! *debugger-matrix* mtrx) - (attribute-set! mtrx "WIDTH1" "300") - (vbox - mtrx - (hbox - pause))))) - (main-loop))))))) - -(define (debugger-start #!key (start 2)) - (set! *debugger-rownum* start)) - -(define (debugger-trace-var varname varval) - (let ((oldval (attribute *debugger-matrix* (conc *debugger-rownum* ":1"))) - (newval (conc varval))) - (if (not (equal? oldval newval)) - (begin - ;; (print "DEBUG: " varname " = " newval) - (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":0") varname) - (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":1") (conc varval)) - ;; (attribute-set! *debugger-matrix* "FITTOTEXT" "C1") - )) - (set! *debugger-rownum* (+ *debugger-rownum* 1)))) - - -(define (debugger-pauser) - (debugger) - (attribute-set! *debugger-matrix* "REDRAW" "ALL") - (let loop () - (if *debugger-control* - (begin - (print "PAUSED!") - (thread-sleep! 1) - (loop)) - ;;(thread-sleep! 0.01) - ))) - -;; ;; lets use the debugger eh? -;; (debugger-start) -;; (debugger-trace-var "can-run-more" can-run-more) -;; (debugger-trace-var "hed" hed) -;; (debugger-trace-var "prereqs-not-met" (runs:pretty-string prereqs-not-met)) -;; (debugger-pauser) - Index: docs/inprogress/megatest-architecture-proposed-2.fig ================================================================== --- docs/inprogress/megatest-architecture-proposed-2.fig +++ docs/inprogress/megatest-architecture-proposed-2.fig @@ -374,10 +374,15 @@ 6450 7800 6450 8475 -6 4 0 0 50 -1 0 12 0.0000 4 195 1905 5400 8850 pointers to the servers\001 4 0 0 50 -1 0 12 0.0000 4 150 930 5550 7575 monitor.db\001 -6 +6 6150 2700 7500 3225 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 6150 2700 7500 2700 7500 3225 6150 3225 6150 2700 +4 0 0 50 -1 0 12 0.0000 4 180 870 6225 2925 run2/test1\001 +-6 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 1725 5025 1275 2475 2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 @@ -384,14 +389,10 @@ 5550 4500 5550 225 225 225 225 4500 5550 4500 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 1875 7725 1875 5775 -2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 2775 5400 7125 5700 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 1425 7725 2775 7725 2775 8250 1425 8250 1425 7725 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 3375 7725 4725 7725 4725 8250 3375 8250 3375 7725 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 @@ -451,16 +452,18 @@ 3975 11250 4575 12075 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 2175 5025 3075 3750 -2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 - 6150 2700 7500 2700 7500 3225 6150 3225 6150 2700 2 1 0 1 0 7 50 -1 -1 3.000 0 0 -1 1 1 2 0 0 1.00 60.00 120.00 0 0 1.00 60.00 120.00 4800 6375 2850 5550 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 3600 2475 7425 6525 4 0 0 50 -1 0 12 0.0000 4 195 990 1800 2625 last_update\001 4 0 0 50 -1 0 12 0.0000 4 150 690 3150 4125 main.db\001 4 0 0 50 -1 0 12 0.0000 4 195 990 4200 3600 last_update\001 4 0 0 50 -1 0 12 0.0000 4 195 330 1950 6825 http\001 4 0 0 50 -1 0 12 0.0000 4 180 870 1575 7950 run1/test1\001 @@ -478,11 +481,10 @@ 4 0 0 50 -1 0 12 0.0000 4 195 1875 14325 6600 mysql/postgres model\001 4 0 0 50 -1 0 12 0.0000 4 195 4065 600 9300 Current state, no bypass - (if we switch to rpc?)\001 4 0 0 50 -1 0 12 0.0000 4 195 1125 2175 10650 prev try RPC\001 4 0 0 50 -1 0 12 0.0000 4 165 1095 2475 11100 rmt:get-tests\001 4 0 0 50 -1 0 12 0.0000 4 195 2205 450 525 /tmp//??? /.db/*.db\001 -4 0 0 50 -1 0 12 0.0000 4 180 870 6225 2925 run2/test1\001 4 0 0 50 -1 0 12 0.0000 4 195 1065 4800 7350 megatest.db\001 4 0 0 50 -1 0 12 0.0000 4 150 1785 600 8775 Possible Future state\001 4 0 0 50 -1 0 12 0.0000 4 150 1110 8025 450 CHANGES:\001 4 0 0 50 -1 0 12 0.0000 4 195 2145 8025 705 1. http -> rcp or nanomsg\001 4 0 0 50 -1 0 12 0.0000 4 195 3330 8025 960 2. cache db moves from inmem to /tmp\001 Index: fs-transport.scm ================================================================== --- fs-transport.scm +++ fs-transport.scm @@ -35,10 +35,10 @@ ;; There is no "server" per se but a convience routine to make it non ;; necessary to be reopening the db over and over again. ;; (define (fs:process-queue-item packet) - (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called - (set! *megatest-db* (open-db))) + (if (not *dbstruct-db*) ;; we will require that (setup-for-run) has already been called + (set! *dbstruct-db* (db:setup-db))) (debug:print-info 11 *default-log-port* "fs:process-queue-item called with packet=" packet) - (db:process-queue-item *megatest-db* packet)) + (db:process-queue-item *dbstruct-db* packet)) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -26,10 +26,11 @@ (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) (declare (uses daemon)) (declare (uses portlogger)) +(declare (uses rmt)) (include "common_records.scm") (include "db_records.scm") (define (http-transport:make-server-url hostport) @@ -36,11 +37,10 @@ (if (not hostport) #f (conc "http://" (car hostport) ":" (cadr hostport)))) (define *server-loop-heart-beat* (current-seconds)) -(define *heartbeat-mutex* (make-mutex)) ;;====================================================================== ;; S E R V E R ;;====================================================================== @@ -58,11 +58,10 @@ (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (portlogger:open-run-close portlogger:find-port)) (link-tree-path (configf:lookup *configdat* "setup" "linktree"))) - ;; (set! db *inmemdb*) (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) (root-path (if link-tree-path link-tree-path (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! (handle-directory spiffy-directory-listing) @@ -82,14 +81,14 @@ (dat ($ 'dat)) (res #f)) (cond ((equal? (uri-path (request-uri (current-request))) '(/ "api")) - (send-response body: (api:process-request *inmemdb* $) ;; the $ is the request vars proc + (send-response body: (api:process-request *dbstruct-db* $) ;; the $ is the request vars proc headers: '((content-type text/plain))) (mutex-lock! *heartbeat-mutex*) - (set! *last-db-access* (current-seconds)) + (set! *db-lastaccess* (current-seconds)) (mutex-unlock! *heartbeat-mutex*)) ((equal? (uri-path (request-uri (current-request))) '(/ "")) (send-response body: (http-transport:main-page))) ((equal? (uri-path (request-uri (current-request))) @@ -112,11 +111,11 @@ ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server run-id ipaddrstr portnum server-id) (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) (tdbdat (tasks:open-db))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 64000) @@ -221,28 +220,10 @@ (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) (res #f) (success #t) (sparams (db:obj->string params transport: 'http))) -;; (condition-case -;; handle-exceptions -;; exn -;; (if (> numretries 0) -;; (begin -;; (mutex-unlock! *http-mutex*) -;; (thread-sleep! 1) -;; (handle-exceptions -;; exn -;; (debug:print 0 *default-log-port* "WARNING: closing connections failed. Server at " fullurl " almost certainly dead") -;; (close-all-connections!)) -;; (debug:print 0 *default-log-port* "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) -;; (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1))) -;; (begin -;; (mutex-unlock! *http-mutex*) -;; (tasks:kill-server-run-id run-id) -;; #f)) -;; (begin (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here (max-retry-attempts 1) ;; consider all requests indempotent (retry-request? (lambda (request) @@ -261,11 +242,12 @@ exn (begin (set! success #f) (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (hash-table-delete! *runremote* run-id) + (if *runremote* + (remote-conndat-set! *runremote* #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) ;;; (signal (make-composite-condition ;;; (make-property-condition 'commfail 'message "failed to connect to server"))) @@ -275,11 +257,12 @@ fullurl (list (cons 'key "thekey") (cons 'cmd cmd) (cons 'params sparams)) read-string)) - transport: 'http))) + transport: 'http) + 0)) ;; added this speculatively ;; Shouldn't this be a call to the managed call-all-connections stuff above? (close-all-connections!) (mutex-unlock! *http-mutex*) )) (time-out (lambda () @@ -293,26 +276,30 @@ (thread-terminate! th2) (debug:print-info 11 *default-log-port* "got res=" res) (if (vector? res) (if (vector-ref res 0) res - (begin ;; note: this code also called in nmsg-transport - consider consolidating it - (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 2)) - (debug:print 0 *default-log-port* " client call chain:") - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " server call chain:") - (pp (vector-ref res 1) (current-error-port)) - (signal (vector-ref result 0)))) + (if (debug:debug-mode 11) + (begin ;; note: this code also called in nmsg-transport - consider consolidating it + (debug:print-error 11 *default-log-port* "error occured at server, info=" (vector-ref res 2)) + (debug:print 11 *default-log-port* " client call chain:") + (print-call-chain (current-error-port)) + (debug:print 11 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 0))) + res)) (signal (make-composite-condition (make-property-condition 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) ;; careful closing of connections stored in *runremote* ;; (define (http-transport:close-connections run-id) - (let* ((server-dat (hash-table-ref/default *runremote* run-id #f))) + (let* ((server-dat (if *runremote* + (remote-conndat *runremote*) + #f))) ;; (hash-table-ref/default *runremote* run-id #f))) (if (vector? server-dat) (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) (close-connection! api-dat) #t) #f))) @@ -348,11 +335,11 @@ ;; (define (http-transport:client-connect iface port) (let* ((api-url (conc "http://" iface ":" port "/api")) (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) + (server-dat (vector iface port api-uri api-url api-req (current-seconds) 'http))) server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; @@ -373,11 +360,13 @@ (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) (> (- (current-seconds) start-time) 2)) - sdat + (begin + (debug:print-info 0 *default-log-port* "Received server alive signature") + sdat) (begin (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin @@ -388,62 +377,46 @@ (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (server-timeout (server:get-timeout))) + (server-timeout (server:get-timeout)) + (server-going #f)) (let loop ((count 0) (server-state 'available) - (bad-sync-count 0)) - - ;; Use this opportunity to sync the inmemdb to db - (if *inmemdb* - (let ((start-time (current-milliseconds)) - (sync-time #f) - (rem-time #f)) - ;; inmemdb is a dbstruct - (condition-case - (db:sync-touched *inmemdb* *run-id* force-sync: #t) - ((sync-failed)(cond - ((> bad-sync-count 10) ;; time to give up - (http-transport:server-shutdown server-id port)) - (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop - (thread-sleep! 5) - (loop count server-state (+ bad-sync-count 1))))) - ((exn) - (debug:print-error 0 *default-log-port* "error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") - (exit))) - (set! sync-time (- (current-milliseconds) start-time)) - (set! rem-time (quotient (- 4000 sync-time) 1000)) - (debug:print 4 *default-log-port* "SYNC: time= " sync-time ", rem-time=" rem-time) - - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time) - (thread-sleep! 4))) ;; fallback for if the math is changed ... - - ;; - ;; no *inmemdb* yet, set running after our first pass through and start the db - ;; - (if (eq? server-state 'available) - (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers - (if (equal? new-server-id server-id) - (begin - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") - (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access - (set! *inmemdb* (db:setup run-id)) - ;; force initialization - ;; (db:get-db *inmemdb* #t) - (db:get-db *inmemdb* run-id) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) - (begin ;; gotta exit nicely - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") - (http-transport:server-shutdown server-id port)))))) + (bad-sync-count 0) + (start-time (current-milliseconds))) + + ;; Use this opportunity to sync the tmp db to megatest.db + (if (not server-going) ;; *dbstruct-db* + ;; Removed code is pasted below (keeping it around until we are clear it is not needed). + ;; no *dbstruct-db* yet, set running after our first pass through and start the db + (if (eq? server-state 'available) + (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers + (if (equal? new-server-id server-id) + (begin + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") + (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access + (set! *dbstruct-db* (db:setup)) ;; run-id)) + (set! server-going #t) + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") + (server:write-dotserver *toppath* (conc iface ":" port)) + (server:dotserver-starting-remove)) + (begin ;; gotta exit nicely + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") + (http-transport:server-shutdown server-id port)))))) + + ;; when things go wrong we don't want to be doing the various queries too often + ;; so we strive to run this stuff only every four seconds or so. + (let* ((sync-time (- (current-milliseconds) start-time)) + (rem-time (quotient (- 4000 sync-time) 1000))) + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time))) (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1) 'running bad-sync-count)) + (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) @@ -453,13 +426,13 @@ (begin (debug:print-info 0 *default-log-port* "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)))) - ;; Transfer *last-db-access* to last-access to use in checking that we are still alive + ;; Transfer *db-last-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) - (set! last-access *last-db-access*) + (set! last-access *db-last-access*) (mutex-unlock! *heartbeat-mutex*) ;; (debug:print 11 *default-log-port* "last-access=" last-access ", server-timeout=" server-timeout) ;; ;; no_traffic, no running tests, if server 0, no running servers @@ -483,23 +456,46 @@ ;; the db indicates so ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; - (loop 0 server-state bad-sync-count)) + (loop 0 server-state bad-sync-count (current-milliseconds))) (http-transport:server-shutdown server-id port)))))) - + +;; code cut out from above +;; +;; (condition-case +;; ;; (if (and (member (mutex-state *db-sync-mutex*) '(abandoned not-abandoned)) +;; ;; (> (- (current-seconds) *db-last-sync*) 5)) ;; if not currently being synced nor recently synced +;; (db:sync-touched *dbstruct-db* *run-id* force-sync: #t) ;; usually done in the watchdog, not here. +;; ((sync-failed)(cond +;; ((> bad-sync-count 10) ;; time to give up +;; (http-transport:server-shutdown server-id port)) +;; (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop +;; (thread-sleep! 5) +;; (loop count server-state (+ bad-sync-count 1))))) +;; ((exn) +;; (debug:print-error 0 *default-log-port* "error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") +;; (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") +;; (exit))) +;; (set! sync-time (- (current-milliseconds) start-time)) +;; (set! rem-time (quotient (- 4000 sync-time) 1000)) +;; (debug:print 4 *default-log-port* "SYNC: time= " sync-time ", rem-time=" rem-time) +;; +;; (if (and (<= rem-time 4) +;; (> rem-time 0)) +;; (thread-sleep! rem-time) +;; (thread-sleep! 4))) ;; fallback for if the math is changed ... + (define (http-transport:server-shutdown server-id port) (let ((tdbdat (tasks:open-db))) (debug:print-info 0 *default-log-port* "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) - (set! *time-to-exit* #t) - (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) ;; ;; start_shutdown ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") + (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") (thread-sleep! 5) (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) (debug:print-info 0 *default-log-port* "Average cached write time " @@ -515,42 +511,50 @@ (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") + ;; if the .server file contained :myport then we can remove it + (server:remove-dotserver-file *toppath* port) (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id) + (server:dotserver-starting) (let* ((tdbdat (tasks:open-db))) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (begin (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) - (if (server:check-if-running run-id) + (if (and (server:read-dotserver *toppath*) + (server:check-if-running run-id)) (begin (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") - (exit 0))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) + (exit 0)) + (begin ;; ok, no server detected, clean out any lingering records + (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id "notresponding"))) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id 'http)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id 'http) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") + + (server:dotserver-starting-remove) )) (let* ((th2 (make-thread (lambda () (debug:print-info 0 *default-log-port* "Server run thread started") (http-transport:run (if (args:get-arg "-server") @@ -567,21 +571,21 @@ (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit)))))) -(define (http:ping run-id host-port) - (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) - (login-res (rmt:login-no-auto-client-setup server-dat run-id))) - (if (and (list? login-res) - (car login-res)) - (begin - (print "LOGIN_OK") - (exit 0)) - (begin - (print "LOGIN_FAILED") - (exit 1))))) +;; (define (http:ping run-id host-port) +;; (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) +;; (login-res (rmt:login-no-auto-client-setup server-dat run-id))) +;; (if (and (list? login-res) +;; (car login-res)) +;; (begin +;; (print "LOGIN_OK") +;; (exit 0)) +;; (begin +;; (print "LOGIN_FAILED") +;; (exit 1))))) (define (http-transport:server-signal-handler signum) (signal-mask! signum) (handle-exceptions exn Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -240,12 +240,12 @@ ;; Since we should have a clean slate at this time there is no need to do ;; any of the other stuff that tests:test-set-status! does. Let's just ;; force RUNNING/n/a ;; (thread-sleep! 0.3) - (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "RUNNING") + ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "RUNNING" #f) ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here ;; if there is a runscript do it first (if fullrunscript (let ((pid (process-run fullrunscript))) @@ -388,12 +388,12 @@ (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free))))))) (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional (define (launch:execute encoded-cmd) - (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) - (tconfigreg (tests:get-all))) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) + (tconfigreg #f)) (setenv "MT_CMDINFO" encoded-cmd) (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area (top-path (assoc/default 'toppath cmdinfo)) @@ -436,11 +436,12 @@ (change-directory top-path) (begin (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found") (thread-sleep! 10) (loop (+ count 1))))) - + (launch:setup) ;; should be properly in the top-path now + (set! tconfigreg (tests:get-all)) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) (set! *time-to-exit* #t) @@ -702,10 +703,22 @@ ;; sets; *configdat* (megatest.config info) ;; *runconfigdat* (runconfigs.config info) ;; *configstatus* (status of the read data) ;; (define (launch:setup #!key (force #f)) + (mutex-lock! *launch-setup-mutex*) + (if (and *toppath* + (eq? *configstatus* 'fulldata)) ;; got it all + (begin + (debug:print 0 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata") + (mutex-unlock! *launch-setup-mutex*) + *toppath*) + (let ((res (launch:setup-body force: force))) + (mutex-unlock! *launch-setup-mutex*) + res))) + +(define (launch:setup-body #!key (force #f)) (let* ((toppath (or *toppath* (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath (runname (common:args-get-runname)) (target (common:args-get-target)) (linktree (common:get-linktree)) (sections (if target (list "default" target) #f)) ;; for runconfigs @@ -803,10 +816,11 @@ (set! *configstatus* 'partial)) (begin (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") (exit 2)))))) ;; additional house keeping + (common:set-transport-type) (let* ((linktree (or (getenv "MT_LINKTREE") (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))) (if linktree (begin (if (not (file-exists? linktree)) @@ -896,11 +910,11 @@ (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) (lnktarget (conc lnkpath "/" item-path))) ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical ;; rundir shortdir - (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path) + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id) (debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) (if (not (file-exists? linktree)) (begin (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree) @@ -969,11 +983,11 @@ (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath (if (file-exists? lnkpath) ;; (resolve-pathname lnkpath) (common:nice-path lnkpath) lnkpath) - testname "") + testname "" run-id) ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) (if (or (not curr-test-path) (not (directory-exists? toptest-path))) (begin (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath) @@ -1118,12 +1132,13 @@ (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED") + (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f) (set! diskpath (get-best-disk *configdat* tconfig)) (if diskpath (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.6208) +(define megatest-version 1.6301) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -141,10 +141,11 @@ -start-dir path : switch to this directory before running megatest -rebuild-db : bring the database schema up to date -cleanup-db : remove any orphan records, vacuum the db -import-megatest.db : migrate a database from v1.55 series to v1.60 series -sync-to-megatest.db : migrate data back to megatest.db + -use-db-cache : use cached access to db to reduce load -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname @@ -259,12 +260,14 @@ "-archive" "-since" "-fields" "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state "-sort" - ) - (list "-h" "-help" "--help" + "-target-db" + "-source-db" + ) + (list "-h" "-help" "--help" "-manual" "-version" "-force" "-xterm" "-showkeys" @@ -277,11 +280,12 @@ "-daemonize" "-preclean" "-rerun-clean" "-rerun-all" "-clean-cache" - + "-cache-db" + "-use-db-cache" ;; misc "-repl" "-lock" "-unlock" "-list-servers" @@ -323,10 +327,11 @@ args:arg-hash 0)) ;; Add args that use remargs here ;; + (if (and (not (null? remargs)) (not (or (args:get-arg "-runstep") (args:get-arg "-envcap") (args:get-arg "-envdelta") @@ -339,62 +344,11 @@ (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) ;; The watchdog is to keep an eye on things like db sync etc. ;; -(define *time-zero* (current-seconds)) -(define *watchdog* - (make-thread - (lambda () - (thread-sleep! 0.05) ;; delay for startup - (let ((legacy-sync (common:legacy-sync-required)) - (debug-mode (debug:debug-mode 1)) - (last-time (current-seconds))) - (if (common:legacy-sync-recommended) - (let loop () - ;; sync for filesystem local db writes - ;; - (let ((start-time (current-seconds)) - (servers-started (make-hash-table))) - (for-each - (lambda (run-id) - (mutex-lock! *db-multi-sync-mutex*) - (if (and legacy-sync - (hash-table-ref/default *db-local-sync* run-id #f)) - ;; (if (> (- start-time last-write) 5) ;; every five seconds - (begin ;; let ((sync-time (- (current-seconds) start-time))) - (db:multi-db-sync (list run-id) 'new2old) - (let ((sync-time (- (current-seconds) start-time))) - (debug:print-info 3 *default-log-port* "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds") - (if (common:low-noise-print 30 "sync new to old") - (debug:print-info 0 *default-log-port* "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) - ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Sync is taking a long time, start up a server to assist for run " run-id) - ;; (server:kind-run run-id))))) - (hash-table-delete! *db-local-sync* run-id))) - (mutex-unlock! *db-multi-sync-mutex*)) - (hash-table-keys *db-local-sync*)) - (if (and debug-mode - (> (- start-time last-time) 60)) - (begin - (set! last-time start-time) - (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) - - ;; keep going unless time to exit - ;; - (if (not *time-to-exit*) - (let delay-loop ((count 0)) - (if (and (not *time-to-exit*) - (< count 11)) ;; aprox 5-6 seconds - (begin - (thread-sleep! 1) - (delay-loop (+ count 1)))) - (loop))) - (if (common:low-noise-print 30) - (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*))))) - "Watchdog thread"))) +(define *watchdog* (make-thread common:watchdog "Watchdog thread")) (thread-start! *watchdog*) (if (args:get-arg "-log") (let ((oup (open-output-file (args:get-arg "-log")))) @@ -480,10 +434,18 @@ (on-exit std-exit-procedure) ;;====================================================================== ;; Misc general calls ;;====================================================================== + +(if (and (args:get-arg "-cache-db") + (args:get-arg "-source-db")) + (let* ((temp-dir (or (args:get-arg "-target-db") (create-directory (conc "/tmp/" (getenv "USER") "/" (string-translate (current-directory) "/" "_"))))) + (target-db (conc temp-dir "/cached.db")) + (source-db (args:get-arg "-source-db"))) + (db:cache-for-read-only source-db target-db) + (set! *didsomething* #t))) ;; handle a clean-cache request as early as possible ;; (if (args:get-arg "-clean-cache") (begin @@ -681,13 +643,13 @@ (if out-file (close-output-port out-port)) (exit) ;; yes, bending the rules here - need to exit since this is a utility )) (if (args:get-arg "-ping") - (let* ((run-id (string->number (args:get-arg "-run-id"))) + (let* ((server-id (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":" (host:port (args:get-arg "-ping"))) - (server:ping run-id host:port))) + (server:ping (or server-id host:port) do-exit: #t))) ;;====================================================================== ;; Capture, save and manipulate environments ;;====================================================================== @@ -736,48 +698,43 @@ (if (args:get-arg "-server") ;; Server? Start up here. ;; (let ((tl (launch:setup)) - (run-id (and (args:get-arg "-run-id") - (string->number (args:get-arg "-run-id")))) - (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) - (if run-id - (begin - (server:launch run-id transport-type) - (set! *didsomething* #t)) - (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id"))) - - ;; Not a server? This section will decide how to communicate - ;; - ;; Setup client for all expect listed here - (if (null? (lset-intersection - equal? - (hash-table-keys args:arg-hash) - '("-list-servers" - "-stop-server" - "-kill-server" - "-show-cmdinfo" - "-list-runs" - "-ping"))) - (if (launch:setup) - (let ((run-id (and (args:get-arg "-run-id") - (string->number (args:get-arg "-run-id"))))) - ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) - ;; if not list or kill then start a client (if appropriate) - (if (or (args-defined? "-h" "-version" "-create-megatest-area" "-create-test") - (eq? (length (hash-table-keys args:arg-hash)) 0)) - (debug:print-info 1 *default-log-port* "Server connection not needed") - (begin - ;; (if run-id - ;; (client:launch run-id) - ;; (client:launch 0) ;; without run-id we'll start a server for "0" - #t - )))))) - -;; MAY STILL NEED THIS -;; (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) + ;; (run-id (and (args:get-arg "-run-id") + ;; (string->number (args:get-arg "-run-id")))) + (transport-type *transport-type* )) + (server:launch 0 transport-type) + (set! *didsomething* #t))) +;; ;; (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id"))) +;; +;; ;; Not a server? This section will decide how to communicate +;; ;; +;; ;; Setup client for all expect listed here +;; (if (null? (lset-intersection +;; equal? +;; (hash-table-keys args:arg-hash) +;; '("-list-servers" +;; "-stop-server" +;; "-kill-server" +;; "-show-cmdinfo" +;; "-list-runs" +;; "-ping"))) +;; (if (launch:setup) +;; (let ((run-id (and (args:get-arg "-run-id") +;; (string->number (args:get-arg "-run-id"))))) +;; ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) +;; ;; if not list or kill then start a client (if appropriate) +;; (if (or (args-defined? "-h" "-version" "-create-megatest-area" "-create-test") +;; (eq? (length (hash-table-keys args:arg-hash)) 0)) +;; (debug:print-info 1 *default-log-port* "Server connection not needed") +;; (begin +;; ;; (if run-id +;; ;; (client:launch run-id) +;; ;; (client:launch 0) ;; without run-id we'll start a server for "0" +;; #t +;; )))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server") (args:get-arg "-kill-server")) (let ((tl (launch:setup))) @@ -832,23 +789,24 @@ ;;====================================================================== ;; Weird special calls that need to run *after* the server has started? ;;====================================================================== (if (args:get-arg "-list-targets") - (let ((targets (common:get-runconfig-targets))) - (debug:print 1 *default-log-port* "Found "(length targets) " targets") - (case (string->symbol (or (args:get-arg "-dumpmode") "alist")) - ((alist) - (for-each (lambda (x) - ;; (print "[" x "]")) - (print x)) - targets)) - ((json) - (json-write targets)) - (else - (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) - (set! *didsomething* #t))) + (if (launch:setup) + (let ((targets (common:get-runconfig-targets))) + (debug:print 1 *default-log-port* "Found "(length targets) " targets") + (case (string->symbol (or (args:get-arg "-dumpmode") "alist")) + ((alist) + (for-each (lambda (x) + ;; (print "[" x "]")) + (print x)) + targets)) + ((json) + (json-write targets)) + (else + (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets"))) + (set! *didsomething* #t)))) ;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig ;; (define (full-runconfigs-read) ;; in the envprocessing branch the below code replaces the further below code @@ -1038,11 +996,11 @@ (define (get-value-by-fieldname datavec test-field-index fieldname) (let ((indx (hash-table-ref/default test-field-index fieldname #f))) (if indx (if (>= indx (vector-length datavec)) - #f ;; index to high, should raise an error I suppose + #f ;; index too high, should raise an error I suppose (vector-ref datavec indx)) #f))) ;; NOTE: list-runs and list-db-targets operate on local db!!! ;; @@ -1051,35 +1009,39 @@ (if (or (args:get-arg "-list-runs") (args:get-arg "-list-db-targets")) (if (launch:setup) (let* (;; (dbstruct (make-dbr:dbstruct path: *toppath* local: (args:get-arg "-local"))) (runpatt (args:get-arg "-list-runs")) + (access-mode (db:get-access-mode)) (testpatt (common:args-get-testpatt #f)) ;; (if (args:get-arg "-testpatt") ;; (args:get-arg "-testpatt") ;; "%")) (keys (rmt:get-keys)) ;; (db:get-keys dbstruct)) - ;; (runsda t (db:get-runs dbstruct runpatt #f #f '())) - (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") (common:args-get-target) ;; (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target) - #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) + ;; (runsdat (db:get-runs dbstruct runpatt #f #f '())) + ;; (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") (common:args-get-target) ;; (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target) + ;; #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) + (runsdat (rmt:get-runs-by-patt keys (or runpatt "%") + (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0)) (runstmp (db:get-rows runsdat)) (header (db:get-header runsdat)) ;; this is "-since" support. This looks at last mod times of .db files ;; and collects those modified since the -since time. - (runs (if (and (not (null? runstmp)) - (args:get-arg "-since")) - (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since"))))) - (let loop ((hed (car runstmp)) - (tal (cdr runstmp)) - (res '())) - (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids) - (cons hed res) - res))) - (if (null? tal) - (reverse new-res) - (loop (car tal)(cdr tal) new-res))))) - runstmp)) + (runs runstmp) + ;; (if (and (not (null? runstmp)) + ;; (args:get-arg "-since")) + ;; (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since"))))) + ;; (let loop ((hed (car runstmp)) + ;; (tal (cdr runstmp)) + ;; (res '())) + ;; (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids) + ;; (cons hed res) + ;; res))) + ;; (if (null? tal) + ;; (reverse new-res) + ;; (loop (car tal)(cdr tal) new-res))))) + ;; runstmp)) (db-targets (args:get-arg "-list-db-targets")) (seen (make-hash-table)) (dmode (let ((d (args:get-arg "-dumpmode"))) (if d (string->symbol d) #f))) (data (make-hash-table)) @@ -1128,11 +1090,11 @@ (let* ((run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (tests (if tests-spec - (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc + (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) @@ -1253,11 +1215,11 @@ ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* ;; (db:test-get-rundir test) ;; ) ) ;; Each test ;; DO NOT remote run - (let ((steps (rmt:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) + (let ((steps (db:dispatch-query access-mode rmt:get-steps-for-test db:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) (for-each (lambda (step) (format #t " Step: ~20a State: ~10a Status: ~10a Time ~22a\n" (tdb:step-get-stepname step) @@ -1864,11 +1826,12 @@ (begin (if (not (launch:setup)) (begin (debug:print 0 *default-log-port* "Failed to setup, exiting") (exit 1))) - (common:cleanup-db) + (let ((dbstruct (db:setup *toppath*))) + (common:cleanup-db dbstruct)) (set! *didsomething* #t))) (if (args:get-arg "-mark-incompletes") (begin (if (not (launch:setup)) @@ -1902,12 +1865,15 @@ (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) - (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) - (if dbstruct + (dbstruct (if (and toppath + (common:on-homehost?)) + (db:setup) + #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) + (if *toppath* (cond ((getenv "MT_RUNSCRIPT") ;; How to run megatest scripts ;; ;; #!/bin/bash @@ -1920,11 +1886,10 @@ (repl)) (else (begin (set! *db* dbstruct) - (set! *client-non-blocking-mode* #t) (import extras) ;; might not be needed ;; (import csi) (import readline) (import apropos) ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ... @@ -1939,11 +1904,12 @@ (or (get-environment-variable "HOME") ".") "/.megatest_history")) (current-input-port (make-gnu-readline-port "megatest> ")))) (if (args:get-arg "-repl") (repl) (load (args:get-arg "-load"))) - (db:close-all dbstruct)) + ;; (db:close-all dbstruct) <= taken care of by on-exit call + ) (exit))) (set! *didsomething* #t)))) ;;====================================================================== ;; Wait on a run to complete @@ -1990,11 +1956,11 @@ ;; ;; ;; redo me (set! *didsomething* #t))) (if (args:get-arg "-import-megatest.db") (begin (db:multi-db-sync - #f ;; do all run-ids + (db:setup) 'killservers 'dejunk 'adj-testids 'old2new ;; 'new2old @@ -2002,11 +1968,11 @@ (set! *didsomething* #t))) (if (args:get-arg "-sync-to-megatest.db") (begin (db:multi-db-sync - #f ;; do all run-ids + (db:setup) 'new2old ) (set! *didsomething* #t))) (if (args:get-arg "-generate-html") @@ -2018,11 +1984,10 @@ ;;====================================================================== ;; Exit and clean up ;;====================================================================== -(if *runremote* (close-all-connections!)) (if (not *didsomething*) (debug:print 0 *default-log-port* help)) (set! *time-to-exit* #t) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -177,25 +177,29 @@ (begin (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate) (print-call-chain (current-error-port)) #f) (begin - (cond - ((and newstate newstatus newcomment) - (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) - ((and newstate newstatus) - (rmt:general-call 'state-status run-id newstate newstatus test-id)) - (else - (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) - (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) - (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) + ;; cond + ;; ((and newstate newstatus newcomment) + ;; (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)) + ;; ((and newstate newstatus) + ;; (rmt:general-call 'state-status run-id newstate newstatus test-id)) + ;; (else + ;; (if newstate (rmt:general-call 'set-test-state run-id newstate test-id)) + ;; (if newstatus (rmt:general-call 'set-test-status run-id newstatus test-id)) + ;; (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id)))) + (rmt:roll-up-pass-fail-counts run-id test-id #f newstate newstatus newcomment) (mt:process-triggers run-id test-id newstate newstatus) #t))) (define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment) (let ((test-id (rmt:get-test-id run-id test-name item-path))) - (mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) + (rmt:roll-up-pass-fail-counts run-id test-name item-path new-state new-status new-comment) + (mt:process-triggers run-id test-id new-state new-status) + #t)) + ;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment))) (define (mt:lazy-read-test-config test-name) (let ((tconf (hash-table-ref/default *testconfigs* test-name #f))) (if tconf tconf DELETED newdashboard.scm Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ /dev/null @@ -1,635 +0,0 @@ -;;====================================================================== -;; Copyright 2006-2013, Matthew Welland. -;; -;; This program is made available under the GNU GPL version 2.0 or -;; greater. See the accompanying file COPYING for details. -;; -;; This program is distributed WITHOUT ANY WARRANTY; without even the -;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -;; PURPOSE. -;;====================================================================== - -(use format numbers) -(require-library iup) -(import (prefix iup iup:)) -(use canvas-draw) - -(use sqlite3 srfi-1 posix regex regex-case srfi-69) -(import (prefix sqlite3 sqlite3:)) - -(declare (uses margs)) -(declare (uses launch)) -(declare (uses megatest-version)) -(declare (uses gutils)) -(declare (uses db)) -(declare (uses server)) -(declare (uses synchash)) -(declare (uses dcommon)) -(declare (uses tree)) - -(include "common_records.scm") -(include "db_records.scm") -(include "key_records.scm") - -(define help (conc -"Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest - version " megatest-version " - license GPL, Copyright (C) Matt Welland 2011 - -Usage: dashboard [options] - -h : this help - -server host:port : connect to host:port instead of db access - -test testid : control test identified by testid - -guimonitor : control panel for runs - -Misc - -rows N : set number of rows -")) - -;; process args -(define remargs (args:get-args - (argv) - (list "-rows" - "-run" - "-test" - "-debug" - "-host" - ) - (list "-h" - "-guimonitor" - "-main" - "-v" - "-q" - ) - args:arg-hash - 0)) - -(if (args:get-arg "-h") - (begin - (print help) - (exit))) - -(if (not (launch:setup)) - (begin - (print "Failed to find megatest.config, exiting") - (exit 1))) - -;; (if (args:get-arg "-host") -;; (begin -;; (set! *runremote* (string-split (args:get-arg "-host" ":"))) -;; (client:launch)) -;; (client:launch)) - -;; ease debugging by loading ~/.dashboardrc -(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) - (if (file-exists? debugcontrolf) - (load debugcontrolf))) - -(define *dbdir* (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) -(define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* - local: #t)) -(define *db-file-path* (db:dbfile-path 0)) - -;; HACK ALERT: this is a hack, please fix. -(define *read-only* (not (file-read-access? *db-file-path*))) - -(debug:setup) - -(define *tim* (iup:timer)) -(define *ord* #f) - -(iup:attribute-set! *tim* "TIME" 300) -(iup:attribute-set! *tim* "RUN" "YES") - -(define (message-window msg) - (iup:show - (iup:dialog - (iup:vbox - (iup:label msg #:margin "40x40"))))) - -(define (iuplistbox-fill-list lb items . default) - (let ((i 1) - (selected-item (if (null? default) #f (car default)))) - (iup:attribute-set! lb "VALUE" (if selected-item selected-item "")) - (for-each (lambda (item) - (iup:attribute-set! lb (number->string i) item) - (if selected-item - (if (equal? selected-item item) - (iup:attribute-set! lb "VALUE" item))) ;; (number->string i)))) - (set! i (+ i 1))) - items) - i)) - -(define (pad-list l n)(append l (make-list (- n (length l))))) - - -(define (mkstr . x) - (string-intersperse (map conc x) ",")) - -(define (update-search x val) - (hash-table-set! *searchpatts* x val)) - -;; mtest is actually the megatest.config file -;; -(define (mtest window-id) - (let* ((curr-row-num 0) - (rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string)) - (keys-matrix (dcommon:keys-matrix rawconfig)) - (setup-matrix (dcommon:section-matrix rawconfig "setup" "Varname" "Value")) - (jobtools-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 3)) - (validvals-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 2 - #:numcol-visible 1 - #:numlin-visible 2)) - (envovrd-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 20 - #:numcol-visible 1 - #:numlin-visible 8)) - (disks-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 20 - #:numcol-visible 1 - #:numlin-visible 8)) - ) - (iup:attribute-set! disks-matrix "0:0" "Disk Name") - (iup:attribute-set! disks-matrix "0:1" "Disk Path") - (iup:attribute-set! disks-matrix "WIDTH1" "120") - (iup:attribute-set! disks-matrix "WIDTH0" "100") - (iup:attribute-set! disks-matrix "ALIGNMENT1" "ALEFT") - (iup:attribute-set! disks-matrix "FIXTOTEXT" "C1") - (iup:attribute-set! disks-matrix "RESIZEMATRIX" "YES") - - ;; fill in existing info - (for-each - (lambda (mat fname) - (set! curr-row-num 1) - (for-each - (lambda (var) - (iup:attribute-set! mat (conc curr-row-num ":0") var) - (iup:attribute-set! mat (conc curr-row-num ":1") (config-lookup rawconfig fname var)) - (set! curr-row-num (+ curr-row-num 1))) - (configf:section-vars rawconfig fname))) - (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix disks-matrix) - (list "setup" "jobtools" "validvalues" "env-override" "disks")) - - (for-each - (lambda (mat) - (iup:attribute-set! mat "0:1" "Value") - (iup:attribute-set! mat "0:0" "Var") - (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") - (iup:attribute-set! mat "FIXTOTEXT" "C1") - (iup:attribute-set! mat "RESIZEMATRIX" "YES") - (iup:attribute-set! mat "WIDTH1" "120") - (iup:attribute-set! mat "WIDTH0" "100") - ) - (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix)) - - (iup:attribute-set! validvals-matrix "WIDTH1" "290") - (iup:attribute-set! envovrd-matrix "WIDTH1" "290") - - (iup:vbox - (iup:hbox - - (iup:vbox - (let ((tabs (iup:tabs - ;; The required tab - (iup:hbox - ;; The keys - (iup:frame - #:title "Keys (required)" - (iup:vbox - (iup:label (conc "Set the fields for organising your runs\n" - "here. Note: can only be changed before\n" - "running the first run when megatest.db\n" - "is created.")) - keys-matrix)) - (iup:vbox - ;; The setup section - (iup:frame - #:title "Setup" - (iup:vbox - (iup:label (conc "max_concurrent_jobs : limits total concurrent jobs (optional)\n" - "linktree : directory where linktree will be created.")) - setup-matrix)) - ;; The jobtools - (iup:frame - #:title "Jobtools" - (iup:vbox - (iup:label (conc "launcher : tool or script to run jobs (try nbfake)\n" - "useshell : use system to run your launcher\n" - "workhosts : spread jobs out on these hosts")) - jobtools-matrix)) - ;; The disks - (iup:frame - #:title "Disks" - (iup:vbox - (iup:label (conc "Enter names and existing paths of locations to run tests")) - disks-matrix)))) - ;; The optional tab - (iup:vbox - ;; The Environment Overrides - (iup:frame - #:title "Env override" - envovrd-matrix) - ;; The valid values - (iup:frame - #:title "Validvalues" - validvals-matrix) - )))) - (iup:attribute-set! tabs "TABTITLE0" "Required settings") - (iup:attribute-set! tabs "TABTITLE1" "Optional settings") - tabs)) - )))) - -;; The runconfigs.config file -;; -(define (rconfig window-id) - (iup:vbox - (iup:frame #:title "Default"))) - -;;====================================================================== -;; T E S T S -;;====================================================================== - -(define (tree-path->test-id path) - (if (not (null? path)) - (hash-table-ref/default (dboard:data-path-test-ids *data*) path #f) - #f)) - -(define (test-panel window-id) - (let* ((curr-row-num 0) - (viewlog (lambda (x) - (if (file-exists? logfile) - ;(system (conc "firefox " logfile "&")) - (iup:send-url logfile) - (message-window (conc "File " logfile " not found"))))) - (xterm (lambda (x) - (if (directory-exists? rundir) - (let ((shell (if (get-environment-variable "SHELL") - (conc "-e " (get-environment-variable "SHELL")) - ""))) - (system (conc "cd " rundir - ";xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) - (message-window (conc "Directory " rundir " not found"))))) - (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -12")) - (command-launch-button (iup:button "Execute!" - ;; #:expand "HORIZONTAL" - #:size "50x" - #:action (lambda (x) - (let ((cmd (iup:attribute command-text-box "VALUE"))) - (system (conc cmd " &")))))) - (run-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname - " -runtests " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - ";echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) - (remove-test (lambda (x) - (iup:attribute-set! - command-text-box "VALUE" - (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " :runname " runname - " -testpatt " (conc testname "/" (if (equal? item-path "") - "%" - item-path)) - " -v;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) - (run-info-matrix (iup:matrix - #:expand "YES" - ;; #:scrollbar "YES" - #:numcol 1 - #:numlin 4 - #:numcol-visible 1 - #:numlin-visible 4 - #:click-cb (lambda (obj lin col status) - (print "obj: " obj " lin: " lin " col: " col " status: " status)))) - (test-info-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 7 - #:numcol-visible 1 - #:numlin-visible 7)) - (test-run-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 5)) - (meta-dat-matrix (iup:matrix - #:expand "YES" - #:numcol 1 - #:numlin 5 - #:numcol-visible 1 - #:numlin-visible 5)) - (steps-matrix (iup:matrix - #:expand "YES" - #:numcol 6 - #:numlin 50 - #:numcol-visible 6 - #:numlin-visible 8)) - (data-matrix (iup:matrix - #:expand "YES" - #:numcol 8 - #:numlin 50 - #:numcol-visible 8 - #:numlin-visible 8)) - (updater (lambda (testdat) - (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix)))) - - ;; Set the updater in updaters - (hash-table-set! (dboard:data-updaters *data*) window-id updater) - ;; - (for-each - (lambda (mat) - ;; (iup:attribute-set! mat "0:1" "Value") - ;; (iup:attribute-set! mat "0:0" "Var") - (iup:attribute-set! mat "HEIGHT0" 0) - (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") - ;; (iup:attribute-set! mat "FIXTOTEXT" "C1") - (iup:attribute-set! mat "RESIZEMATRIX" "YES")) - ;; (iup:attribute-set! mat "WIDTH1" "120") - ;; (iup:attribute-set! mat "WIDTH0" "100")) - (list run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix)) - - ;; Steps matrix - (iup:attribute-set! steps-matrix "0:1" "Step Name") - (iup:attribute-set! steps-matrix "0:2" "Start") - (iup:attribute-set! steps-matrix "WIDTH2" "40") - (iup:attribute-set! steps-matrix "0:3" "End") - (iup:attribute-set! steps-matrix "WIDTH3" "40") - (iup:attribute-set! steps-matrix "0:4" "Status") - (iup:attribute-set! steps-matrix "WIDTH4" "40") - (iup:attribute-set! steps-matrix "0:5" "Duration") - (iup:attribute-set! steps-matrix "WIDTH5" "40") - (iup:attribute-set! steps-matrix "0:6" "Log File") - (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") - ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") - (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") - ;; (iup:attribute-set! steps-matrix "WIDTH1" "120") - ;; (iup:attribute-set! steps-matrix "WIDTH0" "100") - - ;; Data matrix - ;; - (let ((rownum 1)) - (for-each - (lambda (x) - (iup:attribute-set! data-matrix (conc "0:" rownum) x) - (iup:attribute-set! data-matrix (conc "WIDTH" rownum) "50") - (set! rownum (+ rownum 1))) - (list "Category" "Variable" "Value" "Expected" "Tolerance" "Status" "Units" "Type" "Comment"))) - (iup:attribute-set! data-matrix "REDRAW" "ALL") - - (for-each - (lambda (data) - (let ((mat (car data)) - (keys (cadr data)) - (rownum 1)) - (for-each - (lambda (key) - (iup:attribute-set! mat (conc rownum ":0") key) - (set! rownum (+ rownum 1))) - keys) - (iup:attribute-set! mat "REDRAW" "ALL"))) - (list - (list run-info-matrix '("Run Id" "Target" "Runname" "Run Start Time" )) - (list test-info-matrix '("Test Id" "Testname" "Itempath" "State" "Status" "Test Start Time" "Comment")) - (list test-run-matrix '("Hostname" "Host info" "Disk Free" "CPU Load" "Run Duration")) - (list meta-dat-matrix '("Author" "Owner" "Last Reviewed" "Tags" "Description")))) - - (iup:split - #:orientation "HORIZONTAL" - (iup:vbox - (iup:hbox - (iup:vbox - run-info-matrix - test-info-matrix) - ;; test-info-matrix) - (iup:vbox - test-run-matrix - meta-dat-matrix)) - (iup:vbox - (iup:vbox - (iup:hbox - (iup:button "View Log" #:action viewlog #:size "60x" ) ;; #:size "30x" - (iup:button "Start Xterm" #:action xterm #:size "60x" )) ;; #:size "30x" - (iup:hbox - (iup:button "Run Test" #:action run-test #:size "60x" ) ;; #:size "30x" - (iup:button "Clean Test" #:action remove-test #:size "60x" ))) ;; #:size "30x" - (iup:hbox - ;; hiup:split ;; hbox - ;; #:orientation "HORIZONTAL" - ;; #:value 300 - command-text-box - command-launch-button))) - (iup:vbox - (let ((tabs (iup:tabs - steps-matrix - data-matrix))) - (iup:attribute-set! tabs "TABTITLE0" "Test Steps") - (iup:attribute-set! tabs "TABTITLE1" "Test Data") - tabs))))) - -;; Test browser -(define (tests window-id) - (iup:split - (let* ((tb (iup:treebox - #:selection-cb - (lambda (obj id state) - ;; (print "obj: " obj ", id: " id ", state: " state) - (let* ((run-path (tree:node->path obj id)) - (test-id (tree-path->test-id (cdr run-path)))) - (if test-id - (hash-table-set! (dboard:data-curr-test-ids *data*) - window-id test-id)) - (print "path: " (tree:node->path obj id) " test-id: " test-id)))))) - (iup:attribute-set! tb "VALUE" "0") - (iup:attribute-set! tb "NAME" "Runs") - ;;(iup:attribute-set! tb "ADDEXPANDED" "NO") - (dboard:data-tests-tree-set! *data* tb) - tb) - (test-panel window-id))) - -;; The function to update the fields in the test view panel -(define (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix) - ;; get test-id - ;; then get test record - (if testdat - (let* ((test-id (hash-table-ref/default (dboard:data-curr-test-ids *data*) window-id #f)) - (test-data (hash-table-ref/default testdat test-id #f)) - (run-id (db:test-get-run_id test-data)) - (targ/runname (hash-table-ref/default (dboard:data-run-keys *data*) - run-id - '())) - (target (if (null? targ/runname) "" (string-intersperse (reverse (cdr (reverse targ/runname))) "/"))) - (runname (if (null? targ/runname) "" (car (cdr targ/runname)))) - (steps-dat (tests:get-compressed-steps *dbstruct-local* run-id test-id))) - - (if test-data - (begin - ;; - (for-each - (lambda (data) - (let ((mat (car data)) - (vals (cadr data)) - (rownum 1)) - (for-each - (lambda (key) - (let ((cell (conc rownum ":1"))) - (if (not (equal? (iup:attribute mat cell)(conc key))) - (begin - ;; (print "setting cell " cell " in matrix " mat " to value " key) - (iup:attribute-set! mat cell (conc key)) - (iup:attribute-set! mat "REDRAW" cell))) - (set! rownum (+ rownum 1)))) - vals))) - (list - (list run-info-matrix - (if test-id - (list (db:test-get-run_id test-data) - target - runname - "n/a") - (make-list 4 ""))) - (list test-info-matrix - (if test-id - (list test-id - (db:test-get-testname test-data) - (db:test-get-item-path test-data) - (db:test-get-state test-data) - (db:test-get-status test-data) - (seconds->string (db:test-get-event_time test-data)) - (db:test-get-comment test-data)) - (make-list 7 ""))) - (list test-run-matrix - (if test-id - (list (db:test-get-host test-data) - (db:test-get-uname test-data) - (db:test-get-diskfree test-data) - (db:test-get-cpuload test-data) - (seconds->hr-min-sec (db:test-get-run_duration test-data))) - (make-list 5 ""))) - )) - (dcommon:populate-steps steps-dat steps-matrix)))))) - ;;(list meta-dat-matrix - ;; (if test-id - ;; (list ( - - -;; db:test-get-id -;; db:test-get-run_id -;; db:test-get-testname -;; db:test-get-state -;; db:test-get-status -;; db:test-get-event_time -;; db:test-get-host -;; db:test-get-cpuload -;; db:test-get-diskfree -;; db:test-get-uname -;; db:test-get-rundir -;; db:test-get-item-path -;; db:test-get-run_duration -;; db:test-get-final_logf -;; db:test-get-comment -;; db:test-get-fullname - - -;;====================================================================== -;; R U N C O N T R O L -;;====================================================================== - -;; Overall runs browser -;; -(define (runs window-id) - (let* ((runs-matrix (iup:matrix - #:expand "YES" - ;; #:fittosize "YES" - #:scrollbar "YES" - #:numcol 100 - #:numlin 100 - #:numcol-visible 7 - #:numlin-visible 7 - #:click-cb (lambda (obj lin col status) - (print "obj: " obj " lin: " lin " col: " col " status: " status))))) - - (iup:attribute-set! runs-matrix "RESIZEMATRIX" "YES") - (iup:attribute-set! runs-matrix "WIDTH0" "100") - - (dboard:data-runs-matrix-set! *data* runs-matrix) - (iup:hbox - (iup:frame - #:title "Runs browser" - (iup:vbox - runs-matrix))))) - -;; Browse and control a single run -;; -(define (runcontrol window-id) - (iup:hbox)) - -;;====================================================================== -;; D A S H B O A R D -;;====================================================================== - -;; Main Panel -(define (main-panel window-id) - (iup:dialog - #:title "Megatest Control Panel" - #:menu (dcommon:main-menu) - #:shrink "YES" - (let ((tabtop (iup:tabs - (runs window-id) - (tests window-id) - (runcontrol window-id) - (mtest window-id) - (rconfig window-id) - ))) - (iup:attribute-set! tabtop "TABTITLE0" "Runs") - (iup:attribute-set! tabtop "TABTITLE1" "Tests") - (iup:attribute-set! tabtop "TABTITLE2" "Run Control") - (iup:attribute-set! tabtop "TABTITLE3" "megatest.config") - (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config") - tabtop))) - -(define *current-window-id* 0) - -(define (newdashboard dbstruct) - (let* ((data (make-hash-table)) - (keys (db:get-keys dbstruct)) - (runname "%") - (testpatt "%") - (keypatts (map (lambda (k)(list k "%")) keys)) - (states '()) - (statuses '()) - (nextmintime (current-milliseconds)) - (my-window-id *current-window-id*)) - (set! *current-window-id* (+ 1 *current-window-id*)) - (dboard:data-runs-set! *data* data) ;; make this data available to the rest of the application - (iup:show (main-panel my-window-id)) - ;; Yes, running iup:show will pop up a new panel - ;; (iup:show (main-panel my-window-id)) - (iup:callback-set! *tim* - "ACTION_CB" - (lambda (x) - ;; Want to dedicate no more than 50% of the time to this so skip if - ;; 2x delta time has not passed since last query - (if (< nextmintime (current-milliseconds)) - (let* ((starttime (current-milliseconds)) - (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) - (endtime (current-milliseconds))) - (set! nextmintime (+ endtime (* 2 (- endtime starttime)))) - (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "...")) - (debug:print-info 11 *default-log-port* "Server overloaded")))))) - -(dboard:data-updaters-set! *data* (make-hash-table)) -(newdashboard *dbstruct-local*) -(iup:main-loop) ADDED oldsrc/debugger.scm Index: oldsrc/debugger.scm ================================================================== --- /dev/null +++ oldsrc/debugger.scm @@ -0,0 +1,73 @@ +(use iup) + +(define *debugger-control* #f) +(define *debugger-rownum* 0) +(define *debugger-matrix* #f) +(define *debugger* #f) + +(define (debugger) + (if (not *debugger*) + (set! *debugger* + (thread-start! + (make-thread + (lambda () + (show + (dialog + (let ((pause #f) + (mtrx (matrix + #:expand "YES" + #:numlin 30 + #:numcol 3 + #:numlin-visible 20 + #:numcol-visible 2 + #:alignment1 "ALEFT" + ))) + (set! pause (button "Pause" + #:action (lambda (obj) + (set! *debugger-control* (not *debugger-control*)) + (attribute-set! pause "BGCOLOR" (if *debugger-control* + "200 0 0" + "0 0 200"))))) + (set! *debugger-matrix* mtrx) + (attribute-set! mtrx "WIDTH1" "300") + (vbox + mtrx + (hbox + pause))))) + (main-loop))))))) + +(define (debugger-start #!key (start 2)) + (set! *debugger-rownum* start)) + +(define (debugger-trace-var varname varval) + (let ((oldval (attribute *debugger-matrix* (conc *debugger-rownum* ":1"))) + (newval (conc varval))) + (if (not (equal? oldval newval)) + (begin + ;; (print "DEBUG: " varname " = " newval) + (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":0") varname) + (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":1") (conc varval)) + ;; (attribute-set! *debugger-matrix* "FITTOTEXT" "C1") + )) + (set! *debugger-rownum* (+ *debugger-rownum* 1)))) + + +(define (debugger-pauser) + (debugger) + (attribute-set! *debugger-matrix* "REDRAW" "ALL") + (let loop () + (if *debugger-control* + (begin + (print "PAUSED!") + (thread-sleep! 1) + (loop)) + ;;(thread-sleep! 0.01) + ))) + +;; ;; lets use the debugger eh? +;; (debugger-start) +;; (debugger-trace-var "can-run-more" can-run-more) +;; (debugger-trace-var "hed" hed) +;; (debugger-trace-var "prereqs-not-met" (runs:pretty-string prereqs-not-met)) +;; (debugger-pauser) + ADDED oldsrc/newdashboard.scm Index: oldsrc/newdashboard.scm ================================================================== --- /dev/null +++ oldsrc/newdashboard.scm @@ -0,0 +1,635 @@ +;;====================================================================== +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. +;;====================================================================== + +(use format numbers) +(require-library iup) +(import (prefix iup iup:)) +(use canvas-draw) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69) +(import (prefix sqlite3 sqlite3:)) + +(declare (uses margs)) +(declare (uses launch)) +(declare (uses megatest-version)) +(declare (uses gutils)) +(declare (uses db)) +(declare (uses server)) +(declare (uses synchash)) +(declare (uses dcommon)) +(declare (uses tree)) + +(include "common_records.scm") +(include "db_records.scm") +(include "key_records.scm") + +(define help (conc +"Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest + version " megatest-version " + license GPL, Copyright (C) Matt Welland 2011 + +Usage: dashboard [options] + -h : this help + -server host:port : connect to host:port instead of db access + -test testid : control test identified by testid + -guimonitor : control panel for runs + +Misc + -rows N : set number of rows +")) + +;; process args +(define remargs (args:get-args + (argv) + (list "-rows" + "-run" + "-test" + "-debug" + "-host" + ) + (list "-h" + "-guimonitor" + "-main" + "-v" + "-q" + ) + args:arg-hash + 0)) + +(if (args:get-arg "-h") + (begin + (print help) + (exit))) + +(if (not (launch:setup)) + (begin + (print "Failed to find megatest.config, exiting") + (exit 1))) + +;; (if (args:get-arg "-host") +;; (begin +;; (set! *runremote* (string-split (args:get-arg "-host" ":"))) +;; (client:launch)) +;; (client:launch)) + +;; ease debugging by loading ~/.dashboardrc +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc"))) + (if (file-exists? debugcontrolf) + (load debugcontrolf))) + +(define *dbdir* (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db")) +(define *dbstruct-local* (make-dbr:dbstruct path: *dbdir* + local: #t)) +(define *db-file-path* (db:dbfile-path 0)) + +;; HACK ALERT: this is a hack, please fix. +(define *read-only* (not (file-read-access? *db-file-path*))) + +(debug:setup) + +(define *tim* (iup:timer)) +(define *ord* #f) + +(iup:attribute-set! *tim* "TIME" 300) +(iup:attribute-set! *tim* "RUN" "YES") + +(define (message-window msg) + (iup:show + (iup:dialog + (iup:vbox + (iup:label msg #:margin "40x40"))))) + +(define (iuplistbox-fill-list lb items . default) + (let ((i 1) + (selected-item (if (null? default) #f (car default)))) + (iup:attribute-set! lb "VALUE" (if selected-item selected-item "")) + (for-each (lambda (item) + (iup:attribute-set! lb (number->string i) item) + (if selected-item + (if (equal? selected-item item) + (iup:attribute-set! lb "VALUE" item))) ;; (number->string i)))) + (set! i (+ i 1))) + items) + i)) + +(define (pad-list l n)(append l (make-list (- n (length l))))) + + +(define (mkstr . x) + (string-intersperse (map conc x) ",")) + +(define (update-search x val) + (hash-table-set! *searchpatts* x val)) + +;; mtest is actually the megatest.config file +;; +(define (mtest window-id) + (let* ((curr-row-num 0) + (rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string)) + (keys-matrix (dcommon:keys-matrix rawconfig)) + (setup-matrix (dcommon:section-matrix rawconfig "setup" "Varname" "Value")) + (jobtools-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 5 + #:numcol-visible 1 + #:numlin-visible 3)) + (validvals-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 2 + #:numcol-visible 1 + #:numlin-visible 2)) + (envovrd-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 20 + #:numcol-visible 1 + #:numlin-visible 8)) + (disks-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 20 + #:numcol-visible 1 + #:numlin-visible 8)) + ) + (iup:attribute-set! disks-matrix "0:0" "Disk Name") + (iup:attribute-set! disks-matrix "0:1" "Disk Path") + (iup:attribute-set! disks-matrix "WIDTH1" "120") + (iup:attribute-set! disks-matrix "WIDTH0" "100") + (iup:attribute-set! disks-matrix "ALIGNMENT1" "ALEFT") + (iup:attribute-set! disks-matrix "FIXTOTEXT" "C1") + (iup:attribute-set! disks-matrix "RESIZEMATRIX" "YES") + + ;; fill in existing info + (for-each + (lambda (mat fname) + (set! curr-row-num 1) + (for-each + (lambda (var) + (iup:attribute-set! mat (conc curr-row-num ":0") var) + (iup:attribute-set! mat (conc curr-row-num ":1") (config-lookup rawconfig fname var)) + (set! curr-row-num (+ curr-row-num 1))) + (configf:section-vars rawconfig fname))) + (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix disks-matrix) + (list "setup" "jobtools" "validvalues" "env-override" "disks")) + + (for-each + (lambda (mat) + (iup:attribute-set! mat "0:1" "Value") + (iup:attribute-set! mat "0:0" "Var") + (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") + (iup:attribute-set! mat "FIXTOTEXT" "C1") + (iup:attribute-set! mat "RESIZEMATRIX" "YES") + (iup:attribute-set! mat "WIDTH1" "120") + (iup:attribute-set! mat "WIDTH0" "100") + ) + (list setup-matrix jobtools-matrix validvals-matrix envovrd-matrix)) + + (iup:attribute-set! validvals-matrix "WIDTH1" "290") + (iup:attribute-set! envovrd-matrix "WIDTH1" "290") + + (iup:vbox + (iup:hbox + + (iup:vbox + (let ((tabs (iup:tabs + ;; The required tab + (iup:hbox + ;; The keys + (iup:frame + #:title "Keys (required)" + (iup:vbox + (iup:label (conc "Set the fields for organising your runs\n" + "here. Note: can only be changed before\n" + "running the first run when megatest.db\n" + "is created.")) + keys-matrix)) + (iup:vbox + ;; The setup section + (iup:frame + #:title "Setup" + (iup:vbox + (iup:label (conc "max_concurrent_jobs : limits total concurrent jobs (optional)\n" + "linktree : directory where linktree will be created.")) + setup-matrix)) + ;; The jobtools + (iup:frame + #:title "Jobtools" + (iup:vbox + (iup:label (conc "launcher : tool or script to run jobs (try nbfake)\n" + "useshell : use system to run your launcher\n" + "workhosts : spread jobs out on these hosts")) + jobtools-matrix)) + ;; The disks + (iup:frame + #:title "Disks" + (iup:vbox + (iup:label (conc "Enter names and existing paths of locations to run tests")) + disks-matrix)))) + ;; The optional tab + (iup:vbox + ;; The Environment Overrides + (iup:frame + #:title "Env override" + envovrd-matrix) + ;; The valid values + (iup:frame + #:title "Validvalues" + validvals-matrix) + )))) + (iup:attribute-set! tabs "TABTITLE0" "Required settings") + (iup:attribute-set! tabs "TABTITLE1" "Optional settings") + tabs)) + )))) + +;; The runconfigs.config file +;; +(define (rconfig window-id) + (iup:vbox + (iup:frame #:title "Default"))) + +;;====================================================================== +;; T E S T S +;;====================================================================== + +(define (tree-path->test-id path) + (if (not (null? path)) + (hash-table-ref/default (dboard:data-path-test-ids *data*) path #f) + #f)) + +(define (test-panel window-id) + (let* ((curr-row-num 0) + (viewlog (lambda (x) + (if (file-exists? logfile) + ;(system (conc "firefox " logfile "&")) + (iup:send-url logfile) + (message-window (conc "File " logfile " not found"))))) + (xterm (lambda (x) + (if (directory-exists? rundir) + (let ((shell (if (get-environment-variable "SHELL") + (conc "-e " (get-environment-variable "SHELL")) + ""))) + (system (conc "cd " rundir + ";xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) + (message-window (conc "Directory " rundir " not found"))))) + (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -12")) + (command-launch-button (iup:button "Execute!" + ;; #:expand "HORIZONTAL" + #:size "50x" + #:action (lambda (x) + (let ((cmd (iup:attribute command-text-box "VALUE"))) + (system (conc cmd " &")))))) + (run-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname + " -runtests " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + ";echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + (remove-test (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " :runname " runname + " -testpatt " (conc testname "/" (if (equal? item-path "") + "%" + item-path)) + " -v;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + (run-info-matrix (iup:matrix + #:expand "YES" + ;; #:scrollbar "YES" + #:numcol 1 + #:numlin 4 + #:numcol-visible 1 + #:numlin-visible 4 + #:click-cb (lambda (obj lin col status) + (print "obj: " obj " lin: " lin " col: " col " status: " status)))) + (test-info-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 7 + #:numcol-visible 1 + #:numlin-visible 7)) + (test-run-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 5 + #:numcol-visible 1 + #:numlin-visible 5)) + (meta-dat-matrix (iup:matrix + #:expand "YES" + #:numcol 1 + #:numlin 5 + #:numcol-visible 1 + #:numlin-visible 5)) + (steps-matrix (iup:matrix + #:expand "YES" + #:numcol 6 + #:numlin 50 + #:numcol-visible 6 + #:numlin-visible 8)) + (data-matrix (iup:matrix + #:expand "YES" + #:numcol 8 + #:numlin 50 + #:numcol-visible 8 + #:numlin-visible 8)) + (updater (lambda (testdat) + (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix)))) + + ;; Set the updater in updaters + (hash-table-set! (dboard:data-updaters *data*) window-id updater) + ;; + (for-each + (lambda (mat) + ;; (iup:attribute-set! mat "0:1" "Value") + ;; (iup:attribute-set! mat "0:0" "Var") + (iup:attribute-set! mat "HEIGHT0" 0) + (iup:attribute-set! mat "ALIGNMENT1" "ALEFT") + ;; (iup:attribute-set! mat "FIXTOTEXT" "C1") + (iup:attribute-set! mat "RESIZEMATRIX" "YES")) + ;; (iup:attribute-set! mat "WIDTH1" "120") + ;; (iup:attribute-set! mat "WIDTH0" "100")) + (list run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix)) + + ;; Steps matrix + (iup:attribute-set! steps-matrix "0:1" "Step Name") + (iup:attribute-set! steps-matrix "0:2" "Start") + (iup:attribute-set! steps-matrix "WIDTH2" "40") + (iup:attribute-set! steps-matrix "0:3" "End") + (iup:attribute-set! steps-matrix "WIDTH3" "40") + (iup:attribute-set! steps-matrix "0:4" "Status") + (iup:attribute-set! steps-matrix "WIDTH4" "40") + (iup:attribute-set! steps-matrix "0:5" "Duration") + (iup:attribute-set! steps-matrix "WIDTH5" "40") + (iup:attribute-set! steps-matrix "0:6" "Log File") + (iup:attribute-set! steps-matrix "ALIGNMENT1" "ALEFT") + ;; (iup:attribute-set! steps-matrix "FIXTOTEXT" "C1") + (iup:attribute-set! steps-matrix "RESIZEMATRIX" "YES") + ;; (iup:attribute-set! steps-matrix "WIDTH1" "120") + ;; (iup:attribute-set! steps-matrix "WIDTH0" "100") + + ;; Data matrix + ;; + (let ((rownum 1)) + (for-each + (lambda (x) + (iup:attribute-set! data-matrix (conc "0:" rownum) x) + (iup:attribute-set! data-matrix (conc "WIDTH" rownum) "50") + (set! rownum (+ rownum 1))) + (list "Category" "Variable" "Value" "Expected" "Tolerance" "Status" "Units" "Type" "Comment"))) + (iup:attribute-set! data-matrix "REDRAW" "ALL") + + (for-each + (lambda (data) + (let ((mat (car data)) + (keys (cadr data)) + (rownum 1)) + (for-each + (lambda (key) + (iup:attribute-set! mat (conc rownum ":0") key) + (set! rownum (+ rownum 1))) + keys) + (iup:attribute-set! mat "REDRAW" "ALL"))) + (list + (list run-info-matrix '("Run Id" "Target" "Runname" "Run Start Time" )) + (list test-info-matrix '("Test Id" "Testname" "Itempath" "State" "Status" "Test Start Time" "Comment")) + (list test-run-matrix '("Hostname" "Host info" "Disk Free" "CPU Load" "Run Duration")) + (list meta-dat-matrix '("Author" "Owner" "Last Reviewed" "Tags" "Description")))) + + (iup:split + #:orientation "HORIZONTAL" + (iup:vbox + (iup:hbox + (iup:vbox + run-info-matrix + test-info-matrix) + ;; test-info-matrix) + (iup:vbox + test-run-matrix + meta-dat-matrix)) + (iup:vbox + (iup:vbox + (iup:hbox + (iup:button "View Log" #:action viewlog #:size "60x" ) ;; #:size "30x" + (iup:button "Start Xterm" #:action xterm #:size "60x" )) ;; #:size "30x" + (iup:hbox + (iup:button "Run Test" #:action run-test #:size "60x" ) ;; #:size "30x" + (iup:button "Clean Test" #:action remove-test #:size "60x" ))) ;; #:size "30x" + (iup:hbox + ;; hiup:split ;; hbox + ;; #:orientation "HORIZONTAL" + ;; #:value 300 + command-text-box + command-launch-button))) + (iup:vbox + (let ((tabs (iup:tabs + steps-matrix + data-matrix))) + (iup:attribute-set! tabs "TABTITLE0" "Test Steps") + (iup:attribute-set! tabs "TABTITLE1" "Test Data") + tabs))))) + +;; Test browser +(define (tests window-id) + (iup:split + (let* ((tb (iup:treebox + #:selection-cb + (lambda (obj id state) + ;; (print "obj: " obj ", id: " id ", state: " state) + (let* ((run-path (tree:node->path obj id)) + (test-id (tree-path->test-id (cdr run-path)))) + (if test-id + (hash-table-set! (dboard:data-curr-test-ids *data*) + window-id test-id)) + (print "path: " (tree:node->path obj id) " test-id: " test-id)))))) + (iup:attribute-set! tb "VALUE" "0") + (iup:attribute-set! tb "NAME" "Runs") + ;;(iup:attribute-set! tb "ADDEXPANDED" "NO") + (dboard:data-tests-tree-set! *data* tb) + tb) + (test-panel window-id))) + +;; The function to update the fields in the test view panel +(define (test-update window-id testdat run-info-matrix test-info-matrix test-run-matrix meta-dat-matrix steps-matrix data-matrix) + ;; get test-id + ;; then get test record + (if testdat + (let* ((test-id (hash-table-ref/default (dboard:data-curr-test-ids *data*) window-id #f)) + (test-data (hash-table-ref/default testdat test-id #f)) + (run-id (db:test-get-run_id test-data)) + (targ/runname (hash-table-ref/default (dboard:data-run-keys *data*) + run-id + '())) + (target (if (null? targ/runname) "" (string-intersperse (reverse (cdr (reverse targ/runname))) "/"))) + (runname (if (null? targ/runname) "" (car (cdr targ/runname)))) + (steps-dat (tests:get-compressed-steps *dbstruct-local* run-id test-id))) + + (if test-data + (begin + ;; + (for-each + (lambda (data) + (let ((mat (car data)) + (vals (cadr data)) + (rownum 1)) + (for-each + (lambda (key) + (let ((cell (conc rownum ":1"))) + (if (not (equal? (iup:attribute mat cell)(conc key))) + (begin + ;; (print "setting cell " cell " in matrix " mat " to value " key) + (iup:attribute-set! mat cell (conc key)) + (iup:attribute-set! mat "REDRAW" cell))) + (set! rownum (+ rownum 1)))) + vals))) + (list + (list run-info-matrix + (if test-id + (list (db:test-get-run_id test-data) + target + runname + "n/a") + (make-list 4 ""))) + (list test-info-matrix + (if test-id + (list test-id + (db:test-get-testname test-data) + (db:test-get-item-path test-data) + (db:test-get-state test-data) + (db:test-get-status test-data) + (seconds->string (db:test-get-event_time test-data)) + (db:test-get-comment test-data)) + (make-list 7 ""))) + (list test-run-matrix + (if test-id + (list (db:test-get-host test-data) + (db:test-get-uname test-data) + (db:test-get-diskfree test-data) + (db:test-get-cpuload test-data) + (seconds->hr-min-sec (db:test-get-run_duration test-data))) + (make-list 5 ""))) + )) + (dcommon:populate-steps steps-dat steps-matrix)))))) + ;;(list meta-dat-matrix + ;; (if test-id + ;; (list ( + + +;; db:test-get-id +;; db:test-get-run_id +;; db:test-get-testname +;; db:test-get-state +;; db:test-get-status +;; db:test-get-event_time +;; db:test-get-host +;; db:test-get-cpuload +;; db:test-get-diskfree +;; db:test-get-uname +;; db:test-get-rundir +;; db:test-get-item-path +;; db:test-get-run_duration +;; db:test-get-final_logf +;; db:test-get-comment +;; db:test-get-fullname + + +;;====================================================================== +;; R U N C O N T R O L +;;====================================================================== + +;; Overall runs browser +;; +(define (runs window-id) + (let* ((runs-matrix (iup:matrix + #:expand "YES" + ;; #:fittosize "YES" + #:scrollbar "YES" + #:numcol 100 + #:numlin 100 + #:numcol-visible 7 + #:numlin-visible 7 + #:click-cb (lambda (obj lin col status) + (print "obj: " obj " lin: " lin " col: " col " status: " status))))) + + (iup:attribute-set! runs-matrix "RESIZEMATRIX" "YES") + (iup:attribute-set! runs-matrix "WIDTH0" "100") + + (dboard:data-runs-matrix-set! *data* runs-matrix) + (iup:hbox + (iup:frame + #:title "Runs browser" + (iup:vbox + runs-matrix))))) + +;; Browse and control a single run +;; +(define (runcontrol window-id) + (iup:hbox)) + +;;====================================================================== +;; D A S H B O A R D +;;====================================================================== + +;; Main Panel +(define (main-panel window-id) + (iup:dialog + #:title "Megatest Control Panel" + #:menu (dcommon:main-menu) + #:shrink "YES" + (let ((tabtop (iup:tabs + (runs window-id) + (tests window-id) + (runcontrol window-id) + (mtest window-id) + (rconfig window-id) + ))) + (iup:attribute-set! tabtop "TABTITLE0" "Runs") + (iup:attribute-set! tabtop "TABTITLE1" "Tests") + (iup:attribute-set! tabtop "TABTITLE2" "Run Control") + (iup:attribute-set! tabtop "TABTITLE3" "megatest.config") + (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config") + tabtop))) + +(define *current-window-id* 0) + +(define (newdashboard dbstruct) + (let* ((data (make-hash-table)) + (keys (db:get-keys dbstruct)) + (runname "%") + (testpatt "%") + (keypatts (map (lambda (k)(list k "%")) keys)) + (states '()) + (statuses '()) + (nextmintime (current-milliseconds)) + (my-window-id *current-window-id*)) + (set! *current-window-id* (+ 1 *current-window-id*)) + (dboard:data-runs-set! *data* data) ;; make this data available to the rest of the application + (iup:show (main-panel my-window-id)) + ;; Yes, running iup:show will pop up a new panel + ;; (iup:show (main-panel my-window-id)) + (iup:callback-set! *tim* + "ACTION_CB" + (lambda (x) + ;; Want to dedicate no more than 50% of the time to this so skip if + ;; 2x delta time has not passed since last query + (if (< nextmintime (current-milliseconds)) + (let* ((starttime (current-milliseconds)) + (changes (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id)) + (endtime (current-milliseconds))) + (set! nextmintime (+ endtime (* 2 (- endtime starttime)))) + (debug:print 11 *default-log-port* "CHANGE(S): " (car changes) "...")) + (debug:print-info 11 *default-log-port* "Server overloaded")))))) + +(dboard:data-updaters-set! *data* (make-hash-table)) +(newdashboard *dbstruct-local*) +(iup:main-loop) Index: remotediff-nmsg.scm ================================================================== --- remotediff-nmsg.scm +++ remotediff-nmsg.scm @@ -38,10 +38,15 @@ #f) (loop (read-line inp))))))) (define *max-running* 40) +(define my-mutex-lock! conc) +(define my-mutex-unlock! conc) +;; (define my-mutex-lock! mutex-lock!) +;; (define my-mutex-unlock! mutex-unlock!) + (define (gather-dir-info path) (let ((mtx1 (make-mutex)) (threads (make-hash-table)) (last-num 0) (req (nn-socket 'req))) @@ -58,47 +63,47 @@ ((directory? p) '(dir)) ((symbolic-link? p) (list 'symlink (read-symbolic-link p))) (else '(data))))) (if (eq? (car info) 'data) (let loop ((start-time (current-seconds))) - (mutex-lock! mtx1) + (my-mutex-lock! mtx1) (let* ((num-threads (hash-table-size threads)) (ok-to-run (> *max-running* num-threads))) ;; (if (> (abs (- num-threads last-num)) 2) ;; (begin ;; ;; (print "num-threads:" num-threads) ;; (set! last-num num-threads))) - (mutex-unlock! mtx1) + (my-mutex-unlock! mtx1) (if ok-to-run (let ((run-time-start (current-seconds))) ;; (print "num threads: " num-threads) (let ((th1 (make-thread (lambda () (let ((cksum (checksum mtx1 p cmd: "md5sum")) (run-time (- (current-seconds) run-time-start))) - (mutex-lock! mtx1) + (my-mutex-lock! mtx1) (client-send-receive req (conc p " " cksum)) - (mutex-unlock! mtx1)) + (my-mutex-unlock! mtx1)) (let loop2 () - (mutex-lock! mtx1) + (my-mutex-lock! mtx1) (let ((registered (hash-table-exists? threads p))) (if registered (begin ;; (print "deleting thread reference for " p) (hash-table-delete! threads p))) ;; delete myself - (mutex-unlock! mtx1) + (my-mutex-unlock! mtx1) (if (not registered) (begin (thread-sleep! 0.5) (loop2)))))) p))) (thread-start! th1) ;; (thread-sleep! 0.05) ;; give things a little time to get going ;; (thread-join! th1) ;; - (mutex-lock! mtx1) + (my-mutex-lock! mtx1) (hash-table-set! threads p th1) - (mutex-unlock! mtx1) + (my-mutex-unlock! mtx1) )) ;; thread is launched (let ((run-time (- (current-seconds) start-time))) ;; couldn't launch yet (cond ((< run-time 5)) ;; blast on through ((< run-time 30)(thread-sleep! 0.1)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -6,172 +6,184 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== - -(use json format) ;; RADT => purpose of json format?? +;; +(use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses tdb)) (declare (uses http-transport)) +(declare (uses rpc-transport)) ;;(declare (uses nmsg-transport)) +(include "common_records.scm") + ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; -;; ;; For debugging add the following to ~/.megatestrc -;; -;; (require-library trace) -;; (import trace) -;; (trace -;; rmt:send-receive -;; api:execute-requests -;; ) - ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u +(defstruct remote + (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) + (server-url (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f)) + (last-server-check 0) ;; last time we checked to see if the server was alive + (conndat #f) + (transport *transport-type*) + (server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== -;; -(define (rmt:write-frequency-over-limit? cmd run-id) - (and (not (member cmd api:read-only-queries)) - (let* ((tmprec (hash-table-ref/default *write-frequency* run-id #f)) - (record (if tmprec tmprec - (let ((v (vector (current-seconds) 0))) - (hash-table-set! *write-frequency* run-id v) - v))) - (count (+ 1 (vector-ref record 1))) - (start (vector-ref record 0)) - (queries-per-second (/ (* count 1.0) - (max (- (current-seconds) start) 1)))) - (vector-set! record 1 count) - (if (and (> count 10) - (> queries-per-second 10)) - (begin - (debug:print-info 1 *default-log-port* "db write rate too high, starting a server, count=" count " start=" start " run-id=" run-id " queries-per-second=" queries-per-second) - #t) - #f)))) - ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; (define (rmt:get-connection-info run-id) - (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) + (let ((cinfo (remote-conndat *runremote*))) (if cinfo cinfo - ;; NB// can cache the answer for server running for 10 seconds ... - ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected - ;; clean out old connections - ;; (mutex-lock! *db-multi-sync-mutex*) - (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin - (for-each - (lambda (run-id) - (let ((connection (hash-table-ref/default *runremote* run-id #f))) - (if (and (vector? connection) - (< (http-transport:server-dat-get-last-access connection) expire-time)) - (begin - (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses") - ;; bb- disabling nanomsg - ;; SHOULD CLOSE THE CONNECTION HERE - ;; (case *transport-type* - ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket - ;; (hash-table-ref *runremote* run-id))))) - (hash-table-delete! *runremote* run-id))))) - (hash-table-keys *runremote*))) - ;; (mutex-unlock! *db-multi-sync-mutex*) - ;; (mutex-lock! *send-receive-mutex*) - (let* ((run-id (if rid rid 0)) - (connection-info (rmt:get-connection-info run-id))) - ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) - (if connection-info - ;; use the server if have connection info - (let* ((dat (case *transport-type* - ((http)(condition-case - (http-transport:client-api-send-receive run-id connection-info cmd params) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail")))) - ;; ((nmsg)(condition-case - ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd params) - ;; ((timeout)(vector #f "timeout talking to server")))) - (else (exit)))) - (success (if (vector? dat) (vector-ref dat 0) #f)) - (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) - (if success - (begin - ;; (mutex-unlock! *send-receive-mutex*) - (case *transport-type* - ((http) res) ;; (db:string->obj res)) - ;; ((nmsg) res) - )) ;; (vector-ref res 1))) - (begin ;; let ((new-connection-info (client:setup run-id))) - (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.") - ;; (case *transport-type* - ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info)))) - (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection - ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. - ;; (if (eq? (modulo attemptnum 5) 0) - ;; (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) - ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications - (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) - ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) - - ;; no longer killing the server in http-transport:client-api-send-receive - ;; may kill it here but what are the criteria? - ;; start with three calls then kill server - ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) - ;; (thread-sleep! 2) - (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))) - ;; no connection info? try to start a server, or access locally if no - ;; server and the query is read-only - ;; - ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call - ;; - (if (and (< attemptnum 15) - (member cmd api:write-queries)) - (let ((faststart (configf:lookup *configdat* "server" "faststart"))) - (hash-table-delete! *runremote* run-id) - ;; (mutex-unlock! *send-receive-mutex*) - (if (and faststart (equal? faststart "no")) - (begin - (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) - (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? - (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) - (let ((start-time (current-milliseconds)) - (max-query (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") - "300"))) - (newres (rmt:open-qry-close-locally cmd run-id params))) - (let ((delta (- (current-milliseconds) start-time))) - (if (> delta max-query) - (begin - (debug:print-info 0 *default-log-port* "Starting server as query time " delta " is over the limit of " max-query) - (server:kind-run run-id))) - ;; return the result! - newres) - ))) - (begin - ;; (debug:print-error 0 *default-log-port* "Communication failed!") - ;; (mutex-unlock! *send-receive-mutex*) - ;; (exit) - (rmt:open-qry-close-locally cmd run-id params) - ))))) + + ;; do all the prep locked under the rmt-mutex + (mutex-lock! *rmt-mutex*) + + ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote* + ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. + ;; 3. do the query, if on homehost use local access + ;; + (let* ((start-time (current-seconds))) ;; snapshot time so all use cases get same value + (cond + ;; give up if more than 15 attempts + ((> attemptnum 15) + (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.") + (exit 1)) + ;; reset the connection if it has been unused too long + ((and *runremote* + (remote-conndat *runremote*) + (let ((expire-time (- start-time (remote-server-timeout *runremote*)))) + (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time))) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") + (remote-conndat-set! *runremote* #f) + (mutex-unlock! *rmt-mutex*) + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; ensure we have a record for our connection for given area + ((not *runremote*) + (set! *runremote* (make-remote)) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 1") + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; ensure we have a homehost record + ((not (pair? (remote-hh-dat *runremote*))) ;; have a homehost record? + (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little + (remote-hh-dat-set! *runremote* (common:get-homehost)) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; on homehost and this is a read + ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + (member cmd api:read-only-queries)) ;; this is a read + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") + (rmt:open-qry-close-locally cmd 0 params)) + ;; on homehost and this is a write, we already have a server + ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + (not (member cmd api:read-only-queries)) ;; this is a write + (remote-server-url *runremote*)) ;; have a server + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4") + (rmt:open-qry-close-locally cmd 0 params)) + ;; on homehost and this is a write, we have a server (we know because case 4 checked) + ((and (cdr (remote-hh-dat *runremote*)) ;; on homehost + (not (member cmd api:read-only-queries))) + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") + (rmt:open-qry-close-locally cmd 0 params)) + ;; no server contact made and this is a write, passively start a server + ((and (not (remote-server-url *runremote*)) + (not (member cmd api:read-only-queries))) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") + (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call + (if serverconn + (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed + (if (not (server:start-attempted? *toppath*)) + (server:kind-run *toppath*)))) + (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call + (begin + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") + (rmt:open-qry-close-locally cmd 0 params)) + (begin ;; not on homehost, start server and wait + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.2") + (tasks:start-and-wait-for-server (tasks:open-db) 0 15) + (rmt:send-receive cmd rid params attemptnum: attemptnum)))) + ;; if not on homehost ensure we have a connection to a live server + ;; NOTE: we *have* a homehost record by now + ((and (not (cdr (remote-hh-dat *runremote*))) ;; are we on a homehost? + (not (remote-conndat *runremote*))) ;; and no connection + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*)) + (mutex-unlock! *rmt-mutex*) + (tasks:start-and-wait-for-server (tasks:open-db) 0 15) + (let* ((cinfo (rmt:get-connection-info 0)) + (transport (if cinfo + (vector-ref cinfo 6) + (server:get-transport)))) ;; TODO: replace with tasks:server-dat-accessor-?? for transport + (remote-conndat-set! *runremote* cinfo) ;; calls client:setup which calls client:setup-http + (remote-transport-set! *runremote* transport)) + (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; all set up if get this far, dispatch the query + ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 7") + (rmt:open-qry-close-locally cmd (if rid rid 0) params)) + ;; not on homehost, do server query + (else + (mutex-unlock! *rmt-mutex*) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") + (let* ((conninfo (remote-conndat *runremote*)) + (dat (case (remote-transport *runremote*) + ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away + (http-transport:client-api-send-receive 0 conninfo cmd params) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) + ((rpc) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away + (rpc-transport:client-api-send-receive 0 conninfo cmd params) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (1)") + (exit)))) + (success (if (vector? dat) (vector-ref dat 0) #f)) + (res (if (vector? dat) (vector-ref dat 1) #f))) + (if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat) + (if success + (case (remote-transport *runremote*) + ((http rpc) res) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown") + (exit 1))) + (begin + (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) + (remote-conndat-set! *runremote* #f) + (remote-server-url-set! *runremote* #f) + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") + (tasks:start-and-wait-for-server (tasks:open-db) 0 15) + (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))))))))) (define (rmt:update-db-stats run-id rawcmd params duration) (mutex-lock! *db-stats-mutex*) (handle-exceptions exn @@ -227,18 +239,23 @@ (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) - (let* ((dbstruct-local (db:open-local-db-handle)) - (db-file-path (db:dbfile-path 0)) - ;; (read-only (not (file-read-access? db-file-path))) + (let* ((qry-is-write (not (member cmd api:read-only-queries))) + (db-file-path (db:dbfile-path)) ;; 0)) + (dbstruct-local (db:setup)) ;; make-dbr:dbstruct path: dbdir local: #t))) + (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) - (resdat (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))) + (resdat (if (not (and read-only qry-is-write)) + (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)) + (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) + (if (and read-only qry-is-write) + (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) (if (> remretries 0) (begin (debug:print-error 0 *default-log-port* "local query failed. Trying again.") (thread-sleep! (/ (random 5000) 1000)) ;; some random delay @@ -246,46 +263,46 @@ (begin (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") #f)) (begin ;; (rmt:update-db-stats run-id cmd params duration) - ;; mark this run as dirty if this was a write - (if (not (member cmd api:read-only-queries)) + ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it + (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) - ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f)) - ;; just set it every time. Is a write more expensive than a read and does it matter? - (hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write" - (mutex-unlock! *db-multi-sync-mutex*))) - res)))) + (set! *db-last-write* start-time) ;; the oldest "write" + (mutex-unlock! *db-multi-sync-mutex*))))) + res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) - ;; (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) + (transport (or (remote-transport *runremote*) (server:get-transport))) (res (handle-exceptions exn #f - (http-transport:client-api-send-receive run-id connection-info cmd params)))) -;; ((commfail) (vector #f "communications fail"))))) + (case transport + ((http) (http-transport:client-api-send-receive run-id connection-info cmd params)) + ((rpc) (rpc-transport:client-api-send-receive run-id connection-info cmd params)) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (2)") + (exit)) + + )))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) -;; (db:string->obj (vector-ref dat 1)) -;; (begin -;; (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat) -;; dat)))) - -;; Wrap json library for strings (why the ports crap in the first place?) -(define (rmt:dat->json-str dat) - (with-output-to-string - (lambda () - (json-write dat)))) - -(define (rmt:json-str->dat json-str) - (with-input-from-string json-str - (lambda () - (json-read)))) + +;; ;; Wrap json library for strings (why the ports crap in the first place?) +;; (define (rmt:dat->json-str dat) +;; (with-output-to-string +;; (lambda () +;; (json-write dat)))) +;; +;; (define (rmt:json-str->dat json-str) +;; (with-input-from-string json-str +;; (lambda () +;; (json-read)))) ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; @@ -304,18 +321,23 @@ ;;====================================================================== ;; M I S C ;;====================================================================== (define (rmt:login run-id) - (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) + (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; -(define (rmt:login-no-auto-client-setup connection-info run-id) - (case *transport-type* - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) +(define (rmt:login-no-auto-client-setup connection-info) + (case *transport-type* ;; run-id of 0 is just a placeholder + ((http rpc)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*))) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (3)") + (exit)) + + ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) )) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible @@ -525,12 +547,12 @@ (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup) (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup))) ;; state and status are extra hints not usually used in the calculation ;; -(define (rmt:roll-up-pass-fail-counts run-id test-name item-path state status) - (rmt:send-receive 'roll-up-pass-fail-counts run-id (list run-id test-name item-path state status))) +(define (rmt:roll-up-pass-fail-counts run-id test-name item-path state status comment) + (rmt:send-receive 'roll-up-pass-fail-counts run-id (list run-id test-name item-path state status comment))) (define (rmt:update-pass-fail-counts run-id test-name) (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name)) (define (rmt:top-test-set-per-pf-counts run-id test-name) @@ -589,12 +611,12 @@ (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update))) (define (rmt:find-and-mark-incomplete run-id ovr-deadtime) - (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) - (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime)))) + ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime)) + (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; ) (define (rmt:get-main-run-stats run-id) (rmt:send-receive 'get-main-run-stats #f (list run-id))) (define (rmt:get-var varname) Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -1,7 +1,7 @@ -;; Copyright 2006-2012, Matthew Welland. +;; Copyright 2006-2016, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the @@ -21,208 +21,621 @@ (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (include "common_records.scm") (include "db_records.scm") + +(define *heartbeat-mutex* (make-mutex)) +(define *server-loop-heart-beat* (current-seconds)) + ;; procstr is the name of the procedure to be called as a string -(define (rpc-transport:autoremote procstr params) - (handle-exceptions - exn - (begin - (debug:print 1 *default-log-port* "Remote failed for " proc " " params) - (apply (eval (string->symbol procstr)) params)) - ;; (if *runremote* - ;; (apply (eval (string->symbol (conc "remote:" procstr))) params) - (apply (eval (string->symbol procstr)) params))) +(define (rpc-transport:autoremote procstr params) ;; may be unused, I think api-exec deprecates this one. + (let* ((procsym (if (symbol? procstr) + procstr + (string->symbol (->string procstr)))) + (res + (begin + (apply (eval procsym) params)))) + res)) + + +;; rpc receiver +(define (rpc-transport:api-exec cmd params) + (let* ( (resdat (api:execute-requests *dbstruct-db* (vector cmd params))) ;; #( flag result ) + (flag (vector-ref resdat 0)) + (res (vector-ref resdat 1))) + + (mutex-lock! *heartbeat-mutex*) + + (set! *last-db-access* (current-seconds)) ;; bump *last-db-access*; this will renew keep-running thread's lease on life for another (server:get-timeout) seconds + ;;(BB> "in api-exec; last-db-access updated to "*last-db-access*) + (mutex-unlock! *heartbeat-mutex*) + + res)) + + +;; retry an operation (depends on srfi-18) +;; ================== +;; idea here is to avoid spending time on coding retrying something. Trying to be generic here. +;; +;; Exception handling: +;; ------------------- +;; if evaluating the thunk results in exception, it will be retried. +;; on last try, if final-failure-returns-actual is true, the exception will be re-thrown to caller. +;; +;; look at options below #!key to see how to configure behavior +;; +;; +(define (retry-thunk + the-thunk + #!key ;;;; options below + (accept-result? (lambda (x) x)) ;; retry if predicate applied to thunk's result is false + (retries 4) ;; how many tries + (failure-value #f) ;; return this on final failure, unless following option is enabled: + (final-failure-returns-actual #f) ;; on failure, on the last try, just return the result, not failure-value + + (retry-delay 0.1) ;; delay between tries + (back-off-factor 1) ;; multiply retry-delay by this factor on retry + (random-delay 0.1) ;; add a random portion of this value to wait + + (chatty #f) ;; print status as we go, for debugging. + ) + + (when chatty (print) (print "Entered retry-thunk") (print "-=-=-=-=-=-")) + (let* ((guarded-thunk ;; we are guarding the thunk against exceptions. We will record whether result of evaluation is an exception or a regular result. + (lambda () + (let* ((EXCEPTION (gensym)) ;; using gensym to avoid potential collision + (res + (condition-case + (the-thunk) ;; this is what we are guarding the execution of + [x () (cons EXCEPTION x)] + ))) + (cond + ((and (pair? res) (eq? (car res) EXCEPTION)) + (if chatty + (print " - the-thunk threw exception >"(cdr res)"<")) + (cons 'exception (cdr res))) + (else + (if chatty + (print " - the-thunk returned result >"res"<")) + (cons 'regular-result res))))))) + + (let loop ((guarded-res (guarded-thunk)) + (retries-left retries) + (fail-wait retry-delay)) + (if chatty (print " ==========")) + (let* ((wait-time (+ fail-wait (+ (* fail-wait back-off-factor) + (* random-delay + (/ (random 1024) 1024) )))) + (res-type (car guarded-res)) + (res-value (cdr guarded-res))) + (cond + ((and (eq? res-type 'regular-result) (accept-result? res-value)) + (if chatty (print " + return result that satisfied accept-result? >"res-value"<")) + res-value) + + ((> retries-left 0) + (if chatty (print " - sleep "wait-time)) + (thread-sleep! wait-time) + (if chatty (print " + retry ["retries-left" tries left]")) + (loop (guarded-thunk) + (sub1 retries-left) + wait-time)) + + ((eq? res-type 'regular-result) + (if final-failure-returns-actual + (begin + (if chatty (print " + last try failed- return the result >"res-value"<")) + res-value) + (begin + (if chatty (print " + last try failed- return canned failure value >"failure-value"<")) + failure-value))) + + (else ;; no retries left; result was not accepted and res-type can only be 'exception + (if final-failure-returns-actual + (begin + (if chatty (print " + last try failed with exception- re-throw it >"res-value"<")) + (abort res-value)); re-raise the exception. TODO: find a way for call-history to show as though from entry to this function + (begin + (if chatty (print " + last try failed with exception- return canned failure value >"failure-value"<")) + failure-value)))))))) + + +(define (rpc-transport:server-shutdown server-id rpc:listener ) ;;#!key (from-on-exit #f)) + ;;(on-exit (lambda () #t)) ;; turn off on-exit stuff + ;;(tcp-close rpc:listener) ;; gotta exit nicely + ;;(tasks:server-set-state! (db:delay-if-busy (tasks:open-db)) server-id "stopped") + + + ;; TODO: (low) the following is extraordinaritly slow. Maybe we don't even need portlogger for rpc anyway?? the exception-based failover when ports are taken is fast! + ;;(portlogger:open-run-close portlogger:set-port (rpc:default-server-port) "released") + + (set! *time-to-exit* #t) + ;;(if *dbstruct-db* (db:sync-touched *dbstruct-db* *run-id* force-sync: #t)) + + (server:remove-dotserver-file *toppath* "anyhost:anyport" force: #t) + (tasks:server-delete-record (db:delay-if-busy (tasks:open-db)) server-id " rpc-transport:keep-running complete") + + (rpc:close-all-connections!) + ;;(BB> "Before (exit) (from-on-exit="from-on-exit")") + ;;(unless from-on-exit (exit)) ;; sometimes we hang (around) here with 100% cpu. + ;;(BB> "After") + ;; strace reveals endless: + ;; getrusage(RUSAGE_SELF, {ru_utime={413, 917868}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 9874}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 13874}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 105880}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 109880}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 201886}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 205886}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 297892}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 301892}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 393898}, ru_stime={0, 60003}, ...}) = 0 + ;; getrusage(RUSAGE_SELF, {ru_utime={414, 397898}, ru_stime={0, 60003}, ...}) = 0 + ;; make a post to chicken-users w/ http://paste.call-cc.org/paste?id=60a4b66a29ccf7d11359ea866db642c970735978 + + + ;; (if from-on-exit + ;; ;; avoid above condition! End current process externally since 1 in 20 (exit)'s result in hung, 100% cpu zombies. (see above) + + (system (conc "kill -9 "(current-process-id))) + ) + ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (rpc-transport:launch run-id) - (let* ((tdbdat (tasks:open-db))) - (BB> "rpc-transport:launch fired for run-id="run-id) - (set! *run-id* run-id) - (if (args:get-arg "-daemonize") - (daemon:ize)) - (if (server:check-if-running run-id) - (begin - (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") - (exit 0))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) - (remtries 4)) - (if (not server-id) - (if (> remtries 0) - (begin - (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) - (- remtries 1))) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch"))) - (begin - (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) - (exit)))))) + (set! *run-id* run-id) + + ;; ;; send to background if requested + ;; (when (args:get-arg "-daemonize") + ;; (daemon:ize) + ;; (when *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it + ;; (current-error-port *alt-log-file*) + ;; (current-output-port *alt-log-file*))) + + ;; double check we dont alrady have a running server for this run-id + (when (and (server:read-dotserver *toppath*) + (server:check-if-running run-id)) + (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") + (exit 0)) + + ;; did not find server running, let's clean up the table of dead servers + (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy (tasks:open-db)) run-id "notresponding") + + (server:dotserver-starting) + + + + + + ;; let's get a server-id for this server + ;; if at first we do not suceed, try 3 more times. + (let ((server-id (retry-thunk + (lambda () (tasks:server-lock-slot (db:delay-if-busy (tasks:open-db)) run-id 'rpc)) + chatty: #f + final-failure-returns-actual: #t + retries: 4))) + (when (not server-id) ;; dang we couldn't get a server-id. + ;; since we didn't get the server lock we are going to clean up and bail out + + + (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") + (tasks:server-delete-records-for-this-pid (db:delay-if-busy (tasks:open-db)) " rpc-transport:launch") + (server:dotserver-starting-remove) + (exit 1)) + + ;; we got a server-id (and a corresponding entry in servers table in globally shared mdb) + ;; all systems go. Proceed to setup rpc server. + (rpc-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + run-id + server-id) + (exit))) + +(define *rpc-listener-port* #f) +(define *rpc-listener-port-bind-timestamp* #f) + +(define *on-exit-flag #f) + +(define (rpc-transport:server-dat-get-iface vec) (vector-ref vec 0)) +(define (rpc-transport:server-dat-get-port vec) (vector-ref vec 1)) +(define (rpc-transport:server-dat-get-last-access vec) (vector-ref vec 5)) +(define (rpc-transport:server-dat-get-transport vec) (vector-ref vec 6)) +(define (rpc-transport:server-dat-update-last-access vec) + (if (vector? vec) + (vector-set! vec 5 (current-seconds)) + (begin + (print-call-chain (current-error-port)) + (debug:print-error 0 *default-log-port* "call to rpc-transport:server-dat-update-last-access with non-vector!!")))) + + +(define *api-exec-ht* (make-hash-table)) +(define *api-exec-mutex* (make-mutex)) +;; let's see if caching the rpc stub curbs thread-profusion on server side +(define (rpc-transport:get-api-exec iface port) + (mutex-lock! *api-exec-mutex*) + (let* ((lu (hash-table-ref/default *api-exec-ht* (cons iface port) #f))) + (if lu + (begin + (mutex-unlock! *api-exec-mutex*) + lu) + (let ((res (rpc:procedure 'api-exec iface port))) + (hash-table-set! *api-exec-ht* (cons iface port) res) + (mutex-unlock! *api-exec-mutex*) + res)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; this client-side procedure makes rpc call to server and returns result +;; +(define (rpc-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)) + ;;(BB> "entered rpc-transport:client-api-send-receive with run-id="run-id " serverdat="serverdat" cmd="cmd" params="params" numretries="numretries) + (if (not (vector? serverdat)) + (begin + (BB> "WHAT?? for run-id="run-id", serverdat="serverdat) + (print-call-chain) + (rpc:close-all-connections!) + (exit 1))) + (let* ((iface (rpc-transport:server-dat-get-iface serverdat)) + (port (rpc-transport:server-dat-get-port serverdat)) + (res #f) + (api-exec (rpc-transport:get-api-exec iface port)) ;; chached by host/port. may need to clear... + (send-receive (lambda () + (tcp-buffer-size 0) + (set! res (retry-thunk + (lambda () + (condition-case + ;;(vector #t (run-remote cmd params)) + (vector 'success (api-exec cmd params)) + [x (exn i/o net) (vector 'comms-fail (conc "communications fail ["(->string x)"]") x)] + [x () (vector 'other-fail "other fail ["(->string x)"]" x)])) + chatty: #f + accept-result?: (lambda(x) + (and (vector? x) (vector-ref x 0))) + retries: 8 + back-off-factor: 1.5 + random-wait: 0.2 + retry-delay: 0.1 + final-failure-returns-actual: #t)) + ;;(BB> "HEY res="res) + res + )) + (th1 (make-thread send-receive "send-receive")) + (time-out-reached #f) + (time-out (lambda () + (thread-sleep! 45) + (set! time-out-reached #t) + (thread-terminate! th1) + + #f)) + + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) + ;;(BB> "alt got res="res) + (debug:print-info 11 *default-log-port* "got res=" res) + (if (vector? res) + (case (vector-ref res 0) + ((success) (vector #t (vector-ref res 1))) + ( + (comms-fail other-fail) + ;;(comms-fail) + (debug:print 0 *default-log-port* "WARNING: comms failure for rpc request >>"res"<<") + ;;(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #f (vector-ref res 1))) + (else + (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 1)) + (debug:print 0 *default-log-port* " client call chain:") + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " server call chain:") + (pp (vector-ref res 1) (current-error-port)) + (signal (vector-ref res 2)))) + (signal (make-composite-condition + (make-property-condition + 'timeout + 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))) + + (define (rpc-transport:run hostn run-id server-id) (debug:print 2 *default-log-port* "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) - (rpc:publish-procedure! 'server:login server:login) - (rpc:publish-procedure! 'testing (lambda () "Just testing")) + ;;====================================================================== + ;; start of publish-procedure section + ;;====================================================================== + (rpc:publish-procedure! 'server:login server:login) ;; this allows client to validate it is the same megatest instance as the server. No security here, just making sure we're in the right room. + (rpc:publish-procedure! + 'testing + (lambda () + "Just testing")) + + ;; procedure to receive arbitrary API request from client's rpc:send-receive/rpc-transport:client-api-send-receive + (rpc:publish-procedure! 'rpc-transport:autoremote rpc-transport:autoremote) + ;; can use this to run most anything at the remote + (rpc:publish-procedure! 'api-exec rpc-transport:api-exec) + + + ;;====================================================================== + ;; end of publish-procedure section + ;;====================================================================== + (let* ((db #f) - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + (hostname (let ((res (get-host-name))) res)) + (server-start-time (current-seconds)) + (server-timeout (server:get-timeout)) + (ipaddrstr (let* ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) - #f))) - (if ipstr ipstr hostn))) ;; hostname))) - (start-port (open-run-close tasks:server-get-next-port tasks:open-db)) + #f)) + (res (if ipstr ipstr hostn))) + res)) ;; hostname))) + (start-port (let ((res (portlogger:open-run-close portlogger:find-port))) ;; BB> TODO: remove portlogger! + res)) (link-tree-path (configf:lookup *configdat* "setup" "linktree")) - (rpc:listener (rpc-transport:find-free-port-and-open (rpc:default-server-port))) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; rpc:listener is the tcp-listen result from inside the find-free-port-and-open complex. + ;; It is our handle on the listening tcp port + ;; We will attach this to our rpc server with rpc:make-server in thread th1 . + (rpc:listener (rpc-transport:find-free-port-and-open start-port)) (th1 (make-thread (lambda () - ((rpc:make-server rpc:listener) #t)) + ;;(BB> "BEFORE rpc:make-server") + ((rpc:make-server rpc:listener) #t) + ;;(BB> "BEFORE rpc:make-server") + ) "rpc:server")) - ;; (cute (rpc:make-server rpc:listener) "rpc:server") - ;; 'rpc:server)) - (hostname (if (string=? "-" hostn) + + + (hostname (if (string=? "-" hostn) (get-host-name) hostn)) (ipaddrstr (if (string=? "-" hostn) (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - #f)) - (portnum (rpc:default-server-port)) - (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" portnum)) - (tdb (tasks:open-db))) + (string-intersperse + (map number->string + (u8vector->list + (hostname->ip hostn))) ".") + )) + (portnum (let ((res (rpc:default-server-port))) res)) + (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" portnum))) + + (when (not (equal? ipaddrstr (server:get-best-guess-address (get-host-name)))) + + (debug:print 0 *default-log-port* "Error: This host "(ip->string (hostname->ip (get-host-name)))" ("(get-host-name)") is not the homehost "ipaddrstr" ("(ip->hostname (string->ip ipaddrstr))"; Cannot proceed.") + (server:dotserver-starting-remove) + (tcp-close rpc:listener) ;; gotta exit nicely and free up that tcp port + (exit)) + + (tasks:server-set-interface-port (db:delay-if-busy (tasks:open-db)) server-id ipaddrstr portnum) + + ;;============================================================ + ;; activate thread th1 to attach opened tcp port to rpc server + ;;============================================================= (thread-start! th1) - (set! db *inmemdb*) - (open-run-close tasks:server-set-interface-port - tasks:open-db - server-id - ipaddrstr portnum) + (set! db *dbstruct-db*) + (debug:print 0 *default-log-port* "Server started on " host:port) - - ;; (trace rpc:publish-procedure!) - ;; (rpc:publish-procedure! 'server:login server:login) - ;; (rpc:publish-procedure! 'testing (lambda () "Just testing")) - - ;;====================================================================== - ;; ;; end of publish-procedure section - ;;====================================================================== - ;; - (on-exit (lambda () - (open-run-close tasks:server-set-state! tasks:open-db server-id "stopped"))) - - (set! *rpc:listener* rpc:listener) - (tasks:server-set-state! tdb server-id "running") - (set! *inmemdb* (db:setup run-id)) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - (let loop ((count 0)) - (thread-sleep! 5) ;; no need to do this very often - (let ((numrunning -1)) ;; (db:get-count-tests-running db))) - (if (or (> numrunning 0) - (> (+ *last-db-access* 60)(current-seconds))) - (begin - (debug:print-info 0 *default-log-port* "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) - (loop (+ 1 count))) - (begin - (debug:print-info 0 *default-log-port* "Starting to shutdown the server side") - (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop") - (thread-sleep! 10) - (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) - (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - )))))) - -(define (rpc-transport:find-free-port-and-open port) + ;;(BB> "before SELF-TEST") + (if (retry-thunk (lambda () + (rpc-transport:self-test run-id ipaddrstr portnum)) + final-failure-returns-actual: #t ;; TODO: remove this line + ) + (debug:print 0 *default-log-port* "INFO: rpc self test passed!") + (begin + (debug:print 0 *default-log-port* "Error: rpc listener did not pass self test. Shutting down. On: " host:port) + (tasks:server-set-state! (db:delay-if-busy (tasks:open-db)) server-id "dead") + (tcp-close rpc:listener) ;; gotta exit nicely and free up that tcp port + (rpc-transport:server-shutdown server-id rpc:listener) + (server:dotserver-starting-remove) + (exit))) + + + + + ;;(on-exit (lambda () + ;; (rpc-transport:server-shutdown server-id rpc:listener from-on-exit: #t))) + + ;; check again for running servers for this run-id in case one has snuck in since we checked last in rpc-transport:launch + (if (not (equal? server-id (tasks:server-am-i-the-server? (db:delay-if-busy (tasks:open-db)) run-id)));; try to ensure no double registering of servers + (begin ;; i am not the server, another server snuck in and beat this one to the punch + (tcp-close rpc:listener) ;; gotta exit nicely and free up that tcp port + (tasks:server-set-state! (db:delay-if-busy (tasks:open-db)) server-id "collision") + (server:dotserver-starting-remove)) + + (begin ;; i am the server + ;; setup the in-memory db + (set! *dbstruct-db* (db:setup run-id)) + (db:get-db *dbstruct-db* run-id) + + ;; at this point, satisfied server has started + ;; let's make it official + (server:write-dotserver *toppath* (conc ipaddrstr ":" portnum)) + (mutex-lock! *heartbeat-mutex*) + (set! *last-db-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*) + (set! *rpc:listener* rpc:listener) + (tasks:server-set-state! (db:delay-if-busy (tasks:open-db)) server-id "running") ;; update our mdb servers entry + + + + ;; this let loop will hold open this thread until we want the server to shut down. + ;; if no requests received within the last 20 seconds : + ;; database hasnt changed in ?? + ;; + + + + ;; keep-running loop: polls last-db-access to see if we have timed out. keep running if not. + (let loop ((count 0) + (bad-sync-count 0)) + (BB> "keep running: count = "count) + ;; Use this opportunity to sync the inmemdb to db + + (let ((start-time (current-milliseconds)) + (sync-time #f) + (rem-time #f)) + + ;; following is now done in common:watchdog, commenting out. sync-time will now be 0; can live with that. + ;; ;; inmemddb is a dbstruct + ;; (condition-case + ;; (db:sync-touched *dbstruct-db* *run-id* force-sync: #t) + ;; ((sync-failed)(cond + ;; ((> bad-sync-count 10) ;; time to give up + ;; (rpc-transport:server-shutdown server-id rpc:listener)) + ;; (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop + ;; (thread-sleep! 5) + ;; (loop count (+ bad-sync-count 1))))) + ;; ((exn) + ;; (debug:print-error 0 *default-log-port* "error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server ") + ;; (rpc-transport:server-shutdown server-id rpc:listener))) + + (set! sync-time (- (current-milliseconds) start-time)) + (set! rem-time (quotient (- 4000 sync-time) 1000)) + (debug:print 4 *default-log-port* "SYNC: time= " sync-time ", rem-time=" rem-time) + + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time) + (thread-sleep! 4))) ;; fallback for if the math is changed ... + + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1) bad-sync-count)) + + ;; BB: don't see how this is possible with RPC + ;; ;; Check that iface and port have not changed (can happen if server port collides) + ;; (mutex-lock! *heartbeat-mutex*) + ;; (set! sdat *server-info*) + ;; (mutex-unlock! *heartbeat-mutex*) + + ;; (if (or (not (equal? sdat (list iface port))) + ;; (not server-id)) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "interface changed, refreshing iface and port info") + ;; (set! iface (car sdat)) + ;; (set! port (cadr sdat)))) + + ;; Transfer *last-db-access* to last-access to use in checking that we are still alive + (mutex-lock! *heartbeat-mutex*) + (set! last-access *last-db-access*) + (mutex-unlock! *heartbeat-mutex*) + + ;; (debug:print 11 *default-log-port* "last-access=" last-access ", server-timeout=" server-timeout) + ;; + ;; no_traffic, no running tests, if server 0, no running servers + ;; + ;; (let ((wait-on-running (configf:lookup *configdat* "server" b"wait-on-running"))) ;; wait on running tasks (if not true then exit on time out) + ;; + (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)) + (adjusted-timeout (if (> hrs-since-start 1) + (- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour + server-timeout))) + (if (common:low-noise-print 120 "server timeout") + (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout)) + (if (and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) + (begin + (if (common:low-noise-print 120 "server continuing") + (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) + ;; + ;; Consider implementing some smarts here to re-insert the record or kill self is + ;; the db indicates so + ;; + (if (tasks:server-am-i-the-server? (db:delay-if-busy (tasks:open-db)) run-id) + (tasks:server-set-state! (db:delay-if-busy (tasks:open-db)) server-id "running")) + ;; + (loop 0 bad-sync-count)) + (begin + ;;(BB> "SERVER SHUTDOWN CALLED! last-access="last-access" current-seconds="(current-seconds)" server-timeout="server-timeout) + + (rpc-transport:server-shutdown server-id rpc:listener))))) + ;; end new loop + )))) + + +(define (rpc-transport:find-free-port-and-open port #!key ) (handle-exceptions exn - (begin + (begin (print "Failed to bind to port " (rpc:default-server-port) ", trying next port") - (rpc-transport:find-free-port-and-open (+ port 1))) + (rpc-transport:find-free-port-and-open (add1 port))) (rpc:default-server-port port) + (set! *rpc-listener-port* port) ;; a bit paranoid about rpc:default-server-port parameter not changing across threads (as params are wont to do). keeping this global in my back pocket in case this causes problems + (set! *rpc-listener-port-bind-timestamp* (current-milliseconds)) ;; may want to test how long it has been since the last bind attempt happened... (tcp-read-timeout 240000) - (tcp-listen (rpc:default-server-port) 10000))) - + (tcp-buffer-size 0) ;; gotta do this because http-transport undoes it. + (tcp-listen (rpc:default-server-port) 10000) + )) + (define (rpc-transport:ping run-id host port) (handle-exceptions exn (begin - (print "SERVER_NOT_FOUND") + (print "SERVER_NOT_FOUND exn="exn) (exit 1)) (let ((login-res ((rpc:procedure 'server:login host port) *toppath*))) - (if (and (list? login-res) - (car login-res)) + (if login-res (begin (print "LOGIN_OK") (exit 0)) (begin (print "LOGIN_FAILED") (exit 1)))))) -(define (rpc-transport:client-setup run-id #!key (remtries 10)) - (if *runremote* - (begin - (debug:print-error 0 *default-log-port* "Attempt to connect to server but already connected") - #f) - (let* ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (open-run-close db:get-var #f "SERVER")) - (if host-info - (let ((iface (car host-info)) - (port (cadr host-info)) - (ping-res ((rpc:procedure 'server:login host port) *toppath*))) - (if ping-res - (let ((server-dat (list iface port #f #f #f))) - (hash-table-set! *runremote* run-id server-dat) - server-dat) - (begin - (server:try-running run-id) - (thread-sleep! 2) - (rpc-transport:client-setup run-id (- remtries 1))))) - (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id))) - (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if server-db-info - (let* ((iface (tasks:hostinfo-get-interface server-db-info)) - (port (tasks:hostinfo-get-port server-db-info)) - (server-dat (list iface port #f #f #f)) - (ping-res ((rpc:procedure 'server:login host port) *toppath*))) - (if start-res - (begin - (hash-table-set! *runremote* run-id server-dat) - server-dat) - (begin - (server:try-running run-id) - (thread-sleep! 2) - (rpc-transport:client-setup run-id (- remtries 1))))) - (begin - (server:try-running run-id) - (thread-sleep! 2) - (rpc-transport:client-setup run-id (- remtries 1))))))))) -;; -;; (port (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f))) -;; (if (and port -;; (string->number port)) -;; (let ((portn (string->number port))) -;; (debug:print-info 2 *default-log-port* "Setting up to connect to host " host ":" port) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print-error 0 *default-log-port* "Failed to open a connection to the server at host: " host " port: " port) -;; (debug:print 0 *default-log-port* " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) -;; ;; (open-run-close -;; ;; (lambda (db . param) -;; ;; (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'")) -;; ;; #f) -;; (set! *runremote* #f)) -;; (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server -;; ((rpc:procedure 'server:login host portn) *toppath*)) -;; (begin -;; (debug:print-info 2 *default-log-port* "Logged in and connected to " host ":" port) -;; (set! *runremote* (vector host portn))) -;; (begin -;; (debug:print-info 2 *default-log-port* "Failed to login or connect to " host ":" port) -;; (set! *runremote* #f))))) -;; (debug:print-info 2 *default-log-port* "no server available"))))) - +(define (rpc-transport:self-test run-id host port) + (if (not host) + (abort "host not set.")) + (if (not port) + (abort "port not set.")) + (tcp-buffer-size 0) ;; gotta do this because http-transport undoes it. + (let* ((testing-res ((rpc:procedure 'testing host port))) + (login-res ((rpc:procedure 'server:login host port) *toppath*)) + (res (and login-res (equal? testing-res "Just testing")))) + + (if login-res + (begin + ;;(BB> "Self test PASS. login-res="login-res" testing-res="testing-res" *toppath*="*toppath*) + #t) + (begin + (BB> "Self test fail. login-res="login-res" testing-res="testing-res" *toppath*="*toppath*) + + #f)) + res)) + +(define (rpc-transport:client-setup run-id server-dat #!key (remaining-tries 10)) + ;;(BB> "entered rpc-transport:client-setup with run-id="run-id" and server-dat="server-dat" and retries="remaining-tries) + (tcp-buffer-size 0) + (debug:print-info 0 *default-log-port* "rpc-transport:client-setup run-id="run-id" server-dat=" server-dat ", remaining-tries=" remaining-tries) + (let* ((iface (tasks:hostinfo-get-interface server-dat)) + (hostname (tasks:hostinfo-get-hostname server-dat)) + (port (tasks:hostinfo-get-port server-dat)) + (runremote-server-dat (vector iface port #f #f #f (current-seconds) 'rpc)) ;; http version := (vector iface port api-uri api-url api-req (current-seconds) 'http ) + (ping-res (retry-thunk (lambda () ;; make 3 attempts to ping. + ((rpc:procedure 'server:login iface port) *toppath*)) + chatty: #f + retries: 3))) + ;; we got here from rmt:get-connection-info on the condition that *runremote* has no entry for run-id... + (if ping-res + (begin + (debug:print-info 0 *default-log-port* "rpc-transport:client-setup CONNECTION ESTABLISHED run-id="run-id" server-dat=" server-dat) + runremote-server-dat) + (begin ;; login failed but have a server record, clean out the record and try again + (debug:print-info 0 *default-log-port* "rpc-transport:client-setup UNABLE TO CONNECT run-id="run-id" server-dat=" server-dat) + (tasks:kill-server-run-id run-id) + (tasks:server-force-clean-run-record (db:delay-if-busy (tasks:open-db)) run-id iface port + " rpc-transport:client-setup (server-dat = #t)") + (if (> remaining-tries 2) + (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little + (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time + (server:try-running run-id) + (thread-sleep! 5) ;; give server a little time to start up + (client:setup run-id remaining-tries: (sub1 remaining-tries)))))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -222,11 +222,11 @@ ;; override the number of reruns from the configs (if (and config-reruns (> run-count config-reruns)) (set! run-count config-reruns)) - (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") @@ -928,11 +928,11 @@ (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) (hash-table-set! test-registry hed 'removed) (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL") ;; treat as FAIL + (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL (list (if (null? tal)(car newtal)(car tal)) tal reg reruns))))) ;; can't drop this - maybe running? Just keep trying @@ -1669,11 +1669,11 @@ (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) - (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) + ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) ;; seek and kill in flight -runtests with % as testpatt here ;; (if (equal? testpatt "%") (tasks:kill-runner target run-name testpatt) ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -34,11 +34,10 @@ (if (not hostport) #f (conc "http://" (car hostport) ":" (cadr hostport)))) (define *server-loop-heart-beat* (current-seconds)) -(define *heartbeat-mutex* (make-mutex)) ;;====================================================================== ;; S E R V E R ;;====================================================================== @@ -47,20 +46,24 @@ ;; all routes though here end in exit ... ;; ;; start_server ;; -(define (server:launch run-id transport-type) - (BB> "server:launch fired for run-id="run-id" transport-type="transport-type) - (case transport-type - ((http)(http-transport:launch run-id)) - ;;((nmsg)(nmsg-transport:launch run-id)) - ((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) -;; (else (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc") -;; (rpc-transport:launch run-id))))) - +(define (server:launch run-id transport-type-raw) + (let ((transport-type + (cond + ((string? transport-type-raw) (string->symbol transport-type-raw)) + (else transport-type-raw)))) + + ;;(BB> "server:launch fired for run-id="run-id" transport-type="transport-type) + + (case transport-type + ((http)(http-transport:launch run-id)) + ;;((nmsg)(nmsg-transport:launch run-id)) + ((rpc) (rpc-transport:launch run-id)) + (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))) + ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;; Get the transport @@ -68,11 +71,11 @@ (if *transport-type* *transport-type* (let ((ttype (string->symbol (or (args:get-arg "-transport") (configf:lookup *configdat* "server" "transport") - "rpc")))) + *DEFAULT-TRANSPORT*)))) (set! *transport-type* ttype) ttype))) ;; Generate a unique signature for this server (define (server:mk-signature) @@ -104,43 +107,31 @@ ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; -(define (server:run run-id) +(define (server:run areapath) ;; areapath is ignored for now. (let* ((curr-host (get-host-name)) (curr-ip (server:get-best-guess-address curr-host)) - (target-host (configf:lookup *configdat* "server" "homehost" )) + (curr-pid (current-process-id)) + (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (target-host (car homehost)) (testsuite (common:get-testsuite-name)) - (logfile (conc *toppath* "/logs/" run-id ".log")) + (logfile (conc *toppath* "/logs/server.log")) (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - (conc " -daemonize -log " logfile) - "") - " -m testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &"))))) - (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...") + " -server " (or target-host "-") " -run-id " 0 + (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + (conc " -daemonize -log " logfile) + "") + " -transport " (server:get-transport) + " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread"))) + ;; we want the remote server to start in *toppath* so push there (push-directory *toppath*) - (if (not (directory-exists? "logs"))(create-directory "logs")) - - ;; Rotate logs, logic: - ;; if > 500k and older than 1 week: - ;; remove previous compressed log and compress this log - ;; - (directory-fold - (lambda (file rem) - (if (and (string-match "^.*.log" file) - (> (file-size (conc "logs/" file)) 200000)) - (let ((gzfile (conc "logs/" file ".gz"))) - (if (file-exists? gzfile) - (begin - (debug:print-info 0 *default-log-port* "removing " gzfile) - (delete-file gzfile))) - (debug:print-info 0 *default-log-port* "compressing " file) - (system (conc "gzip logs/" file))))) - '() - "logs") - + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + ;; host.domain.tld match host? (if (and target-host ;; look at target host, is it host.domain.tld or ip address and does it ;; match current ip or hostname (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) @@ -152,11 +143,11 @@ (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - ;; (system cmdln) + (thread-join! log-rotate) (pop-directory))) (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) @@ -163,91 +154,161 @@ (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched -(define (server:kind-run run-id) - (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f))) +(define (server:kind-run areapath) + (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f))) (if (or (not last-run-time) (> (- (current-seconds) last-run-time) 30)) (begin - (server:run run-id) - (hash-table-set! *server-kind-run* run-id (current-seconds)))))) + (server:run areapath) + (hash-table-set! *server-kind-run* areapath (current-seconds)))))) ;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 ;; -(define (server:try-running run-id) - (if (eq? run-id 0) - (server:run run-id) - (rmt:start-server run-id))) - -(define (server:check-if-running run-id) - (let ((tdbdat (tasks:open-db))) - (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id)) - (trycount 0)) - (if server - ;; note: client:start will set *runremote*. this needs to be changed - ;; also, client:start will login to the server, also need to change that. - ;; - ;; client:start returns #t if login was successful. - ;; - (let ((res (case *transport-type* - ((http)(server:ping-server run-id - (tasks:hostinfo-get-interface server) - (tasks:hostinfo-get-port server))) - ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) - ;; (tasks:hostinfo-get-port server) - ;; timeout: 2)) - ))) - ;; if the server didn't respond we must remove the record +;; (define (server:try-running run-id) +;; (if (eq? run-id 0) +;; (server:run run-id) +;; (rmt:start-server run-id))) +(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. + +(define (server:start-attempted? areapath) + (let ((flagfile (conc areapath "/.starting-server"))) + (handle-exceptions + exn + #f ;; if things go wrong pretend we can't see the file + (and (file-exists? flagfile) + (< (- (current-seconds) + (file-modification-time flagfile)) + 15))))) ;; exists and less than 15 seconds old + +(define (server:read-dotserver areapath) + (let ((dotfile (conc areapath "/.server"))) + (handle-exceptions + exn + #f ;; if things go wrong pretend we can't see the file + (if (and (file-exists? dotfile) + (file-read-access? dotfile)) + (with-input-from-file + dotfile + (lambda () + (read-line))) + #f)))) + + +(define (server:dotserver-starting) + (with-output-to-file + (conc *toppath* "/.starting-server") + (lambda () + (print (current-process-id) " on " (get-host-name))))) + +(define (server:dotserver-starting-remove) + (delete-file* (conc *toppath* "/.starting-server"))) + + + +;; write a .server file in *toppath* with hostport +;; return #t on success, #f otherwise +;; +(define (server:write-dotserver areapath hostport) + (let ((lock-file (conc areapath "/.server.lock")) + (server-file (conc areapath "/.server"))) + (if (common:simple-file-lock lock-file) + (let ((res (handle-exceptions + exn + #f ;; failed for some reason, for the moment simply return #f + (with-output-to-file server-file + (lambda () + (print hostport))) + #t))) + (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " created") + (common:simple-file-release-lock lock-file) + res) + #f))) + +(define (server:remove-dotserver-file areapath hostport #!key (force #f)) + (let ((dotserver (server:read-dotserver areapath)) + (server-file (conc areapath "/.server")) + (lock-file (conc areapath "/.server.lock"))) + (if (or force (and dotserver (string-match (conc ".*:" hostport "$") dotserver))) ;; port matches, good enough info to decide to remove the file + (if (common:simple-file-lock lock-file) + (begin + (handle-exceptions + exn + #f + (delete-file* server-file)) + (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed") + (common:simple-file-release-lock lock-file)))))) + +;; no longer care if multiple servers are started by accident. older servers will drop off in time. +;; +(define (server:check-if-running areapath) + (let* ((dotserver (server:read-dotserver areapath))) ;; tdbdat (tasks:open-db))) + (if dotserver + (let* ((res (case *transport-type* + ((http rpc)(server:ping-server dotserver)) + ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) + ))) (if res - #t - (begin - (debug:print-info 0 *default-log-port* "server at " server " not responding, removing record") - (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id - " server:check-if-running") - res))) - #f)))) + dotserver + #f)) + #f))) ;; called in megatest.scm, host-port is string hostname:port ;; -(define (server:ping run-id host:port) - (let ((tdbdat (tasks:open-db))) - (let* ((host-port (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f))) - (toppath (launch:setup)) - (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat) run-id) #f))) - (if (not run-id) - (begin - (debug:print-error 0 *default-log-port* "must specify run-id when doing ping, -run-id n") - (print "ERROR: No run-id") - (exit 1)) - (if (and (not host-port) - (not server-db-dat)) - (begin - (print "ERROR: bad host:port") - (exit 1)) - (let* ((iface (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat))) - (port (if host-port (cadr host-port)(tasks:hostinfo-get-port server-db-dat))) - (server-dat (http-transport:client-connect iface port)) - (login-res (rmt:login-no-auto-client-setup server-dat run-id))) - (if (and (list? login-res) - (car login-res)) - (begin - (print "LOGIN_OK") - (exit 0)) - (begin - (print "LOGIN_FAILED") - (exit 1))))))))) +;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; in the same process as the server. +;; +(define (server:ping host-port-in #!key (do-exit #f)) + (let ((host:port (if (not host-port-in) ;; use read-dotserver to find + (server:read-dotserver *toppath*) + (if (number? host-port-in) ;; we were handed a server-id + (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) + ;; (print "srec: " srec " host-port-in: " host-port-in) + (if srec + (conc (vector-ref srec 3) ":" (vector-ref srec 4)) + (conc "no such server-id " host-port-in))) + host-port-in)))) + (let* ((host-port (if host:port + (let ((slst (string-split host:port ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f)) + #f)) + (toppath (launch:setup))) + ;; (print "host-port=" host-port) + (if (not host-port) + (begin + (if host-port-in + (debug:print 0 *default-log-port* "ERROR: bad host:port")) + (if do-exit (exit 1)) + #f) + (let* ((iface (car host-port)) + (port (cadr host-port)) + (server-dat + (case (remote-transport *runremote*) + ((http) (http-transport:client-connect iface port)) + ((rpc) (rpc-transport:client-connect iface port)) + (else + (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported (4)") + (exit)))) + (login-res (rmt:login-no-auto-client-setup server-dat))) + (if (and (list? login-res) + (car login-res)) + (begin + (print "LOGIN_OK") + (if do-exit (exit 0))) + (begin + (print "LOGIN_FAILED") + (if do-exit (exit 1))))))))) ;; run ping in separate process, safest way in some cases ;; -(define (server:ping-server run-id iface port) +(define (server:ping-server ifaceport) (with-input-from-pipe - (conc (common:get-megatest-exe) " -run-id " run-id " -ping " (conc iface ":" port)) + (conc (common:get-megatest-exe) " -ping " ifaceport) (lambda () (let loop ((inl (read-line)) (res "NOREPLY")) (if (eof-object? inl) (case (string->symbol res) @@ -256,18 +317,14 @@ (else #f)) (loop (read-line) inl)))))) (define (server:login toppath) (lambda (toppath) - (set! *last-db-access* (current-seconds)) + (set! *db-last-access* (current-seconds)) ;; might not be needed. (if (equal? *toppath* toppath) - (begin - ;; (debug:print-info 2 *default-log-port* "login successful") - #t) - (begin - ;; (debug:print-info 2 *default-log-port* "login failed") - #f)))) + #t + #f))) (define (server:get-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -170,21 +170,21 @@ (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) -(define (tasks:server-lock-slot mdb run-id) +(define (tasks:server-lock-slot mdb run-id transport-type) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin - (tasks:server-set-available mdb run-id) + (tasks:server-set-available mdb run-id transport-type) (thread-sleep! (/ (random 1500) 1000)) ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) -(define (tasks:server-set-available mdb run-id) +(define (tasks:server-set-available mdb run-id transport-type) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" (current-process-id) ;; pid @@ -194,11 +194,11 @@ (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version -1 ;; interface ;; (conc (server:get-transport)) ;; transport - (conc *transport-type*) ;; transport + (symbol->string transport-type) ;; transport run-id )) (define (tasks:num-in-available-state mdb run-id) (let ((res 0)) @@ -404,11 +404,15 @@ (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 *default-log-port* "Try starting server for run-id " run-id)) (thread-sleep! (/ (random 2000) 1000)) (server:kind-run run-id) (thread-sleep! (min delay-time 1)) - (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) + (if (not (or (server:start-attempted? *toppath*) + (server:read-dotserver *toppath*))) ;; no point in trying + (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)) + #f)) + #f))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) @@ -416,10 +420,22 @@ (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) + +(define (tasks:get-server-by-id mdb id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + (set! res (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id))) + mdb + "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id + FROM servers WHERE id=?;" + id) + res)) (define (tasks:get-server-records mdb run-id) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -353,10 +353,11 @@ (pop-directory) result))))) (define (tests:test-force-state-status! run-id test-id state status) (rmt:test-set-status-state run-id test-id status state #f) + ;; (rmt:roll-up-pass-fail-counts run-id test-name item (mt:process-triggers run-id test-id state status)) ;; Do not rpc this one, do the underlying calls!!! (define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f)) (let* ((real-status status) @@ -396,16 +397,17 @@ ;; update the primary record IF state AND status are defined (if (and state status) (begin (rmt:test-set-status-state run-id test-id real-status state (if waived waived comment)) - (mt:process-triggers run-id test-id state real-status))) + ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-status-state + )) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, it does remote calls under the hood. - (if (and test-id state status (equal? status "AUTO")) - (rmt:test-data-rollup run-id test-id status)) + ;; (if (and test-id state status (equal? status "AUTO")) + ;; (rmt:test-data-rollup run-id test-id status)) ;; add metadata (need to do this way to avoid SQL injection issues) ;; :first_err ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) @@ -441,11 +443,11 @@ (rmt:csv->test-data run-id test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) - (rmt:roll-up-pass-fail-counts run-id test-name item-path state status)) + (rmt:roll-up-pass-fail-counts run-id test-name item-path state status #f)) (if (or (and (string? comment) (string-match (regexp "\\S+") comment)) waived) (let ((cmt (if waived waived comment))) @@ -479,13 +481,12 @@ (lockf (conc outputfilename ".lock"))) (let loop ((have-lock (common:simple-file-lock lockf))) (if have-lock (let ((script (configf:lookup *configdat* "testrollup" test-name))) (print "Obtained lock for " outputfilename) - ;; (rmt:top-test-set-per-pf-counts run-id test-name) - (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f) - (rmt:top-test-set-per-pf-counts run-id test-name) + (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f #f) + ;; (rmt:test-set-status-state run-id test-name #f #f #f) ;; (rmt:top-test-set-per-pf-counts run-id test-name) (if script (system (conc script " > " outputfilename " & ")) (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)) (common:simple-file-release-lock lockf) (change-directory orig-dir) @@ -1017,78 +1018,80 @@ tcfg)))))) ;; sort tests by priority and waiton ;; Move test specific stuff to a test unit FIXME one of these days (define (tests:sort-by-priority-and-waiton test-records) - (let* ((mungepriority (lambda (priority) - (if priority - (let ((tmp (any->number priority))) - (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) - 0))) - (all-tests (hash-table-keys test-records)) - (all-waited-on (let loop ((hed (car all-tests)) - (tal (cdr all-tests)) - (res '())) - (let* ((trec (hash-table-ref test-records hed)) - (waitons (or (tests:testqueue-get-waitons trec) '()))) - (if (null? tal) - (append res waitons) - (loop (car tal)(cdr tal)(append res waitons)))))) - (sort-fn1 - (lambda (a b) - (let* ((a-record (hash-table-ref test-records a)) - (b-record (hash-table-ref test-records b)) - (a-waitons (or (tests:testqueue-get-waitons a-record) '())) - (b-waitons (or (tests:testqueue-get-waitons b-record) '())) - (a-config (tests:testqueue-get-testconfig a-record)) - (b-config (tests:testqueue-get-testconfig b-record)) - (a-raw-pri (config-lookup a-config "requirements" "priority")) - (b-raw-pri (config-lookup b-config "requirements" "priority")) - (a-priority (mungepriority a-raw-pri)) - (b-priority (mungepriority b-raw-pri))) - (tests:testqueue-set-priority! a-record a-priority) - (tests:testqueue-set-priority! b-record b-priority) - ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) - (cond - ;; is - ((member a b-waitons) ;; is b waiting on a? - ;; (debug:print 0 *default-log-port* "case1") - #t) - ((member b a-waitons) ;; is a waiting on b? - ;; (debug:print 0 *default-log-port* "case2") - #f) - ((and (not (null? a-waitons)) ;; both have waitons - do not disturb - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case2.1") - #t) - ((and (null? a-waitons) ;; no waitons for a but b has waitons - (not (null? b-waitons))) - ;; (debug:print 0 *default-log-port* "case3") - #f) - ((and (not (null? a-waitons)) ;; a has waitons but b does not - (null? b-waitons)) - ;; (debug:print 0 *default-log-port* "case4") - #t) - ((not (eq? a-priority b-priority)) ;; use - (> a-priority b-priority)) - (else - ;; (debug:print 0 *default-log-port* "case5") - (string>? a b)))))) - - (sort-fn2 - (lambda (a b) - (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) - (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) - ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) - ;; (debug:print "dot-res=" dot-res)) - ;; (let ((data (map cdr (filter - ;; (lambda (x)(equal? "node" (car x))) - ;; (map string-split (tests:easy-dot test-records "plain")))))) - ;; (map car (sort data (lambda (a b) - ;; (> (string->number (caddr a))(string->number (caddr b))))))) - ;; )) - (sort all-tests sort-fn1))) ;; avoid dealing with deleted tests, look at the hash table + (if (eq? (hash-table-size test-records) 0) + '() + (let* ((mungepriority (lambda (priority) + (if priority + (let ((tmp (any->number priority))) + (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0))) + 0))) + (all-tests (hash-table-keys test-records)) + (all-waited-on (let loop ((hed (car all-tests)) + (tal (cdr all-tests)) + (res '())) + (let* ((trec (hash-table-ref test-records hed)) + (waitons (or (tests:testqueue-get-waitons trec) '()))) + (if (null? tal) + (append res waitons) + (loop (car tal)(cdr tal)(append res waitons)))))) + (sort-fn1 + (lambda (a b) + (let* ((a-record (hash-table-ref test-records a)) + (b-record (hash-table-ref test-records b)) + (a-waitons (or (tests:testqueue-get-waitons a-record) '())) + (b-waitons (or (tests:testqueue-get-waitons b-record) '())) + (a-config (tests:testqueue-get-testconfig a-record)) + (b-config (tests:testqueue-get-testconfig b-record)) + (a-raw-pri (config-lookup a-config "requirements" "priority")) + (b-raw-pri (config-lookup b-config "requirements" "priority")) + (a-priority (mungepriority a-raw-pri)) + (b-priority (mungepriority b-raw-pri))) + (tests:testqueue-set-priority! a-record a-priority) + (tests:testqueue-set-priority! b-record b-priority) + ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons) + (cond + ;; is + ((member a b-waitons) ;; is b waiting on a? + ;; (debug:print 0 *default-log-port* "case1") + #t) + ((member b a-waitons) ;; is a waiting on b? + ;; (debug:print 0 *default-log-port* "case2") + #f) + ((and (not (null? a-waitons)) ;; both have waitons - do not disturb + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case2.1") + #t) + ((and (null? a-waitons) ;; no waitons for a but b has waitons + (not (null? b-waitons))) + ;; (debug:print 0 *default-log-port* "case3") + #f) + ((and (not (null? a-waitons)) ;; a has waitons but b does not + (null? b-waitons)) + ;; (debug:print 0 *default-log-port* "case4") + #t) + ((not (eq? a-priority b-priority)) ;; use + (> a-priority b-priority)) + (else + ;; (debug:print 0 *default-log-port* "case5") + (string>? a b)))))) + + (sort-fn2 + (lambda (a b) + (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a))) + (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b))))))) + ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain"))) + ;; (debug:print "dot-res=" dot-res)) + ;; (let ((data (map cdr (filter + ;; (lambda (x)(equal? "node" (car x))) + ;; (map string-split (tests:easy-dot test-records "plain")))))) + ;; (map car (sort data (lambda (a b) + ;; (> (string->number (caddr a))(string->number (caddr b))))))) + ;; )) + (sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table (define (tests:easy-dot test-records outtype) (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX")))) (let ((all-testnames (hash-table-keys test-records)) (temp-port (open-output-file* fd))) @@ -1329,10 +1332,11 @@ "SELECT count(id) FROM test_rundat;") res)) 0) (define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) (if (and cpuload diskfree) (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) (if minutes (rmt:general-call 'update-run-duration run-id minutes test-id)) (if (and uname hostname) Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -44,10 +44,14 @@ # Set launchwait to no to use the more agressive code that does not wait for the launch to complete before proceeding # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 + +# wait 0.5 seconds between launching every process +# +launch-delay 0.5 # wait for runs to completely complete. yes, anything else is no run-wait yes # If set to "default" the old code is used. Otherwise defaults to 200 or uses @@ -153,11 +157,12 @@ # force use of server always # required yes # Use http instead of direct filesystem access -transport http +transport rpc +# transport http # transport fs # transport nmsg synchronous 0 @@ -245,11 +250,11 @@ ((sleeprunner) "sleeprunner") \ (else "nbfake"))} # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log -# launcher #{shell if which bsub > /dev/null;then echo bsub -q priority -o openlava.log;else echo sleeprunner;fi} +# launcher #{ shell if which bsub > /dev/null;then echo bsub -q priority -o openlava.log;else echo sleeprunner;fi} # launcher nbfake [configf:settings trim-trailing-spaces yes] # Override the rollup for specific tests Index: tests/rununittest.sh ================================================================== --- tests/rununittest.sh +++ tests/rununittest.sh @@ -1,19 +1,21 @@ #!/bin/bash # Usage: rununittest.sh testname debuglevel # +banner $1 # put megatest on path from correct location mtbindir=$(readlink -f ../bin) export PATH="${mtbindir}:$PATH" # Clean setup # -dbdir=$(cd simplerun;megatest -show-config -section setup -var linktree)/.db -rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db $dbdir/*.db +dbdir=$(echo /tmp/$USER/megatest_localdb/simplerun/.[a-zA-Z]*/) +echo "dbdir=$dbdir" +rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db $dbdir rm -rf simplelinks/ simpleruns/ simplerun/db/ $dbdir mkdir -p simplelinks simpleruns (cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm) (cd simplerun;cp ../../altdb.scm .) Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -10,19 +10,83 @@ (define run-id 1) (test "setup for run" #t (begin (launch:setup) (string? (getenv "MT_RUN_AREA_HOME")))) -;; NON Server tests go here - -(test #f #f (db:dbdat-get-path *db*)) -(test #f #f (db:get-run-name-from-id *db* run-id)) -;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) - -;; (exit) - -;; Server tests go here +(test #f #t (and (server:kind-run *toppath*) #t)) + + +(define user (current-user-name)) +(define runname "mytestrun") +(define keys (rmt:get-keys)) +(define runinfo #f) +(define keyvals '(("SYSTEM" "abc")("RELEASE" "def"))) +(define header (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time")) + +;; Setup +;; +;; (test #f #f (not (client:setup run-id))) +;; (test #f #f (not (hash-table-ref/default *runremote* run-id #f))) + +;; Login +;; +(test #f'(#t "successful login") + (rmt:login run-id)) + +;; Keys +;; +(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + +;; No data in db +;; +(test #f '() (rmt:get-all-run-ids)) +(test #f #f (rmt:get-run-name-from-id run-id)) +(test #f + (vector + header + (vector #f #f #f #f)) + (rmt:get-run-info run-id)) + +;; Insert data into db +;; +(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user)) +;; (test #f #f (rmt:get-runs-by-patt keys runname)) +(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" "")) +(define test-one-id #f) +(test #f 1 (let ((test-id (rmt:get-test-id run-id "test-one" ""))) + (set! test-one-id test-id) + test-id)) +(define test-one-rec #f) +(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id))) + (set! test-one-rec test-rec) + (vector-ref test-rec 2))) + +;; With data in db +;; +(print "Using runame=" runname) +(test #f '(1) (rmt:get-all-run-ids)) +(test #f runname (rmt:get-run-name-from-id run-id)) +(test #f + runname + (let ((run-info (rmt:get-run-info run-id))) + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + +;; test killing server +;; +(for-each + (lambda (run-id) + (test #f #t (and (tasks:kill-server-run-id run-id) #t)) + (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))) + (list 0 1)) + +;; Tests to assess reading/writing while servers are starting/stopping +;; NO LONGER APPLICABLE + +;; Server tests go here +(define (server-tests-dont-run-right-now) (for-each (lambda (run-id) (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)) (server:kind-run run-id) (test "did server start within 20 seconds?" @@ -51,82 +115,14 @@ (begin (thread-sleep! 1.1) (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id))) res))))) ) - (list 0 1)) - -(define user (current-user-name)) -(define runname "mytestrun") -(define keys (rmt:get-keys)) -(define runinfo #f) -(define keyvals '(("SYSTEM" "abc")("RELEASE" "def"))) -(define header (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time")) - -;; Setup -;; -(test #f #f (not (client:setup run-id))) -(test #f #f (not (hash-table-ref/default *runremote* run-id #f))) - -;; Login -;; -(test #f'(#t "successful login") - (rmt:login-no-auto-client-setup (hash-table-ref/default *runremote* run-id #f) run-id)) -(test #f '(#t "successful login") - (rmt:login run-id)) - -;; Keys -;; -(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) - -;; No data in db -;; -(test #f '() (rmt:get-all-run-ids)) -(test #f #f (rmt:get-run-name-from-id run-id)) -(test #f - (vector - header - (vector #f #f #f #f)) - (rmt:get-run-info run-id)) - -;; Insert data into db -;; -(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user)) -;; (test #f #f (rmt:get-runs-by-patt keys runname)) -(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" "")) -(define test-one-id #f) -(test #f 30001 (let ((test-id (rmt:get-test-id run-id "test-one" ""))) - (set! test-one-id test-id) - test-id)) -(define test-one-rec #f) -(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id))) - (set! test-one-rec test-rec) - (vector-ref test-rec 2))) - -;; With data in db -;; -(print "Using runame=" runname) -(test #f '(1) (rmt:get-all-run-ids)) -(test #f runname (rmt:get-run-name-from-id run-id)) -(test #f - runname - (let ((run-info (rmt:get-run-info run-id))) - (db:get-value-by-header (db:get-rows run-info) - (db:get-header run-info) - "runname"))) - -(for-each (lambda (run-id) -;; test killing server -;; -(tasks:kill-server-run-id run-id) - -(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)) -) -(list 0 1)) - -;; Tests to assess reading/writing while servers are starting/stopping -(define start-time (current-seconds)) + (list 0 1))) + +(define start-time (current-seconds)) +(define (reading-writing-while-server-starting-stopping-dont-run-now) (let loop ((test-state 'start)) (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id)) (first-dat (if (not (null? server-dats)) (car server-dats) #f))) @@ -149,11 +145,11 @@ ((shutting-down) (loop test-state)) (else (print "Don't know what to do if get here")))) ((server-shutdown) (loop test-state))))) - +) ;;====================================================================== ;; END OF TESTS ;;====================================================================== Index: tests/unittests/runs.scm ================================================================== --- tests/unittests/runs.scm +++ tests/unittests/runs.scm @@ -1,6 +1,8 @@ (define keys (rmt:get-keys)) + +(test #f #t (and (server:kind-run *toppath*) #t)) (test "get all legal tests" (list "test1" "test2") (sort (hash-table-keys (tests:get-all)) string<=?)) (test "register-run" #t (number? (rmt:register-run @@ -9,12 +11,12 @@ "new" "n/a" "bob"))) (test #f #t (rmt:register-test 1 "nada" "")) -(test #f 30001 (rmt:get-test-id 1 "nada" "")) -(test #f "NOT_STARTED" (vector-ref (rmt:get-test-info-by-id 1 30001) 3)) ;; "nada" "") 3)) +(test #f 1 (rmt:get-test-id 1 "nada" "")) +(test #f "NOT_STARTED" (vector-ref (rmt:get-test-info-by-id 1 1) 3)) (test #f "FOO LIKE 'abc%def'" (db:patt->like "FOO" "abc%def")) (test #f "key2" (vector-ref (car (vector-ref (mt:get-runs-by-patt '("SYSTEM" "RELEASE") "%" "key1/key2") 1)) 1)) (test #f "SYSTEM,RELEASE,id,runname,state,status,owner,event_time" (car (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time")))) @@ -49,11 +51,11 @@ ; (hash-table-set! args:arg-hash "-keepgoing" #t) (hash-table-set! args:arg-hash "-itempatt" "%") (hash-table-set! args:arg-hash "-testpatt" "%") (hash-table-set! args:arg-hash "-target" "ubuntu/r1.2") ;; SYSTEM/RELEASE (hash-table-set! args:arg-hash "-runname" "testrun") -(test "Setup for a run" #t (begin (launch:setup-for-run) #t)) +(test "Setup for a run" #t (string? (launch:setup))) (define *tdb* #f) (define keyvals #f) (test "target->keyval" #t (let ((kv (keys:target->keyval keys (args:get-arg "-target")))) (print "keyvals=" kv ", keys=" keys) @@ -151,11 +153,11 @@ (test "launch-test" #t (string? (file-exists? ;; (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (launch-test 30001 1 rinfo keyvals "run1" tconfig "test1" test-path '() (make-hash-table))))) + (launch-test 1 1 rinfo keyvals "run1" tconfig "test1" test-path '() (make-hash-table))))) ;;====================================================================== ;; M O R E R E M O T E C A L L S ;;====================================================================== @@ -169,13 +171,18 @@ ;; T E S T I T E M M A P ;;====================================================================== (test #f "a/b/c" (db:multi-pattern-apply "d/e/f" "d a\ne b\nf c")) (test #f "blah/foo/bar/baz" (db:convert-test-itempath "blah/baz/bar/foo" "^([^/]+)/([^/]+)/([^/]+)$ \\3/\\2/\\1")) -(test #f #t (db:compare-itempaths "abc/def/123" "abc/ghi/123" "ghi def")) -(test #f #f (db:compare-itempaths "some/5" "item/5" ".*/")) -(test #f #t (db:compare-itempaths "some/5" "item/5" ".*/ some/")) +(define itemmaps (alist->hash-table + '(("test1" "ghi def") + ("test2" ".*/") + ("test3" ".*/ some/")))) + +(test #f #t (db:compare-itempaths "test1" "abc/def/123" "abc/ghi/123" itemmaps)) +(test #f #f (db:compare-itempaths "test2" "some/5" "item/5" ".*/" itemmaps)) +(test #f #t (db:compare-itempaths "test3" "some/5" "item/5" ".*/ some/" itemmaps)) (test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(toplevel) itemmap: ".*/" "/")) (test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(normal) itemmap: ".*/" "/")) (test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemmatch) itemmap: ".*/" "/")) (test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemwait) itemmap: ".*/" "/")) ADDED thunk-utils.scm Index: thunk-utils.scm ================================================================== --- /dev/null +++ thunk-utils.scm @@ -0,0 +1,121 @@ +(use srfi-18) + + +;; wrap a proc with a mutex so that two threads may not call proc simultaneously. +;; will catch exceptions to ensure mutex is unlocked even if exception is thrown. +;; will generate a unique mutex for proc unless one is specified with canned-mutex: option +;; +;; example 1: (define thread-safe-+ (make-synchronized-proc +)) +;; example 2: (define thread-safe-plus +;; (make-synchronized-proc +;; (lambda (x y) +;; (+ x y)))) + +(define (make-synchronized-proc proc + #!key (canned-mutex #f)) + (let* ((guard-mutex (if canned-mutex canned-mutex (make-mutex))) + (guarded-proc ;; we are guarding the thunk against exceptions. We will record whether result of evaluation is an exception or a regular result. + (lambda args + (mutex-lock! guard-mutex) + (let* ((EXCEPTION (gensym)) ;; using gensym to avoid potential collision with a proc that returns a pair having the first element be our flag. gensym guarantees the symbol is unique. + (res + (condition-case + (apply proc args) ;; this is what we are guarding the execution of + [x () (cons EXCEPTION x)] + ))) + (mutex-unlock! guard-mutex) + (cond + ((and (pair? res) (eq? (car res) EXCEPTION)) + (raise (cdr res))) + (else + res)))))) + guarded-proc)) + + +;; retry an operation (depends on srfi-18) +;; ================== +;; idea here is to avoid spending time on coding retrying something. Trying to be generic here. +;; +;; Exception handling: +;; ------------------- +;; if evaluating the thunk results in exception, it will be retried. +;; on last try, if final-failure-returns-actual is true, the exception will be re-thrown to caller. +;; +;; look at options below #!key to see how to configure behavior +;; +;; + +(define (retry-thunk + the-thunk + #!key ;;;; options below + (accept-result? (lambda (x) x)) ;; retry if predicate applied to thunk's result is false + (retries 4) ;; how many tries + (failure-value #f) ;; return this on final failure, unless following option is enabled: + (final-failure-returns-actual #f) ;; on failure, on the last try, just return the result, not failure-value + + (retry-delay 0.1) ;; delay between tries + (back-off-factor 1) ;; multiply retry-delay by this factor on retry + (random-delay 0.1) ;; add a random portion of this value to wait + + (chatty #f) ;; print status as we go, for debugging. + ) + + (when chatty (print) (print "Entered retry-thunk") (print "-=-=-=-=-=-")) + (let* ((guarded-thunk ;; we are guarding the thunk against exceptions. We will record whether result of evaluation is an exception or a regular result. + (lambda () + (let* ((EXCEPTION (gensym)) ;; using gensym to avoid potential collision + (res + (condition-case + (the-thunk) ;; this is what we are guarding the execution of + [x () (cons EXCEPTION x)] + ))) + (cond + ((and (pair? res) (eq? (car res) EXCEPTION)) + (if chatty + (print " - the-thunk threw exception >"(cdr res)"<")) + (cons 'exception (cdr res))) + (else + (if chatty + (print " - the-thunk returned result >"res"<")) + (cons 'regular-result res))))))) + + (let loop ((guarded-res (guarded-thunk)) + (retries-left retries) + (fail-wait retry-delay)) + (if chatty (print " ==========")) + (let* ((wait-time (+ fail-wait (+ (* fail-wait back-off-factor) + (* random-delay + (/ (random 1024) 1024) )))) + (res-type (car guarded-res)) + (res-value (cdr guarded-res))) + (cond + ((and (eq? res-type 'regular-result) (accept-result? res-value)) + (if chatty (print " + return result that satisfied accept-result? >"res-value"<")) + res-value) + + ((> retries-left 0) + (if chatty (print " - sleep "wait-time)) + (thread-sleep! wait-time) + (if chatty (print " + retry ["retries-left" tries left]")) + (loop (guarded-thunk) + (sub1 retries-left) + wait-time)) + + ((eq? res-type 'regular-result) + (if final-failure-returns-actual + (begin + (if chatty (print " + last try failed- return the result >"res-value"<")) + res-value) + (begin + (if chatty (print " + last try failed- return canned failure value >"failure-value"<")) + failure-value))) + + (else ;; no retries left; result was not accepted and res-type can only be 'exception + (if final-failure-returns-actual + (begin + (if chatty (print " + last try failed with exception- re-throw it >"res-value"<")) + (abort res-value)); re-raise the exception. TODO: find a way for call-history to show as though from entry to this function + (begin + (if chatty (print " + last try failed with exception- return canned failure value >"failure-value"<")) + failure-value)))))))) + Index: tree.scm ================================================================== --- tree.scm +++ tree.scm @@ -135,10 +135,10 @@ ;; (print "obj: " obj ", id: " id ", state: " state) (let* ((run-path (tree:node->path obj id)) (run-id (tree-path->run-id (cdr run-path)))) (if run-id (begin - (dboard:data-curr-run-id-set! *data* run-id) + (dboard:data-curr-run-id-set! data run-id) (dashboard:update-run-summary-tab))) ;; (print "path: " (tree:node->path obj id) " run-id: " run-id) )))) |# ADDED utils/find-unused-globals.sh Index: utils/find-unused-globals.sh ================================================================== --- /dev/null +++ utils/find-unused-globals.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +echo "Finding unused globals:" + +for var in $(egrep '^\s*\(define\s+\*' *.scm|awk '{print $2}'|sort -u);do + if ! $(egrep -v '^\s*\(define' *scm| grep "$var" > /dev/null);then + echo "$var not used"; + fi; +done + +echo +echo "Finding globals without proper definition in common.scm:" + +for var in $(egrep -v '^\s*\(define' *.scm|\ + grep -P -v '^\s*;'|\ + grep -P '\*[a-zA-Z]+\S+\*'|\ + tr '*' '/' |\ + perl -p -e 's%.*(\/\S+\/).*%$1%'|\ + egrep '\/[a-zA-Z]+\S+\/'|\ + sort -u);do + newvar=$(echo $var | tr '/' '*') + # echo "VAR is $var, newvar is $newvar" + if ! $(grep -P '^\s*\(define\s+' common.scm|\ + grep -P -v '^\s*;'|\ + grep "$newvar" > /dev/null);then + echo "$newvar not defined in common.scm" + fi +done + Index: utils/installall.sh ================================================================== --- utils/installall.sh +++ utils/installall.sh @@ -10,14 +10,13 @@ # This program is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR # PURPOSE. echo You may need to do the following first: -echo sudo apt-get install libreadline-dev -echo sudo apt-get install libwebkitgtk-dev +echo sudo apt-get install libreadline-dev libsqlite3-dev libwebkitgtk-dev echo sudo apt-get install libpangox-1.0-0 zlib1g-dev libfreetype6-dev cmake -echo sudo apt-get install libssl-dev +echo sudo apt-get install libssl-dev uuid-dev libglu1-mesa-dev echo sudo apt-get install libmotif3 -OR- set KTYPE=26g4 echo echo Set OPTION to std, currently OPTION=$OPTION echo echo Additionally, if you want mysql-client, you will need to make sure @@ -25,21 +24,37 @@ echo echo You are using PREFIX=$PREFIX echo You are using proxy="$proxy" echo echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :" + +if [[ "$OPTION"x == "x" ]];then + OPTION=std +fi SYSTEM_TYPE=$(lsb_release -irs |tr ' ' '_' |tr '\n' '-')$(uname -i)-$OPTION + +# default chicken version variables. Override in case statement as appropriate +CHICKEN_VERSION=4.10.0 +CHICKEN_BASEVER=4.10.0 # Set up variables # case $SYSTEM_TYPE in Ubuntu-16.04-x86_64-std) KTYPE=32 CDVER=5.10 IUPVER=3.17 IMVER=3.11 + ;; +Ubuntu-16.04-x86_64-new) + KTYPE=32 + CDVER=5.10 + IUPVER=3.17 + IMVER=3.11 + CHICKEN_VERSION=4.10.0 + CHICKEN_BASEVER=4.10.0 ;; Ubuntu-16.04-i686-std) KTYPE=32 CDVER=5.10 IUPVER=3.17 @@ -103,12 +118,10 @@ # Put all the downloaded tar files in tgz mkdir -p tgz # http://code.call-cc.org/releases/4.8.0/chicken-4.8.0.5.tar.gz -export CHICKEN_VERSION=4.11.0 -export CHICKEN_BASEVER=4.11.0 chicken_targz=chicken-${CHICKEN_VERSION}.tar.gz if ! [[ -e tgz/$chicken_targz ]]; then wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/${chicken_targz} mv $chicken_targz tgz fi @@ -177,11 +190,11 @@ cd $BUILDHOME # Some eggs are quoted since they are reserved to Bash # for f in matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json md5; do # $CHICKEN_INSTALL $PROX -keep-installed matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json md5 awful http-client spiffy uri-common intarweb http-client spiffy-request-vars md5 message-digest http-client spiffy-directory-listing -for egg in matchable readline apropos base64 regex-literals format "regex-case" "test" \ +for egg in matchable readline apropos dbi base64 regex-literals format "regex-case" "test" \ coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo \ tcp rpc csv-xml fmt json md5 awful http-client spiffy uri-common intarweb http-client \ spiffy-request-vars s md5 message-digest spiffy-directory-listing ssax sxml-serializer \ sxml-modifications logpro z3 call-with-environment-variables \ pathname-expand typed-records simple-exceptions numbers crypt parley srfi-42 \ @@ -302,12 +315,12 @@ cd histstore $PREFIX/bin/csc histstore.scm -o hs cp -f hs $PREFIX/bin/hs cd ../mutils $PREFIX/bin/chicken-install - cd ../dbi - $PREFIX/bin/chicken-install + # cd ../dbi + # $PREFIX/bin/chicken-install cd ../margs $PREFIX/bin/chicken-install fi cd $BUILDHOME