Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -4,11 +4,11 @@ INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ - fs-transport.scm http-transport.scm filedb.scm \ + http-transport.scm filedb.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ tree.scm ezsteps.scm lock-queue.scm sdb.scm \ rmt.scm api.scm tdb.scm GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm @@ -62,10 +62,11 @@ tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm +client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm # Temporary while transitioning to new routine # runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -15,10 +15,30 @@ ;; These are called by the server on recipt of /api calls (define (api:execute-requests dbstruct cmd params) (case (string->symbol cmd) + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ;; ((kill-server) + ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*) + ;; (let ((hostname (car *runremote*)) + ;; (port (cadr *runremote*)) + ;; (pid (if (null? params) #f (car params))) + ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread"))) + ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!") + ;; (debug:print-info 1 "current pid=" (current-process-id)) + ;; (open-run-close tasks:server-deregister tasks:open-db + ;; hostname + ;; port: port) + ;; (set! *server-run* #f) + ;; (thread-sleep! 3) + ;; (if pid + ;; (process-signal pid signal/kill) + ;; (thread-start! th1)) + ;; '(#t "exit process started"))) + ;; KEYS ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) ((get-keys) (db:get-keys dbstruct)) ;; TESTS @@ -51,10 +71,12 @@ ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) ((delete-run) (apply db:delete-run dbstruct params)) ((get-runs) (apply db:get-runs dbstruct params)) ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) ;; STEPS @@ -72,27 +94,10 @@ (realparams (cddr params))) (db:with-db dbstruct run-id #t ;; these are all for modifying the db (lambda (db) (db:general-call db stmtname realparams))))) ((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t)) - ;; ((kill-server) - ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*) - ;; (let ((hostname (car *runremote*)) - ;; (port (cadr *runremote*)) - ;; (pid (if (null? params) #f (car params))) - ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread"))) - ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!") - ;; (debug:print-info 1 "current pid=" (current-process-id)) - ;; (open-run-close tasks:server-deregister tasks:open-db - ;; hostname - ;; port: port) - ;; (set! *server-run* #f) - ;; (thread-sleep! 3) - ;; (if pid - ;; (process-signal pid signal/kill) - ;; (thread-start! th1)) - ;; '(#t "exit process started"))) ((sdb-qry) (apply sdb:qry params)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -53,62 +53,86 @@ ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup run-id #!key (remaining-tries 10)) +(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) - (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f)))) + (let ((host-info (hash-table-ref/default *runremote* run-id #f))) + (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) (thread-sleep! 1) ;; try to avoid race conditons - (if server-dat - (let ((new-dat (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result! - (car server-dat) - (cadr server-dat)))) - (if new-dat ;; sucessful login? - new-dat - (begin ;; login failed - (debug:print 0 "INFO: login failed in client:setup with existing server-dat: " server-dat ", new-dat: " new-dat ", cleaning out records and then trying again") - (hash-table-delete! *runremote* run-id) - (open-run-close tasks:server-force-clean-run-record - tasks:open-db - run-id - (car server-dat) - (cadr server-dat)) - (thread-sleep! 5) - (client:setup run-id remaining-tries: (- remaining-tries 1))))) - (let* ((server-info (open-run-close tasks:get-server tasks:open-db run-id))) - (if server-info - (let ((new-dat (http-transport:client-connect run-id - (tasks:hostinfo-get-interface server-info) - (tasks:hostinfo-get-port server-info)))) + (if host-info + (let* ((iface (car host-info)) + (port (cadr host-info)) + (start-res (http-transport:client-connect iface port)) + ;; (ping-res (server:ping-server run-id iface port)) + (ping-res (rmt:login-no-auto-client-setup start-res run-id))) + (if ping-res ;; sucessful login? + start-res) ;; return the server info + (if (member remaining-tries '(3 4 6)) + (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) + (hash-table-delete! *runremote* run-id) + (open-run-close tasks:server-force-clean-run-record + tasks:open-db + run-id + (car host-info) + (cadr host-info) + " client:setup (host-info=#t)") + (thread-sleep! 5) + (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) + (begin + (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) + (thread-sleep! 5) + (client:setup run-id remaining-tries: (- remaining-tries 1)))))) + ;; YUK: rename server-dat here (if new-dat new-dat - (begin ;; login failed - (debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again") - (hash-table-delete! *runremote* run-id) - (open-run-close tasks:server-force-clean-run-record - tasks:open-db - run-id - (tasks:hostinfo-get-interface server-dat) - (tasks:hostinfo-get-port server-dat)) - ;; (thread-sleep! 2) - (server:try-running run-id) - (thread-sleep! 5) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1))))) + (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) + (let* ((iface (tasks:hostinfo-get-interface server-dat)) + (port (tasks:hostinfo-get-port server-dat)) + (start-res (http-transport:client-connect iface port)) + ;; (ping-res (server:ping-server run-id iface port)) + (ping-res (rmt:login-no-auto-client-setup start-res run-id))) + (if (member remaining-tries '(2 5)) + (begin ;; login failed + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + ;;(debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again") + (hash-table-delete! *runremote* run-id) + (open-run-close tasks:server-force-clean-run-record + tasks:open-db + run-id + (tasks:hostinfo-get-interface server-dat) + (tasks:hostinfo-get-port server-dat) + " client:setup (server-dat = #t)") + (server:try-running run-id) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) + (begin + (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) + (thread-sleep! 5) + (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (begin ;; no server registered - ;; (thread-sleep! 2) - (server:try-running run-id) - (thread-sleep! 5) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) + (if (eq? remaining-tries 2) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (client:setup run-id remaining-tries: 10)) + (begin + (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) + (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) + (begin + ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") + (server:try-running run-id))) + (thread-sleep! 10) ;; give server a little time to start up + (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) - (http-transport:client-connect run-id - (tasks:hostinfo-get-interface server-info) + (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; client:signal-handler (define (client:signal-handler signum) (handle-exceptions Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -23,10 +23,17 @@ ;; (require-library margs) ;; (include "margs.scm") (define getenv get-environment-variable) +(define (safe-setenv key val) + (if (and (string? val)(string? key)) + (handle-exceptions + exn + (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" val) + (setenv key val)) + (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" val))) (define home (getenv "HOME")) (define user (getenv "USER")) ;; GLOBAL GLETCHES @@ -60,10 +67,11 @@ (define *default-numtries* 10) (define *server-run* #t) (define *db-write-access* #t) (define *inmemdb* #f) (define *run-id* #f) +(define *server-kind-run* (make-hash-table)) (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here @@ -99,10 +107,17 @@ ;; Generic string database (normalization of sorts) (define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f) ;; Generic path database (normalization of sorts) (define *fdb* #f) + +;;====================================================================== +;; U S E F U L S T U F F +;;====================================================================== + +(define (common:get-megatest-exe) + (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -6,28 +6,33 @@ ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== + +(use trace) (define (debug:calc-verbosity vstr) (cond - (vstr - (let ((debugvals (string-split vstr ","))) - (if (> (length debugvals) 1) - (map string->number debugvals) - (string->number (car debugvals))))) - ((args:get-arg "-v") 2) + ((number? vstr) vstr) + ((not (string? vstr)) 1) + ;; ((string-match "^\\s*$" vstr) 1) + (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ","))))) + (cond + ((> (length debugvals) 1) debugvals) + ((> (length debugvals) 0)(car debugvals)) + (else 1)))) + ((args:get-arg "-v") 2) ((args:get-arg "-q") 0) (else 1))) ;; check verbosity, #t is ok (define (debug:check-verbosity verbosity vstr) (if (not (or (number? verbosity) (list? verbosity))) (begin - (print "ERROR: Invalid debug value " vstr) + (print "ERROR: Invalid debug value \"" vstr "\"") #f) #t)) (define (debug:debug-mode n) (or (and (number? *verbosity*) @@ -38,10 +43,12 @@ (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (getenv "MT_DEBUG_MODE")))) (set! *verbosity* (debug:calc-verbosity debugstr)) (debug:check-verbosity *verbosity* debugstr) + ;; if we were handed a bad verbosity rule then we will override it with 1 and continue + (if (not *verbosity*)(set! *verbosity* 1)) (if (or (args:get-arg "-debug") (not (getenv "MT_DEBUG_MODE"))) (setenv "MT_DEBUG_MODE" (if (list? *verbosity*) (string-intersperse (map conc *verbosity*) ",") (conc *verbosity*)))))) @@ -51,10 +58,11 @@ (if (debug:debug-mode n) (with-output-to-port (current-error-port) (lambda () (if *logging* (db:log-event (apply conc params)) + ;; (apply print "pid:" (current-process-id) " " params) (apply print params) ))))) (define (debug:print-info n . params) (if (debug:debug-mode n) @@ -61,13 +69,14 @@ (with-output-to-port (current-error-port) (lambda () (let ((res (format#format #f "INFO: (~2d) ~a" n (apply conc params)))) (if *logging* (db:log-event res) + ;; (apply print "pid:" (current-process-id) " " "INFO: (" n ") " params) ;; res) (apply print "INFO: (" n ") " params) ;; res) )))))) ;; if a value is printable (i.e. string or number) return the value ;; else return an empty string (define-inline (printable val) (if (or (number? val)(string? val)) val "")) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -217,17 +217,11 @@ (envar (and environ-patt (string-search (regexp environ-patt) curr-section-name))) (realval (if envar (config:eval-string-in-environment val) val))) (debug:print-info 6 "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name) - (if envar - (if (and (string? realval)(string? key)) - (handle-exceptions - exn - (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" realval) - (setenv key realval)) - (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" realval))) + (if envar (safe-setenv key realval)) (hash-table-set! res curr-section-name (config:assoc-safe-add alist key realval)) (loop (configf:read-line inp res allow-system) curr-section-name key #f))) (configf:key-no-val ( x key val) (let* ((alist (hash-table-ref/default res curr-section-name '()))) (hash-table-set! res curr-section-name Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -88,20 +88,10 @@ (print "Failed to find megatest.config, exiting") (exit 1))) (define *dbstruct-local* (make-dbr:dbstruct path: *toppath* local: #t)) -;; (define sdb:qry (make-sdb:qry)) ;; 'init #f) - -;; (if (args:get-arg "-host") -;; (begin -;; (set! *runremote* (string-split (args:get-arg "-host" ":"))) -;; (client:launch)) -;; (if (not (args:get-arg "-use-server")) -;; (set! *transport-type* 'fs) ;; force fs access -;; (client:launch))) - ;; HACK ALERT: this is a hack, please fix. (define *read-only* (not (file-read-access? (conc *toppath* "db/main.db")))) ;; (client:setup *dbstruct-local*) (define toplevel #f) @@ -990,17 +980,21 @@ ;; General info about the run(s) and megatest area (define (dashboard:summary db) (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string))) (iup:vbox (iup:split - ;; #:value 500 + #:value 500 (iup:frame #:title "General Info" - (iup:hbox - (dcommon:keys-matrix rawconfig) - (dcommon:general-info) - )) + (iup:vbox + (iup:hbox + (iup:label "Area Path") + (iup:textbox #:value *toppath* #:expand "HORIZONTAL")) + (iup:hbox + (dcommon:keys-matrix rawconfig) + (dcommon:general-info) + ))) (iup:frame #:title "Server" (dcommon:servers-table))) (iup:frame #:title "Megatest config settings" Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -11,29 +11,21 @@ ;;====================================================================== ;; Database access ;;====================================================================== -(require-extension (srfi 18) extras tcp) ;; rpc) -;; (import (prefix rpc rpc:)) - +(require-extension (srfi 18) extras tcp) (use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) -;; Note, try to remove this dependency -;; (use zmq) - (declare (unit db)) (declare (uses common)) (declare (uses keys)) (declare (uses ods)) -(declare (uses fs-transport)) (declare (uses client)) (declare (uses mt)) -;; (declare (uses sdb)) -;; (declare (uses filedb)) (include "common_records.scm") (include "db_records.scm") (include "key_records.scm") (include "run_records.scm") @@ -77,14 +69,22 @@ ;; (db:with-db dbstruct run-id sqlite3:exec "select blah from blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) - (let* ((db (db:get-db dbstruct run-id))) + (let* ((db (db:get-db dbstruct run-id)) + ) + ;; (proc2 (lambda () (let ((res (apply proc db params))) (db:done-with dbstruct run-id r/w) res))) +;; (handle-exceptions +;; exn +;; (begin +;; (thread-sleep! 10) +;; (proc2)) +;; (proc2)))) ;;====================================================================== ;; K E E P F I L E D B I N dbstruct ;;====================================================================== @@ -460,12 +460,13 @@ (apply open-run-close-no-exception-handling proc idb params)) (apply open-run-close-no-exception-handling proc idb params))) ;; (define open-run-close (define open-run-close ;; (if (debug:debug-mode 2) - open-run-close-no-exception-handling) - ;; open-run-close-exception-handling)) + ;; open-run-close-no-exception-handling + open-run-close-exception-handling) +;;) (define (db:initialize-main-db db) (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) @@ -642,11 +643,11 @@ (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) 7200)) ;; two hours - (run-ids (db:get-run-ids db))) ;; iterate over runs to divy up the calls + (run-ids (db:get-all-run-ids db))) ;; iterate over runs to divy up the calls (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) (for-each (lambda (run-id) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes @@ -978,11 +979,11 @@ (define (db:get-targets dbstruct) (let* ((res '()) (keys (db:get-keys dbstruct)) (header keys) ;; (map key:get-fieldname keys)) (keystr (keys->keystr keys)) - (qrystr (conc "SELECT " keystr " FROM runs;")) + (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';")) (seen (make-hash-table))) (sqlite3:for-each-row (lambda (a . x) (let ((targ (cons a x))) (if (not (hash-table-ref/default seen targ #f)) @@ -1011,11 +1012,11 @@ (sqlite3:for-each-row (lambda (run-id) (set! run-ids (cons run-id run-ids))) (db:get-db dbstruct #f) "SELECT id FROM runs WHERE state != 'deleted';") - run-ids)) + (reverse run-ids))) ;; get some basic run stats ;; ;; ( (runname (( state count ) ... )) ;; ( ... @@ -1026,11 +1027,11 @@ ;; First get all the runname/run-ids (sqlite3:for-each-row (lambda (run-id runname) (set! runs-info (cons (list run-id runname) runs-info))) (db:get-db dbstruct #f) - "SELECT id,runname FROM runs;") + "SELECT id,runname FROM runs WHERE state != 'deleted';") ;; for each run get stats data (for-each (lambda (run-info) (let ((run-id (car run-info)) (run-name (cadr run-info))) @@ -1138,26 +1139,24 @@ (sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET state=? WHERE id=?;" newlockval run-id) (sqlite3:execute (db:get-db dbstruct #f) "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);" user (conc newlockval " " run-id)) (debug:print-info 1 "" newlockval " run number " run-id))) -(define (db:get-all-run-ids dbstruct) - (let ((res '())) - (sqlite3:for-each-row - (lambda (run-id) - (set! res (cons run-id res))) - (db:get-db dbstruct #f) - "SELECT id FROM runs;") - (reverse res))) - -(define (db:get-run-ids db) - (let ((res '())) - (sqlite3:for-each-row - (lambda (id) - (set! res (cons id res))) +(define (db:set-run-status db run-id status #!key (msg #f)) + (if msg + (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) + (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))) + +(define (db:get-run-status db run-id) + (let ((res "n/a")) + (sqlite3:for-each-row + (lambda (status) + (set! res status)) db - "SELECT id FROM runs;"))) + "SELECT status FROM runs WHERE id=?;" + run-id) + res)) ;;====================================================================== ;; K E Y S ;;====================================================================== @@ -1177,36 +1176,46 @@ keys) (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals dbstruct run-id) - (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f))) - (if mykeyvals - mykeyvals - (let* ((keys (db:get-keys dbstruct)) - (res '())) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons key-val res))) - (db:get-db dbstruct #f) qry run-id))) - keys) - (let ((final-res (reverse res))) - (hash-table-set! *keyvals* run-id final-res) - final-res))))) + (let* ((keys (db:get-keys dbstruct)) + (res '())) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons key-val res))) + (db:get-db dbstruct #f) qry run-id))) + keys) + (let ((final-res (reverse res))) + (hash-table-set! *keyvals* run-id final-res) + final-res))) ;; The target is keyval1/keyval2..., cached in *target* as it is used often (define (db:get-target dbstruct run-id) - (let ((mytarg (hash-table-ref/default *target* run-id #f))) - (if mytarg - mytarg - (let* ((keyvals (db:get-key-vals dbstruct run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) - (hash-table-set! *target* run-id thekey) - thekey)))) + (let* ((keyvals (db:get-key-vals dbstruct run-id)) + (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) + thekey)) + +;; Get run-ids for runs with same target but different runnames and NOT run-id +;; +(define (db:get-prev-run-ids dbstruct run-id) + (let* ((keyvals (rmt:get-key-val-pairs run-id)) + (kvalues (map cadr keyvals)) + (keys (rmt:get-keys)) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (let ((prev-run-ids '())) + (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db + (lambda (db) + (apply sqlite3:for-each-row + (lambda (id) + (set! prev-run-ids (cons id prev-run-ids))) + db + (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") (append kvalues (list run-id))))) + prev-run-ids))) ;;====================================================================== ;; T E S T S ;;====================================================================== @@ -1487,11 +1496,11 @@ (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir) res))) (db:get-db dbstruct run-id) - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE run_id=?;") + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;") run-id) res)) (define (db:replace-test-records dbstruct run-id testrecs) (db:with-db dbstruct run-id #t @@ -1683,34 +1692,36 @@ ;;====================================================================== ;; Misc. test related queries ;;====================================================================== -(define (db:test-get-paths-matching-keynames-target-new dbstruct keynames target res testpatt statepatt statuspatt runname) +(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt) (let* ((row-ids '()) (keystr (string-intersperse (map (lambda (key val) (conc key " like '" val "'")) keynames (string-split target "/")) " AND ")) - (testqry (tests:match->sqlqry testpatt)) - (runsqry (sqlite3:prepare (db:get-db dbstruct #f)(conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';"))) - (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) - (debug:print 8 "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" tstsqry) + ;; (testqry (tests:match->sqlqry testpatt)) + (runsqry (sqlite3:prepare (db:get-db dbstruct #f)(conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))) + ;; (debug:print 8 "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry) (sqlite3:for-each-row (lambda (rid) (set! row-ids (cons rid row-ids))) runsqry) (sqlite3:finalize! runsqry) - (for-each (lambda (rid) - (sqlite3:for-each-row - (lambda (p) - (set! res (cons p res))) - (db:get-db dbstruct rid) - tstsqry)) - row-ids) + row-ids)) + +(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname) + (let* ((testqry (tests:match->sqlqry testpatt)) + (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;"))) + (sqlite3:for-each-row + (lambda (p) + (set! res (cons p res))) + (db:get-db dbstruct run-id) + tstsqry) res)) ;;====================================================================== ;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS ;;====================================================================== @@ -1878,49 +1889,10 @@ db:queries))) (if q (car q) #f)))) (apply sqlite3:execute db query params) #t)) -;; get the previous record for when this test was run where all keys match but runname -;; returns #f if no such test found, returns a single test record if found -;; -;; Run this server-side -;; -(define (db:get-previous-test-run-record dbstruct run-id test-name item-path) - (let* ((db (db:get-db dbstruct #f)) ;; - (keys (db:get-keys db)) - (selstr (string-intersperse keys ",")) - (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) - (keyvals #f)) - ;; first look up the key values from the run selected by run-id - (sqlite3:for-each-row - (lambda (a . b) - (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id) - (if (not keyvals) - #f - (let ((prev-run-ids '())) - (apply sqlite3:for-each-row - (lambda (id) - (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))) - ;; for each run starting with the most recent look to see if there is a matching test - ;; if found then return that matching test record - (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) - (if (null? prev-run-ids) #f - (let loop ((hed (car prev-run-ids)) - (tal (cdr prev-run-ids))) - (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f))) - (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) - (if (and (null? results) - (not (null? tal))) - (loop (car tal)(cdr tal)) - (if (null? results) #f - (car results)))))))))) - ;; get the previous records for when these tests were run where all keys match but runname ;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests ;; can use wildcards. Also can likely be factored in with get test paths? ;; ;; Run this remotely!! Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -358,24 +358,24 @@ (define (dcommon:general-info) (let ((general-matrix (iup:matrix #:alignment1 "ALEFT" #:expand "YES" ;; "HORIZONTAL" #:numcol 1 - #:numlin 3 + #:numlin 2 #:numcol-visible 1 - #:numlin-visible 3))) - (iup:attribute-set! general-matrix "WIDTH1" "200") + #:numlin-visible 2))) + (iup:attribute-set! general-matrix "WIDTH1" "150") (iup:attribute-set! general-matrix "0:1" "About this Megatest area") ;; User (this is not always obvious - it is common to run as a different user (iup:attribute-set! general-matrix "1:0" "User") (iup:attribute-set! general-matrix "1:1" (current-user-name)) ;; Megatest area - (iup:attribute-set! general-matrix "2:0" "Area") - (iup:attribute-set! general-matrix "2:1" *toppath*) + ;; (iup:attribute-set! general-matrix "2:0" "Area") + ;; (iup:attribute-set! general-matrix "2:1" *toppath*) ;; Megatest version - (iup:attribute-set! general-matrix "3:0" "Version") - (iup:attribute-set! general-matrix "3:1" megatest-version) + (iup:attribute-set! general-matrix "2:0" "Version") + (iup:attribute-set! general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) general-matrix)) (define (dcommon:run-stats dbstruct) (let* ((stats-matrix (iup:matrix expand: "YES")) @@ -445,13 +445,13 @@ (let* ((colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 - #:numlin-visible 3 + #:numlin-visible 5 )) - (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "State" "Transport")) + (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) @@ -466,25 +466,27 @@ (let* ((vals (list (vector-ref server 0) ;; Id (vector-ref server 9) ;; MT-Ver (vector-ref server 1) ;; Pid (vector-ref server 2) ;; Hostname (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (vector-ref server 5) ;; Pubport + (seconds->hr-min-sec (- (current-seconds)(vector-ref server 6))) + ;; (vector-ref server 5) ;; Pubport ;; (vector-ref server 10) ;; Last beat ;; (vector-ref server 6) ;; Start time ;; (vector-ref server 7) ;; Priority ;; (vector-ref server 8) ;; State - (if (< (vector-ref server 10) 20) ;; Status (Please redo this properly!) - "alive" - "dead") - (vector-ref server 11) ;; Transport + (vector-ref server 8) ;; State + (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) - ;; (print "rownum: " rownum " colnum: " colnum " val: " val) - (iup:attribute-set! servers-matrix (conc rownum ":" colnum) val) - (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) - (set! colnum (+ 1 colnum))) + (let* ((row-col (conc rownum ":" colnum)) + (curr-val (iup:attribute servers-matrix row-col))) + (if (not (equal? (conc val) curr-val)) + (begin + (iup:attribute-set! servers-matrix row-col val) + (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) + (set! colnum (+ 1 colnum)))) vals) (set! rownum (+ rownum 1))) (iup:attribute-set! servers-matrix "REDRAW" "ALL")) servers))))) (set! colnum 0) @@ -493,39 +495,41 @@ (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) colnames) (set! dashboard:update-servers-table updater) ;; (iup:attribute-set! servers-matrix "WIDTHDEF" "40") - (iup:hbox - (iup:vbox - (iup:button "Start" - ;; #:size "50x" - #:expand "YES" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -server - &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd)))) - (iup:button "Stop" - #:expand "YES" - ;; #:size "50x" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -stop-server 0 &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd)))) - (iup:button "Restart" - #:expand "YES" - ;; #:size "50x" - #:action (lambda (obj) - (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" - "megatest -stop-server 0;megatest -server - &"))) - ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) - (system cmd))))) - servers-matrix - ))) - + ;; (iup:hbox + ;; (iup:vbox + ;; (iup:button "Start" + ;; ;; #:size "50x" + ;; #:expand "YES" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Stop" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0 &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd)))) + ;; (iup:button "Restart" + ;; #:expand "YES" + ;; ;; #:size "50x" + ;; #:action (lambda (obj) + ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" + ;; "megatest -stop-server 0;megatest -server - &"))) + ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) + ;; (system cmd))))) + ;; servers-matrix + ;; ))) + servers-matrix + )) + ;; The main menu (define (dcommon:main-menu) (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options (iup:menu-item "Open" action: (lambda (obj) Index: docs/manual/howto.txt ================================================================== --- docs/manual/howto.txt +++ docs/manual/howto.txt @@ -5,10 +5,33 @@ Tricks ------ This section is a compendium of a various useful tricks for debugging, configuring and generally getting the most out of Megatest. + +Limiting your running jobs +-------------------------- + +The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously. + +In your testconfig: + +---------------- +[test_meta] +jobgroup group1 +---------------- + +In your megatest.config: + +--------------- +[jobgroups] +group1 10 +custdes 4 +--------------- + + + Debugging Tricks ---------------- Examining The Environment @@ -34,5 +57,13 @@ ------------------- runscript main.csh ------------------- +Debugging Server Problems +~~~~~~~~~~~~~~~~~~~~~~~~~ + +---------------- +sudo lsof -i +sudo netstat -lptu +sudo netstat -tulpn +---------------- Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -1,1227 +1,1196 @@ - - - - - -The Megatest Users Manual - - - - - -
-
-

Preface

-
-

This book is organised as three sub-books; getting started, writing tests and reference.

-
-

Why Megatest?

-

The Megatest project was started for two reasons, the first was an -immediate and pressing need for a generalized tool to manage a suite -of regression tests and the second was the fact that the author had -written or maintained several such tools at different companies over -the years and it seemed a good thing to have a single open source -tool, flexible enough to meet the needs of any team doing continuous -integrating and or running a complex suite of tests for release -qualification.

-
-
-

Megatest Design Philosophy

-

Megatest is intended to provide the minimum needed resources to make -writing a suite of tests and tasks for implementing continuous build -for software, design engineering or process control (via owlfs for -example) without being specialized for any specific problem -space. Megatest in of itself does not know what constitutes a PASS or -FAIL of a test. In most cases megatest is best used in conjunction -with logpro or a similar tool to parse, analyze and decide on the test -outcome.

-
-
-

Megatest Architecture

-

All data to specify the tests and configure the system is stored in -plain text files. All system state is stored in an sqlite3 -database. Tests are launched using the launching system available for -the distributed compute platform in use. A template script is provided -which can launch jobs on local and remote Linux hosts. Currently -megatest uses the network filesystem to call home to your master -sqlite3 database.

-
-
-
-

Road Map

-

Note 1: This road-map is tentative and subject to change without notice.

-

Note 2: Starting over. Old plan is commented out.

-
-

Current Items

-
-
-

ww05 - migrate to inmem-db

-

Keep as much the same as possible. Add internal reference to almost -eliminate contention on db(s).

-
    -
  1. -

    -Add internal reference db -

    -
  2. -
  3. -

    -Verify that actions are accessing correct db -

    -
      -
    1. -

      --runtests - inmem -

      -
    2. -
    3. -

      --list-runs - local (but not megatest.db) -

      -
    4. -
    5. -

      -dashboard - local (but not megatest.db) -

      -
    6. -
    -
  4. -
  5. -

    -Mirror db to /var/tmp… -

    -
  6. -
  7. -

    -Dashboard read db from per-run db. -

    -
  8. -
  9. -

    -Dashboard read db from /var/tmp -

    -
  10. -
  11. -

    -Runs register in tasks table in monitor.db -

    -
  12. -
  13. -

    -Server polls tasks table for next action (in addition?) -

    -
  14. -
  15. -

    -Change run loop to execute in server, triggered by call to polling of tasks table -

    -
  16. -
-
-
-
-

Getting Started

-
-
Getting started with Megatest
-
-

How to install Megatest and set it up for running your regressions and continuous integration process.

-
-
-

Installation

-
-
-

Dependencies

-

Chicken scheme and a number of "eggs" are required for building -Megatest. See the script installall.sch in the utils directory of the -distribution for a mostly automated way to install everything needed -for building Megatest on Linux.

-


[An example footnote.]

-

And now for something completely different: monkeys, lions and -tigers (Bengal and Siberian) using the alternative syntax index -entries. - - - -Note that multi-entry terms generate separate index entries.

-

Here are a couple of image examples: an -images/smallnew.png - -example inline image followed by an example block image:

-
-
-Tiger image -
-
Figure 1. Tiger block image
-
-

Followed by an example table:

-
- - --- - - - - - - - - - - - - - - - -
Table 1. An example table
Option Description

-a USER GROUP

Add USER to GROUP.

-R GROUP

Disables access to GROUP.

-
-
-
Example 1. An example example
-
-

Lorum ipum…

-
-
-
-

Sub-section with Anchor

-

Sub-section at level 2.

-
-

Chapter Sub-section

-

Sub-section at level 3.

-
-
Chapter Sub-section
-

Sub-section at level 4.

-

This is the maximum sub-section depth supported by the distributed -AsciiDoc configuration. -
[A second example footnote.]

-
-
-
-
-
-
-

The Second Chapter

-
-

An example link to anchor at start of the first sub-section.

-

An example link to a bibliography entry [taoup].

-
-
-

Writing Tests

-
-

The First Chapter of the Second Part

-
-

Chapters grouped into book parts are at level 1 and can contain -sub-sections.

-
-
-

How To Do Things

-
-

Tricks

-
-

This section is a compendium of a various useful tricks for debugging, -configuring and generally getting the most out of Megatest.

-
-
-
-

Debugging Tricks

-
-
-

Examining The Environment

-
-

During Config File Processing

-
-
-

Organising Your Tests and Tasks

-
-
-
[tests-paths]
-1 #{get misc parent}/simplerun/tests
-
-
-
-
[setup]
-
-

The runscript method is a brute force way to run scripts where the -user is responsible for setting STATE and STATUS

-
-
-
runscript main.csh
-
-
-
-
-
-

Reference

-
-

The First Chapter of the Second Part

-
-

Chapters grouped into book parts are at level 1 and can contain -sub-sections.

-
-
-
-

The testconfig File

-
-
-

Setup section

-
-

Header

-
-
-
[setup]
-
-

The runscript method is a brute force way to run scripts where the -user is responsible for setting STATE and STATUS

-
-
-
runscript main.csh
-
-
-
-
-

Requirements section

-
-

Header

-
-
-
[requirements]
-
-
-
-

Wait on Other Tests

-
-
-
# A normal waiton waits for the prior tests to be COMPLETED
-# and PASS, CHECK or WAIVED
-waiton test1 test2
-
-
-
-

Mode

-

The default (i.e. if mode is not specified) is normal. All pre-dependent tests -must be COMPLETED and PASS, CHECK or WAIVED before the test will start

-
-
-
mode   normal
-
-

The toplevel mode requires only that the prior tests are COMPLETED.

-
-
-
mode toplevel
-
-

A item based waiton will start items in a test when the -same-named item is COMPLETED and PASS, CHECK or WAIVED -in the prior test

-
-
-
mode itemmatch
-
-
-
-
# With a toplevel test you may wish to generate your list
-# of tests to run dynamically
-#
-# waiton #{shell get-valid-tests-to-run.sh}
-
-
-
-

Run time limit

-
-
-
runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s
-
-
-
-

Skip

-
-
-

Header

-
-
-
[skip]
-
-
-
-

Skip on Still-running Tests

-
-
-
# NB// If the prevrunning line exists with *any* value the test will
-# automatically SKIP if the same-named test is currently RUNNING
-
-prevrunning x
-
-
-
-

Skip if a File Exists

-
-
-
fileexists /path/to/a/file # skip if /path/to/a/file exists
-
-
-
-

Controlled waiver propagation

-

If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig: -If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED

-

Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)

-
-
-
###### EXAMPLE FROM testconfig #########
-# matching file(s) will be diff'd with previous run and logpro applied
-# if PASS or WARN result from logpro then WAIVER state is set
-#
-[waivers]
-# logpro_file    rulename      input_glob
-waiver_1         logpro        lookittmp.log
-
-[waiver_rules]
-
-# This builtin rule is the default if there is no <waivername>.logpro file
-# diff   diff %file1% %file2%
-
-# This builtin rule is applied if a <waivername>.logpro file exists
-# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
-
-
-
-
-

Ezsteps

-

To transfer the environment to the next step you can do the following:

-
-
-
$MT_MEGATEST -env2file .ezsteps/${stepname}
-
-
-
-

Megatest Internals

-
-
-server.png -
-
-
-
-
-
-

Appendix A: Example Appendix

-
-

One or more optional appendixes go here at section level zero.

-
-

Appendix Sub-section

-
- - - -
-
Note
-
Preface and appendix subsections start out of sequence at level -2 (level 1 is skipped). This only applies to multi-part book -documents.
-
-
-
-
-
-

Example Bibliography

-
-

The bibliography list is a style of AsciiDoc bulleted list.

-
    -
  • -

    -[taoup] Eric Steven Raymond. The Art of Unix - Programming. Addison-Wesley. ISBN 0-13-142901-9. -

    -
  • -
  • -

    -[walsh-muellner] Norman Walsh & Leonard Muellner. - DocBook - The Definitive Guide. O’Reilly & Associates. 1999. - ISBN 1-56592-580-7. -

    -
  • -
-
-
-
-

Example Glossary

-
-

Glossaries are optional. Glossaries entries are an example of a style -of AsciiDoc labeled lists.

-
-
-A glossary term -
-
-

- The corresponding (indented) definition. -

-
-
-A second glossary term -
-
-

- The corresponding (indented) definition. -

-
-
-
-
-
-

Example Colophon

-
-

Text at the end of a book describing facts about its production.

-
-
-
-

Example Index

-
-
-
-
-

- - - + + + + + +The Megatest Users Manual + + + + + +
+
+

Preface

+
+

This book is organised as three sub-books; getting started, writing tests and reference.

+
+

Why Megatest?

+

The Megatest project was started for two reasons, the first was an +immediate and pressing need for a generalized tool to manage a suite +of regression tests and the second was the fact that the author had +written or maintained several such tools at different companies over +the years and it seemed a good thing to have a single open source +tool, flexible enough to meet the needs of any team doing continuous +integrating and or running a complex suite of tests for release +qualification.

+
+
+

Megatest Design Philosophy

+

Megatest is intended to provide the minimum needed resources to make +writing a suite of tests and tasks for implementing continuous build +for software, design engineering or process control (via owlfs for +example) without being specialized for any specific problem +space. Megatest in of itself does not know what constitutes a PASS or +FAIL of a test. In most cases megatest is best used in conjunction +with logpro or a similar tool to parse, analyze and decide on the test +outcome.

+
+
+

Megatest Architecture

+

All data to specify the tests and configure the system is stored in +plain text files. All system state is stored in an sqlite3 +database. Tests are launched using the launching system available for +the distributed compute platform in use. A template script is provided +which can launch jobs on local and remote Linux hosts. Currently +megatest uses the network filesystem to call home to your master +sqlite3 database.

+
+
+
+

Road Map

+

Note 1: This road-map is tentative and subject to change without notice.

+

Note 2: Starting over. Old plan is commented out.

+
+

Current Items

+
+
+

ww05 - migrate to inmem-db

+

Keep as much the same as possible. Add internal reference to almost +eliminate contention on db(s).

+
    +
  1. +

    +Add internal reference db +

    +
  2. +
  3. +

    +Verify that actions are accessing correct db +

    +
      +
    1. +

      +-runtests - inmem +

      +
    2. +
    3. +

      +-list-runs - local (but not megatest.db) +

      +
    4. +
    5. +

      +dashboard - local (but not megatest.db) +

      +
    6. +
    +
  4. +
  5. +

    +Mirror db to /var/tmp… +

    +
  6. +
  7. +

    +Dashboard read db from per-run db. +

    +
  8. +
  9. +

    +Dashboard read db from /var/tmp +

    +
  10. +
  11. +

    +Runs register in tasks table in monitor.db +

    +
  12. +
  13. +

    +Server polls tasks table for next action (in addition?) +

    +
  14. +
  15. +

    +Change run loop to execute in server, triggered by call to polling of tasks table +

    +
  16. +
+
+
+
+

Getting Started

+
+
Getting started with Megatest
+
+

How to install Megatest and set it up for running your regressions and continuous integration process.

+
+
+

Installation

+
+
+

Dependencies

+

Chicken scheme and a number of "eggs" are required for building +Megatest. See the script installall.sch in the utils directory of the +distribution for a mostly automated way to install everything needed +for building Megatest on Linux.

+


[An example footnote.]

+

And now for something completely different: monkeys, lions and +tigers (Bengal and Siberian) using the alternative syntax index +entries. + + + +Note that multi-entry terms generate separate index entries.

+

Here are a couple of image examples: an +images/smallnew.png + +example inline image followed by an example block image:

+
+
+Tiger image +
+
Figure 1. Tiger block image
+
+

Followed by an example table:

+
+ + +++ + + + + + + + + + + + + + + + +
Table 1. An example table
Option Description

-a USER GROUP

Add USER to GROUP.

-R GROUP

Disables access to GROUP.

+
+
+
Example 1. An example example
+
+

Lorum ipum…

+
+
+
+

Sub-section with Anchor

+

Sub-section at level 2.

+
+

Chapter Sub-section

+

Sub-section at level 3.

+
+
Chapter Sub-section
+

Sub-section at level 4.

+

This is the maximum sub-section depth supported by the distributed +AsciiDoc configuration. +
[A second example footnote.]

+
+
+
+
+
+
+

The Second Chapter

+
+

An example link to anchor at start of the first sub-section.

+

An example link to a bibliography entry [taoup].

+
+
+

Writing Tests

+
+

The First Chapter of the Second Part

+
+

Chapters grouped into book parts are at level 1 and can contain +sub-sections.

+
+
+

How To Do Things

+
+

Tricks

+
+

This section is a compendium of a various useful tricks for debugging, +configuring and generally getting the most out of Megatest.

+
+
+
+

Debugging Tricks

+
+
+

Examining The Environment

+
+

During Config File Processing

+
+
+

Organising Your Tests and Tasks

+
+
+
[tests-paths]
+1 #{get misc parent}/simplerun/tests
+
+
+
+
[setup]
+
+

The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS

+
+
+
runscript main.csh
+
+
+
+
+
+
+

Tricks

+
+

This section is a compendium of a various useful tricks for debugging, +configuring and generally getting the most out of Megatest.

+
+
+
+

Debugging Tricks

+
+
+

Examining The Environment

+
+

During Config File Processing

+
+
+

Organising Your Tests and Tasks

+

/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa

+
+
+
[tests-paths]
+1 #{get misc parent}/simplerun/tests
+
+
+
+
[setup]
+
+

The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS

+
+
+
runscript main.csh
+
+

ww30.2 +cellname/LVS/cellname.LAYOUT_ERRORS

+

Error: text open

+

ww31.3 +cellname/LVS/cellname.LAYOUT_ERRORS

+

Error: text open +Reference

+
+
+

Chapters grouped into book parts are at level 1 and can contain +sub-sections.

+
+
+
[setup]
+
+

The runscript method is a brute force way to run scripts where the +user is responsible for setting STATE and STATUS

+
+
+
runscript main.csh
+
+
+
+
[requirements]
+
+
+
+
# A normal waiton waits for the prior tests to be COMPLETED
+# and PASS, CHECK or WAIVED
+waiton test1 test2
+
+

The default (i.e. if mode is not specified) is normal. All pre-dependent tests +must be COMPLETED and PASS, CHECK or WAIVED before the test will start

+
+
+
mode   normal
+
+

The toplevel mode requires only that the prior tests are COMPLETED.

+
+
+
mode toplevel
+
+

A item based waiton will start items in a test when the +same-named item is COMPLETED and PASS, CHECK or WAIVED +in the prior test

+
+
+
mode itemmatch
+
+
+
+
# With a toplevel test you may wish to generate your list
+# of tests to run dynamically
+#
+# waiton #{shell get-valid-tests-to-run.sh}
+
+
+
+
runtimelim 1h 2m 3s  # this will automatically kill the test if it runs for more than 1h 2m and 3s
+
+
+
+
[skip]
+
+
+
+
# NB// If the prevrunning line exists with *any* value the test will
+# automatically SKIP if the same-named test is currently RUNNING
+
+prevrunning x
+
+
+
+
fileexists /path/to/a/file # skip if /path/to/a/file exists
+
+

If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig: +If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED

+

Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)

+
+
+
###### EXAMPLE FROM testconfig #########
+# matching file(s) will be diff'd with previous run and logpro applied
+# if PASS or WARN result from logpro then WAIVER state is set
+#
+[waivers]
+# logpro_file    rulename      input_glob
+waiver_1         logpro        lookittmp.log
+
+[waiver_rules]
+
+# This builtin rule is the default if there is no <waivername>.logpro file
+# diff   diff %file1% %file2%
+
+# This builtin rule is applied if a <waivername>.logpro file exists
+# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
+
+

To transfer the environment to the next step you can do the following:

+
+
+
$MT_MEGATEST -env2file .ezsteps/${stepname}
+
+
+

Megatest Internals

+
+
+server.png +
+
+
+

One or more optional appendixes go here at section level zero.

+
+ + + +
+
Note
+
Preface and appendix subsections start out of sequence at level +2 (level 1 is skipped). This only applies to multi-part book +documents.
+
+

The bibliography list is a style of AsciiDoc bulleted list.

+
    +
  • +

    +[taoup] Eric Steven Raymond. The Art of Unix + Programming. Addison-Wesley. ISBN 0-13-142901-9. +

    +
  • +
  • +

    +[walsh-muellner] Norman Walsh & Leonard Muellner. + DocBook - The Definitive Guide. O’Reilly & Associates. 1999. + ISBN 1-56592-580-7. +

    +
  • +
+

Glossaries are optional. Glossaries entries are an example of a style +of AsciiDoc labeled lists.

+
+
+A glossary term +
+
+

+ The corresponding (indented) definition. +

+
+
+A second glossary term +
+
+

+ The corresponding (indented) definition. +

+
+
+

Text at the end of a book describing facts about its production.

+
+
+
+
+
+
+

+ + + Index: docs/manual/server.png ================================================================== --- docs/manual/server.png +++ docs/manual/server.png cannot compute difference between binary files Index: example/cfg/machines.dat ================================================================== --- example/cfg/machines.dat +++ example/cfg/machines.dat @@ -1,16 +1,16 @@ [] [maxload] zeus 0.40000000000000002 xena 0.20000000000000001 -myth1 0.01 +myth2 0.01 hades 1 [minfree] zeus 1000 xena 20000 -myth1 300000 +myth2 300000 hades 4000000 [reqprocs] zeus mfsmount mythbackend mfschunkserver xena mfsmount -myth1 mfsmount mythfrontend mfschunkserver +myth2 mfsmount mythfrontend mfschunkserver hades mfsmount mfsmetalogger mfschunkserver Index: example/cfg/sxml/_sheets.sxml ================================================================== --- example/cfg/sxml/_sheets.sxml +++ example/cfg/sxml/_sheets.sxml @@ -26,12 +26,13 @@ (http://www.gnumeric.org/v10.dtd:name "WorkbookView::is_protected") (http://www.gnumeric.org/v10.dtd:value "FALSE"))) (urn:oasis:names:tc:opendocument:xmlns:office:1.0:document-meta (@ (urn:oasis:names:tc:opendocument:xmlns:office:1.0:version "1.2")) (urn:oasis:names:tc:opendocument:xmlns:office:1.0:meta + (http://purl.org/dc/elements/1.1/:date "2014-02-14T06:16:26Z") (urn:oasis:names:tc:opendocument:xmlns:meta:1.0:creation-date - "2014-02-14T06:12:52Z"))) + "2014-02-14T06:16:17Z"))) (http://www.gnumeric.org/v10.dtd:Calculation (@ (MaxIterations "100") (ManualRecalc "0") (IterationTolerance "0.001") (FloatRadix "2") Index: example/cfg/sxml/machines.sxml ================================================================== --- example/cfg/sxml/machines.sxml +++ example/cfg/sxml/machines.sxml @@ -88,13 +88,13 @@ (http://www.gnumeric.org/v10.dtd:Rows (@ (DefaultSizePts "12.75")) (http://www.gnumeric.org/v10.dtd:RowInfo (@ (Unit "13.5") (No "0") (Count "5")))) (http://www.gnumeric.org/v10.dtd:Selections - (@ (CursorRow "3") (CursorCol "0")) + (@ (CursorRow "4") (CursorCol "0")) (http://www.gnumeric.org/v10.dtd:Selection - (@ (startRow "3") (startCol "0") (endRow "3") (endCol "0")))) + (@ (startRow "4") (startCol "0") (endRow "4") (endCol "0")))) (http://www.gnumeric.org/v10.dtd:SheetLayout (@ (TopLeft "A1"))) (http://www.gnumeric.org/v10.dtd:Solver (@ (ProgramR "0") (ProblemType "0") (NonNeg "1") DELETED fs-transport.scm Index: fs-transport.scm ================================================================== --- fs-transport.scm +++ /dev/null @@ -1,44 +0,0 @@ - -;; Copyright 2006-2012, Matthew Welland. -;; -;; This program is made available under the GNU GPL version 2.0 or -;; greater. See the accompanying file COPYING for details. -;; -;; This program is distributed WITHOUT ANY WARRANTY; without even the -;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -;; PURPOSE. - -(require-extension (srfi 18) extras tcp s11n) - -(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) -(import (prefix sqlite3 sqlite3:)) - -(use spiffy uri-common intarweb http-client spiffy-request-vars) - -(tcp-buffer-size 2048) - -(declare (unit fs-transport)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tests)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. - -(include "common_records.scm") -(include "db_records.scm") - - -;;====================================================================== -;; F S T R A N S P O R T S E R V E R -;;====================================================================== - -;; There is no "server" per se but a convience routine to make it non -;; necessary to be reopening the db over and over again. -;; - -(define (fs:process-queue-item packet) - (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called - (set! *megatest-db* (open-db))) - (debug:print-info 11 "fs:process-queue-item called with packet=" packet) - (db:process-queue-item *megatest-db* packet)) - Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -68,11 +68,11 @@ ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (open-run-close tasks:server-get-next-port tasks:open-db)) - (link-tree-path (config-lookup *configdat* "setup" "linktree"))) + (link-tree-path (configf:lookup *configdat* "setup" "linktree"))) (set! db *inmemdb*) (root-path (if link-tree-path link-tree-path (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! (handle-directory spiffy-directory-listing) @@ -127,15 +127,15 @@ ((equal? (uri-path (request-uri (current-request))) '(/ "hey")) (send-response body: "hey there!\n" headers: '((content-type text/plain)))) (else (continue)))))))) - (http-transport:try-start-server ipaddrstr start-port server-id))) + (http-transport:try-start-server run-id ipaddrstr start-port server-id))) ;; This is recursively run by http-transport:run until sucessful ;; -(define (http-transport:try-start-server ipaddrstr portnum server-id) +(define (http-transport:try-start-server run-id ipaddrstr portnum server-id) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 9000) @@ -143,23 +143,26 @@ (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here - (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id)) - (print "ERROR: Tried and tried but could not start the server"))) + (http-transport:try-start-server run-id ipaddrstr (+ portnum 1) server-id)) + (begin + (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") + (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) (open-run-close tasks:server-set-interface-port tasks:open-db server-id ipaddrstr portnum) (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL - (start-server bind-address: ipaddrstr port: portnum) - (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) + ;; (start-server bind-address: ipaddrstr port: portnum) + (start-server port: portnum) + (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -219,18 +222,23 @@ ;; Send "cmd" with json payload "params" to serverdat and receive result ;; (define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30)) (let* ((fullurl (if (list? serverdat) - (cadddr serverdat) ;; this is the uri for /api + (list-ref serverdat 4) ;; (cadddr serverdat) ;; this is the uri for /api (begin (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info") (exit 1)))) (res #f)) (handle-exceptions exn - #f + (if (> numretries 0) + (begin + (mutex-unlock! *http-mutex*) + (thread-sleep! 10) + (http-transport:client-api-send-receive run-id serverdat cmd params (- numretries 1))) + #f) (begin (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here (max-retry-attempts 5) ;; consider all requests indempotent @@ -264,25 +272,29 @@ res))))) ;; ;; connect ;; -(define (http-transport:client-connect run-id iface port) - (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) - (server-dat (list iface port uri-dat uri-api-dat)) - (login-res (rmt:login-no-auto-client-setup server-dat run-id))) - ;; (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... - (if (and (list? login-res) - (car login-res)) - (begin - (debug:print-info 2 "Logged in and connected to " iface ":" port) +(define (http-transport:client-connect iface port) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) + ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) + (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api")))) + ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api")))) + (server-dat (list iface port uri-dat uri-api-dat api-url))) +;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id))) + server-dat)) +;; (if (and (list? login-res) (hash-table-set! *runremote* run-id server-dat) server-dat) - (begin - (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) - #f)))) +;; (hash-table-set! *runremote* run-id server-dat) +;; (debug:print-info 2 "Logged in and connected to " iface ":" port) +;; (hash-table-set! *runremote* run-id server-dat) +;; server-dat) +;; (begin +;; (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) +;; #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running server-id) @@ -307,39 +319,43 @@ sdat)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db)) - (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout"))) + (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days - (* 60 60) ;; default to one hour + ;; (* 60 1) ;; default to one minute + (* 60 60 25) ;; default to 25 hours )))) - ;; - ;; set_running - ;; - (tasks:server-set-state! tdb server-id "running") - (let loop ((count 0)) + (let loop ((count 0) + (server-state 'available)) ;; Use this opportunity to sync the inmemdb to db (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) + (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) (debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time) - (if (and (<= rem-time 4) - (> rem-time 0)) - (thread-sleep! rem-time) - (thread-sleep! 4))) ;; fallback for if the math is changed ... + + ;; + ;; set_running after our first pass through + ;; + (if (eq? server-state 'available) + (tasks:server-set-state! tdb server-id "running")) - ;; (thread-sleep! 4) ;; no need to do this very often - + (if (and (<= rem-time 4) + (> rem-time 0)) + (thread-sleep! rem-time) + (thread-sleep! 4))) ;; fallback for if the math is changed ... + (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1))) + (loop (+ count 1) 'running)) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) @@ -363,11 +379,18 @@ (if (and *server-run* (> (+ last-access server-timeout) (current-seconds))) (begin (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (loop 0)) + ;; + ;; Consider implementing some smarts here to re-insert the record or kill self is + ;; the db indicates so + ;; + ;; (if (tasks:server-am-i-the-server? tdb run-id) + ;; (tasks:server-set-state! tdb server-id "running")) + ;; + (loop 0 server-state)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t)) @@ -390,11 +413,11 @@ "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") - (tasks:server-delete-record! tdb server-id) + (tasks:server-delete-record tdb server-id " http-transport:keep-running") (exit)))))) ;; all routes though here end in exit ... ;; ;; start_server? @@ -405,16 +428,23 @@ (daemon:ize)) (if (server:check-if-running run-id) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) - (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) + (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)) + (remtries 4)) (if (not server-id) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db)) + (if (> remtries 0) + (begin + (thread-sleep! 2) + (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id) + (- remtries 1))) + (begin + ;; since we didn't get the server lock we are going to clean up and bail out + (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") + (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch") + )) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -90,14 +90,10 @@ fulln runscript))))) ;; assume it is on the path (rollup-status 0)) (change-directory top-path) (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) - ;; Setup the *runremote* global var - (if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time")) - ;; (set! *runremote* runremote) - ;; (set! *transport-type* (string->symbol transport)) (set! keys (rmt:get-keys)) (set! keyvals (keys:target->keyval keys target)) ;; apply pre-overrides before other variables. The pre-override vars must not ;; clobbers things from the official sources such as megatest.config and runconfigs.config (if (string? set-vars) @@ -318,11 +314,12 @@ (begin (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) #t) #f))))) ;; open-run-close not needed for test-set-meta-info - (tests:set-partial-meta-info test-id run-id minutes work-area) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f) + ;; (tests:set-partial-meta-info test-id run-id minutes work-area) (if kill-job? (begin (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this ;; section and the runit section? Or add a loop that tries three times with a 1/4 second @@ -369,11 +366,11 @@ (thread-join! th1) (thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. (mutex-lock! m) (let* ((item-path (item-list->path itemdat)) ;; only state and status needed - use lazy routine - (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;;;(cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path))) + (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;; Am I completed? (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status ;; "COMPLETED" ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test @@ -394,14 +391,10 @@ new-state new-status (args:get-arg "-m") #f) ;; need to update the top test record if PASS or FAIL and this is a subtest ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! - ;; (if (not (equal? item-path "")) - ;; (begin - ;; (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority - ;; (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))) )) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) (tests:summarize-items run-id test-id test-name #f))) ;; don't force - just update if no (mutex-unlock! m) @@ -500,11 +493,10 @@ (lnkbase (conc linktree "/" target "/" runname)) (lnkpath (conc lnkbase "/" testname)) (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path))) ;; Update the rundir path in the test record for all - ;; (cdb:test-set-rundir-by-test-id *runremote* test-id (filedb:register-path *fdb* lnkpathf)) (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path) (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) (if (not (file-exists? linktree)) (begin @@ -564,11 +556,10 @@ ;; (rmt:sdb-qry 'getstr (db:test-get-rundir testinfo) ;; ) ;; ) #f))) (hash-table-set! *toptest-paths* testname curr-test-path) ;; NB// Was this for the test or for the parent in an iterated test? - ;;(cdb:test-set-rundir! *runremote* run-id testname "" (filedb:register-path *fdb* lnkpath)) ;; toptest-path) (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath (if (file-exists? lnkpath) (resolve-pathname lnkpath) lnkpath) testname "") @@ -700,11 +691,10 @@ (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! cmdparms (base64:base64-encode (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) - ;; (list 'runremote *runremote*) (list 'transport (conc *transport-type*)) (list 'serverinf *server-info*) (list 'toppath *toppath*) (list 'work-area work-area) (list 'test-name test-name) Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -48,24 +48,32 @@ CONSTRAINT runlock_constraint UNIQUE (run_lock));"))) (sqlite3:set-busy-handler! db handler) db)) (define (lock-queue:set-state db test-id newstate) - (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;" - newstate - test-id)) + (handle-exceptions + exn + (thread-sleep! 30) + (lock-queue:set-state db test-id newstate) + (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;" + newstate + test-id))) (define (lock-queue:any-younger? db mystart test-id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (tid) - ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as - (if (not (equal? tid test-id)) - (set! res tid))) - db - "SELECT test_id FROM queue WHERE start_time > ?;" mystart) - res)) + (handle-exceptions + exn + (thread-sleep! 30) + (lock-queue:any-younger? db mystart test-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (tid) + ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as + (if (not (equal? tid test-id)) + (set! res tid))) + db + "SELECT test_id FROM queue WHERE start_time > ?;" mystart) + res))) (define (lock-queue:get-lock db test-id) (let ((res #f) (lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';")) (mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');"))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -8,11 +8,11 @@ ;; PURPOSE. ;; (include "common.scm") ;; (include "megatest-version.scm") -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client) ;; (srfi 18) extras) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils) ;; (srfi 18) extras) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) ;; (use zmq) @@ -44,10 +44,13 @@ (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (file-exists? debugcontrolf) (load debugcontrolf))) +;; Disabled help items +;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) +;; from prior runs with same keys (define help (conc " Megatest, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright Matt Welland 2006-2012 @@ -62,14 +65,14 @@ -runtests tst1,tst2 ... : run tests -remove-runs : remove the data for a run, requires :runname and -testpatt Optionally use :state and :status -set-state-status X,Y : set state to X and status to Y, requires controls per -remove-runs -rerun FAIL,WARN... : force re-run for tests with specificed status(s) - -rollup : (currently disabled) fill run (set by :runname) with latest test(s) - from prior runs with same keys -lock : lock run specified by target and runname -unlock : unlock run specified by target and runname + -set-run-status status : sets status for run to status, requires -target and :runname + -get-run-status : gets status for run specified by target and runname -run-wait : wait on run specified by target and runname Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.) -target key1/key2/... : run for key1, key2, etc. -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig @@ -113,10 +116,11 @@ -show-runconfig : dump the internal representation of the runconfigs.config file -dumpmode json : dump in json format instead of sexpr -show-cmdinfo : dump the command info for a test (run in test environment) Misc + -start-dir path : switch to this directory before running megatest -rebuild-db : bring the database schema up to date -cleanup-db : remove any orphan records, vacuum the db -update-meta : update the tests metadata for all tests -env2file fname : write the environment to fname.csh and fname.sh -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are @@ -128,10 +132,11 @@ -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests + -ping run-id|host:port : ping server, exit with 0 if found Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database -pathmod path : insert path, i.e. path/runame/itempath/logfile.html will clear the field if no rundir/testname/itempath/logfile @@ -186,25 +191,28 @@ ":value" ":expected" ":tol" ":units" ;; misc + "-start-dir" "-server" "-stop-server" "-port" "-extract-ods" "-pathmod" "-env2file" "-setvars" "-set-state-status" + "-set-run-status" "-debug" ;; for *verbosity* > 2 "-gen-megatest-test" "-override-timeout" "-test-files" ;; -test-paths is for listing all "-load" ;; load and exectute a scheme file "-dumpmode" "-run-id" + "-ping" ) (list "-h" "-version" "-force" "-xterm" @@ -229,10 +237,12 @@ "-list-targets" "-list-db-targets" "-show-runconfig" "-show-config" "-show-cmdinfo" + "-get-run-status" + ;; queries "-test-paths" ;; get path(s) to a test, ordered by youngest first "-runall" ;; run all tests "-remove-runs" @@ -256,10 +266,17 @@ (if (args:get-arg "-h") (begin (print help) (exit))) + +(if (args:get-arg "-start-dir") + (if (file-exists? (args:get-arg "-start-dir")) + (change-directory (args:get-arg "-start-dir")) + (begin + (debug:print 0 "ERROR: non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") + (exit 1)))) (if (args:get-arg "-version") (begin (print megatest-version) (exit))) @@ -325,10 +342,38 @@ x " => ")) (common:get-disks) ) "\n")) (set! *didsomething* #t))) + +(if (args:get-arg "-ping") + (let* ((run-id (string->number (args:get-arg "-run-id"))) + (host-port (let ((slst (string-split (args:get-arg "-ping") ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f))) + (toppath (setup-for-run))) + (if (not run-id) + (begin + (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") + (print "ERROR: No run-id") + (exit 1)) + (if (not host-port) + (begin + (debug:print 0 "ERROR: argument to -ping is host:port, got " (args:get-arg "-ping")) + (print "ERROR: bad host:port") + (exit 1)) + (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) + (login-res (rmt:login-no-auto-client-setup server-dat run-id))) + (if (and (list? login-res) + (car login-res)) + (begin + (print "LOGIN_OK") + (exit 0)) + (begin + (print "LOGIN_FAILED") + (exit 1)))))))) ;;====================================================================== ;; Start the server - can be done in conjunction with -runall or -runtests (one day...) ;; we start the server if not running else start the client thread ;;====================================================================== @@ -453,33 +498,38 @@ (read-config "runconfigs.config" #f #t sections: sections)))) data)) (if (args:get-arg "-show-runconfig") - (let ((data (full-runconfigs-read))) - ;; keep this one local - (cond - ((not (args:get-arg "-dumpmode")) - (pp (hash-table->alist data))) - ((string=? (args:get-arg "-dumpmode") "json") + (let ((tl (setup-for-run))) + (push-directory *toppath*) + (let ((data (full-runconfigs-read))) + ;; keep this one local + (cond + ((not (args:get-arg "-dumpmode")) + (pp (hash-table->alist data))) + ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) - (else - (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) - (set! *didsomething* #t))) + (else + (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) + (set! *didsomething* #t)) + (pop-directory))) (if (args:get-arg "-show-config") (let ((tl (setup-for-run)) (data *configdat*)) ;; (read-config "megatest.config" #f #t))) + (push-directory *toppath*) ;; keep this one local (cond ((not (args:get-arg "-dumpmode")) (pp (hash-table->alist data))) ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) (else (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) - (set! *didsomething* #t))) + (set! *didsomething* #t) + (pop-directory))) (if (args:get-arg "-show-cmdinfo") (if (getenv "MT_CMDINFO") (let ((data (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO")))))) (if (equal? (args:get-arg "-dumpmode") "json") @@ -534,10 +584,31 @@ (general-run-call "-set-state-status" "set state and status" (lambda (target runname keys keyvals) (operate-on 'set-state-status)))) + +(if (or (args:get-arg "-set-run-status") + (args:get-arg "-get-run-status")) + (general-run-call + "-set-run-status" + "set run status" + (lambda (target runname keys keyvals) + (let* ((runsdat (cdb:remote-run db:get-runs-by-patt #f keys runname (or (args:get-arg "-target") + (args:get-arg "-reqtarg")) #f #f)) + (header (vector-ref runsdat 0)) + (rows (vector-ref runsdat 1))) + (if (null? rows) + (begin + (debug:print-info 0 "No matching run found.") + (exit 1)) + (let* ((row (car (vector-ref runsdat 1))) + (run-id (db:get-value-by-header row header "id"))) + (if (args:get-arg "-set-run-status") + (cdb:remote-run db:set-run-status #f run-id (args:get-arg "-set-run-status") msg: (args:get-arg "-m")) + (print (open-run-close db:get-run-status #f run-id)) + ))))))) ;;====================================================================== ;; Query runs ;;====================================================================== @@ -738,11 +809,10 @@ (state (args:get-arg ":state")) (status (args:get-arg ":status")) (target (args:get-arg "-target")) (toppath (assoc/default 'toppath cmdinfo))) (change-directory toppath) - ;; (set! *runremote* runremote) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) (if (not (setup-for-run)) @@ -786,11 +856,10 @@ (itemdat (assoc/default 'itemdat cmdinfo)) (state (args:get-arg ":state")) (status (args:get-arg ":status")) (target (args:get-arg "-target"))) (change-directory testpath) - ;; (set! *runremote* runremote) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) (if (not (setup-for-run)) @@ -866,11 +935,10 @@ (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f)) (change-directory testpath) - ;; (set! *runremote* runremote) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) @@ -915,11 +983,10 @@ (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f) ;; (open-db)) (state (args:get-arg ":state")) (status (args:get-arg ":status"))) - ;; (set! *runremote* runremote) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) @@ -932,18 +999,17 @@ ;; has sub commands that are rdb: ;; DO NOT put this one into either cdb:remote-run or open-run-close (tdb:load-test-data run-id test-id)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) - ;; (cdb:test-set-log! *runremote* test-id (sdb:qry 'getid logfname)))) (rmt:test-set-log! run-id test-id logfname))) (if (args:get-arg "-set-toplog") ;; DO NOT run remote (tests:test-set-toplog! run-id test-name (args:get-arg "-set-toplog"))) (if (args:get-arg "-summarize-items") ;; DO NOT run remote - (tests:summarize-items db run-id test-id test-name #t)) ;; do force here + (tests:summarize-items run-id test-id test-name #t)) ;; do force here (if (args:get-arg "-runstep") (if (null? remargs) (begin (debug:print 0 "ERROR: nothing specified to run!") (if db (sqlite3:finalize! db)) @@ -978,11 +1044,10 @@ (debug:print-info 2 "running \"" cmd "\"") (change-directory startingdir) (set! exitstat (system cmd)) (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) - ;; (cdb:test-set-log! *runremote* test-id (sdb:qry 'getid htmllogfile)))) (rmt:test-set-log! run-id test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) (rmt:teststep-set-status! run-id test-id stepname "end" exitstat msg logfile)) ))) (if (or (args:get-arg "-test-status") @@ -1165,11 +1230,11 @@ (if (args:get-arg "-import-megatest.db") (let* ((toppath (setup-for-run)) (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) (mtdb (if toppath (db:open-megatest-db))) - (run-ids (if toppath (db:get-run-ids mtdb)))) + (run-ids (if toppath (db:get-all-run-ids mtdb)))) ;; sync runs, test_meta etc. (db:sync-tables (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f)) (for-each (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) @@ -1185,23 +1250,13 @@ ;; Exit and clean up ;;====================================================================== (if *runremote* (close-all-connections!)) -;; this is the socket if we are a client -;; (if (and *runremote* -;; (socket? *runremote*)) -;; (close-socket *runremote*)) - -;; (if sdb:qry (sdb:qry 'finalize #f)) -;; (if *fdb* (filedb:finalize-db! *fdb*)) - (if (not *didsomething*) (debug:print 0 help)) -;; (if *runremote* (rpc:close-all-connections!)) - (if (not (eq? *globalexitstatus* 0)) (if (or (args:get-arg "-runtests")(args:get-arg "-runall")) (begin (debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*) (exit 0)) ADDED minimal/megatest.config Index: minimal/megatest.config ================================================================== --- /dev/null +++ minimal/megatest.config @@ -0,0 +1,12 @@ +[fields] +RUNTYPE text + +[setup] +linktree #{getenv PWD}/linktree +max_concurrent_jobs 20 + +[jobtools] +launcher nbfake + +[disks] +disk0 #{getenv PWD}/runs ADDED minimal/runconfigs.config Index: minimal/runconfigs.config ================================================================== --- /dev/null +++ minimal/runconfigs.config @@ -0,0 +1,3 @@ +[default] +EXAMPLEVAR 1 + ADDED minimal/tests/tmpspace/testconfig Index: minimal/tests/tmpspace/testconfig ================================================================== --- /dev/null +++ minimal/tests/tmpspace/testconfig @@ -0,0 +1,27 @@ +[ezsteps] + +df [ `df -m /tmp | grep /tmp | awk '{print $3}'` -gt 200000 ] + +[items] +TARGETHOST chlr10722 \ + chlr15003 \ + chlr13406 \ + chlr12539 \ + chlr12713 \ + chlr11407 \ + chlr14713 \ + chlr11440 \ + chlr11417 \ + chlr14709 \ + chlr11722 \ + chlr11445 \ + chlr11421 \ + chlr11404 + +[test_meta] +author mrwellan +owner mrwellan +description Check for available space in /tmp +tags Utility +reviewed ww50.3 + ADDED oldsrc/fs-transport.scm Index: oldsrc/fs-transport.scm ================================================================== --- /dev/null +++ oldsrc/fs-transport.scm @@ -0,0 +1,44 @@ + +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +(require-extension (srfi 18) extras tcp s11n) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(import (prefix sqlite3 sqlite3:)) + +(use spiffy uri-common intarweb http-client spiffy-request-vars) + +(tcp-buffer-size 2048) + +(declare (unit fs-transport)) + +(declare (uses common)) +(declare (uses db)) +(declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. + +(include "common_records.scm") +(include "db_records.scm") + + +;;====================================================================== +;; F S T R A N S P O R T S E R V E R +;;====================================================================== + +;; There is no "server" per se but a convience routine to make it non +;; necessary to be reopening the db over and over again. +;; + +(define (fs:process-queue-item packet) + (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called + (set! *megatest-db* (open-db))) + (debug:print-info 11 "fs:process-queue-item called with packet=" packet) + (db:process-queue-item *megatest-db* packet)) + ADDED oldsrc/zmq-transport.scm Index: oldsrc/zmq-transport.scm ================================================================== --- /dev/null +++ oldsrc/zmq-transport.scm @@ -0,0 +1,494 @@ +;;====================================================================== +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. +;;====================================================================== + +(require-extension (srfi 18) extras tcp s11n) + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) +(import (prefix sqlite3 sqlite3:)) + +(use zmq) + +(declare (unit zmq-transport)) + +(declare (uses common)) +(declare (uses db)) +(declare (uses tests)) +(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(declare (uses server)) + +(include "common_records.scm") +(include "db_records.scm") + +;; Transition to pub --> sub with pull <-- push +;; +;; 1. client sends request to server via push to the pull port +;; 2. server puts request in queue or processes immediately as appropriate +;; 3. server puts responses from completed requests into pub port +;; +;; TODO +;; +;; Done Tested +;; [x] [ ] 1. Add columns pullport pubport to servers table +;; [x] [ ] 2. Add rm of monitor.db if older than 11/12/2012 +;; [x] [ ] 3. Add create of pullport and pubport with finding of available ports +;; [x] [ ] 4. Add client compose of request +;; [x] [ ] - name of client: testname/itempath-test_id-hostname +;; [x] [ ] - name of request: callname, params +;; [x] [ ] - request key: f(clientname, callname, params) +;; [x] [ ] 5. Add processing of subscription hits +;; [x] [ ] - done when get key +;; [x] [ ] - return results +;; [x] [ ] 6. Add timeout processing +;; [x] [ ] - after 60 seconds +;; [ ] [ ] i. check server alive, connect to new if necessary +;; [ ] [ ] ii. resend request +;; [ ] [ ] 7. Turn self ping back on + +(define (zmq-transport:make-server-url hostport) + (if (not hostport) + #f + (conc "tcp://" (car hostport) ":" (cadr hostport)))) + +(define *server-loop-heart-beat* (current-seconds)) +(define *heartbeat-mutex* (make-mutex)) + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(define-inline (zmqsock:get-pub dat)(vector-ref dat 0)) +(define-inline (zmqsock:get-pull dat)(vector-ref dat 1)) +(define-inline (zmqsock:set-pub! dat s)(vector-set! dat s 0)) +(define-inline (zmqsock:set-pull! dat s)(vector-set! dat s 0)) + +(define (zmq-transport:run hostn) + (debug:print 2 "Attempting to start the server ...") + (if (not *toppath*) + (if (not (setup-for-run)) + (begin + (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting") + (exit)))) + (let* ((db (open-db)) ;; here we *do not* want to be opening and closing the db + (zmq-sdat1 #f) + (zmq-sdat2 #f) + (pull-socket #f) + (pub-socket #f) + (p1 #f) + (p2 #f) + (zmq-sockets-dat #f) + (iface (if (string=? "-" hostn) + "*" ;; (get-host-name) + hostn)) + (hostname (get-host-name)) + (ipaddrstr (let ((ipstr (if (string=? "-" hostn) + (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") + #f))) + (if ipstr ipstr hostname))) + (last-run 0)) + (set! zmq-sockets-dat (zmq-transport:setup-ports ipaddrstr (if (args:get-arg "-port") + (string->number (args:get-arg "-port")) + (+ 5000 (random 1001))))) + + (set! zmq-sdat1 (car zmq-sockets-dat)) + (set! pull-socket (cadr zmq-sdat1)) ;; (iface s port) + (set! p1 (caddr zmq-sdat1)) + + (set! zmq-sdat2 (cadr zmq-sockets-dat)) + (set! pub-socket (cadr zmq-sdat2)) + (set! p2 (caddr zmq-sdat2)) + + (set! *cache-on* #t) + + (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!? + + ;; what to do when we quit + ;; +;; (on-exit (lambda () +;; (if (and *toppath* *server-info*) +;; (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*)) +;; (let loop () +;; (let ((queue-len 0)) +;; (thread-sleep! (random 5)) +;; (mutex-lock! *incoming-mutex*) +;; (set! queue-len (length *incoming-data*)) +;; (mutex-unlock! *incoming-mutex*) +;; (if (> queue-len 0) +;; (begin +;; (debug:print-info 0 "Queue not flushed, waiting ...") +;; (loop)))))))) + + ;; The heavy lifting + ;; + ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime + ;; + (debug:print-info 11 "Server setup complete, start listening for messages") + (let loop ((queue-lst '())) + (let* ((rawmsg (receive-message* pull-socket)) + (packet (db:string->obj rawmsg)) + (qtype (cdb:packet-get-qtype packet))) + (debug:print-info 12 "server=> received packet=" packet) + (if (not (member qtype '(sync ping))) + (begin + (mutex-lock! *heartbeat-mutex*) + (set! *last-db-access* (current-seconds)) + (mutex-unlock! *heartbeat-mutex*))) + (if #t ;; (cdb:packet-get-immediate packet) ;; process immediately or put in queue + (begin + (db:process-queue-item db packet) + ;; (open-run-close db:process-queue #f pub-socket (cons packet queue-lst)) + + (loop '())) + (loop (cons packet queue-lst))))))) + +;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (zmq-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (let* ((server-info (let loop () + (let ((sdat #f)) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (if sdat sdat + (begin + (debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again") + (sleep 4) + (loop)))))) + (iface (cadr server-info)) + (pullport (caddr server-info)) + (pubport (cadddr server-info)) ;; id interface pullport pubport) + ;; (zmq-sockets (zmq-transport:client-connect iface pullport pubport)) + (last-access 0)) + (debug:print-info 11 "heartbeat started for zmq server on " iface " " pullport " " pubport) + (let loop ((count 0)) + (thread-sleep! 4) ;; no need to do this very often + ;; NB// sync currently does NOT return queue-length + ;; GET REAL QUEUE LENGTH FROM THE VARIABLE + (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1))) + ;; (print "Server running, count is " count) + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1))) + + ;; NOTE: Get rid of this mechanism! It really is not needed... + (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)) + + ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access + (mutex-lock! *heartbeat-mutex*) + (set! last-access *last-db-access*) + (mutex-unlock! *heartbeat-mutex*) + (if (> (+ last-access + ;; (* 50 60 60) ;; 48 hrs + ;; 60 ;; one minute + ;; (* 60 60) ;; one hour + (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. + ) + (current-seconds)) + (begin + (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (loop 0)) + (begin + (debug:print-info 0 "Starting to shutdown the server.") + ;; need to delete only *my* server entry (future use) + (set! *time-to-exit* #t) + (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name)) + (thread-sleep! 1) + (debug:print-info 0 "Max cached queries was " *max-cache-size*) + (debug:print-info 0 "Server shutdown complete. Exiting") + (exit))))))) + +(define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50)) + (let ((s (if s s (make-socket stype))) + (p (if (number? port) port 5555)) + (old-handler (current-exception-handler))) + (handle-exceptions + exn + (begin + (debug:print 0 "Failed to bind to port " p ", trying next port") + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + ;; (old-handler) + ;; (print-call-chain) + (if (> trynum 0) + (zmq-transport:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1)) + (debug:print-info 0 "Tried ports up to " p + " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use")) + (exit)) ;; To exit or not? That is the question. + (let ((zmq-url (conc "tcp://" iface ":" p))) + (debug:print 2 "Trying to start server on " zmq-url) + (bind-socket s zmq-url) + (list iface s port))))) + +(define (zmq-transport:setup-ports ipaddrstr startport) + (let* ((s1 (zmq-transport:find-free-port-and-open ipaddrstr #f startport 'pull)) + (p1 (caddr s1)) + (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub)) + (p2 (caddr s2))) + (set! *runremote* #f) + (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2) + (mutex-lock! *heartbeat-mutex*) + (set! *server-info* (open-run-close tasks:server-register + tasks:open-db + (current-process-id) + ipaddrstr p1 + 0 + 'live + 'zmq + pubport: p2)) + (debug:print-info 11 "*server-info* set to " *server-info*) + (mutex-unlock! *heartbeat-mutex*) + (list s1 s2))) + +(define (zmq-transport:mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (argv))))))) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;;====================================================================== +;; C L I E N T S +;;====================================================================== + +;; +(define (zmq-transport:client-socket-connect iface port #!key (context #f)(type 'req)(subscriptions '())) + (debug:print-info 3 "client-connect " iface ":" port ", type=" type ", subscriptions=" subscriptions) + (let ((connect-ok #f) + (zmq-socket (if context + (make-socket type context) + (make-socket type))) + (conurl (zmq-transport:make-server-url (list iface port)))) + (if (socket? zmq-socket) + (begin + ;; first apply subscriptions + (for-each (lambda (subscription) + (debug:print 2 "Subscribing to " subscription) + (socket-option-set! zmq-socket 'subscribe subscription)) + subscriptions) + (connect-socket zmq-socket conurl) + zmq-socket) + (begin + (debug:print 0 "ERROR: Failed to open socket to " conurl) + #f)))) + +(define (zmq-transport:client-connect iface pullport pubport) + (let* ((push-socket (zmq-transport:client-socket-connect iface pullport type: 'push)) + (sub-socket (zmq-transport:client-socket-connect iface pubport + type: 'sub + subscriptions: (list (client:get-signature) "all"))) + (zmq-sockets (vector push-socket sub-socket)) + (login-res #f)) + (debug:print-info 11 "zmq-transport:client-connect started. Next is login") + (set! login-res (client:login serverdat zmq-sockets)) + (if (and (not (null? login-res)) + (car login-res)) + (begin + (debug:print-info 2 "Logged in and connected to " iface ":" pullport "/" pubport ".") + (set! *runremote* zmq-sockets) + zmq-sockets) + (begin + (debug:print-info 2 "Failed to login or connect to " conurl) + (set! *runremote* #f) + #f)))) + +;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being +;; used and to shutdown after sometime if it is not. +;; +(define (zmq-transport:keep-running) + ;; if none running or if > 20 seconds since + ;; server last used then start shutdown + ;; This thread waits for the server to come alive + (let* ((server-info (let loop () + (let ((sdat #f)) + (mutex-lock! *heartbeat-mutex*) + (set! sdat *runremote*) + (mutex-unlock! *heartbeat-mutex*) + (if sdat sdat + (begin + (sleep 4) + (loop)))))) + (iface (car server-info)) + (port (cadr server-info)) + (last-access 0) + (tdb (tasks:open-db)) + (spid (tasks:server-get-server-id tdb #f iface port #f))) + (print "Keep-running got server pid " spid ", using iface " iface " and port " port) + (let loop ((count 0)) + (thread-sleep! 4) ;; no need to do this very often + ;; NB// sync currently does NOT return queue-length + (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) + ;; (print "Server running, count is " count) + (if (< count 1) ;; 3x3 = 9 secs aprox + (loop (+ count 1))) + + ;; NOTE: Get rid of this mechanism! It really is not needed... + (tasks:server-update-heartbeat tdb spid) + + ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access + (mutex-lock! *heartbeat-mutex*) + (set! last-access *last-db-access*) + (mutex-unlock! *heartbeat-mutex*) + (if (> (+ last-access + ;; (* 50 60 60) ;; 48 hrs + ;; 60 ;; one minute + ;; (* 60 60) ;; one hour + (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. + ) + (current-seconds)) + (begin + (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (loop 0)) + (begin + (debug:print-info 0 "Starting to shutdown the server.") + ;; need to delete only *my* server entry (future use) + (set! *time-to-exit* #t) + (tasks:server-deregister-self tdb (get-host-name)) + (thread-sleep! 1) + (debug:print-info 0 "Max cached queries was " *max-cache-size*) + (debug:print-info 0 "Server shutdown complete. Exiting") + (exit))))))) + +;; all routes though here end in exit ... +(define (zmq-transport:launch) + (if (not *toppath*) + (if (not (setup-for-run)) + (begin + (debug:print 0 "ERROR: cannot find megatest.config, exiting") + (exit)))) + (debug:print-info 2 "Starting zmq server") + (if *toppath* + (let* (;; (th1 (make-thread (lambda () + ;; (let ((server-info #f)) + ;; ;; wait for the server to be online and available + ;; (let loop () + ;; (debug:print-info 2 "Waiting for the server to come online before starting heartbeat") + ;; (thread-sleep! 2) + ;; (mutex-lock! *heartbeat-mutex*) + ;; (set! server-info *server-info* ) + ;; (mutex-unlock! *heartbeat-mutex*) + ;; (if (not server-info)(loop))) + ;; (debug:print 2 "Server alive, starting self-ping") + ;; (zmq-transport:self-ping server-info) + ;; )) + ;; "Self ping")) + (th2 (make-thread (lambda () + (zmq-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-"))) "Server run")) + ;; (th3 (make-thread (lambda ()(zmq-transport:keep-running)) "Keep running")) + ) + (set! *client-non-blocking-mode* #t) + ;; (thread-start! th1) + (thread-start! th2) + ;; (thread-start! th3) + (set! *didsomething* #t) + ;; (thread-join! th3) + (thread-join! th2) + ) + (debug:print 0 "ERROR: Failed to setup for megatest"))) + +(define (zmq-transport:client-signal-handler signum) + (handle-exceptions + exn + (debug:print " ... exiting ...") + (let ((th1 (make-thread (lambda () + (if (not *received-response*) + (receive-message* *runremote*))) ;; flush out last call if applicable + "eat response")) + (th2 (make-thread (lambda () + (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") + (thread-sleep! 3) ;; give the flush three seconds to do it's stuff + (debug:print 0 " Done.") + (exit 4)) + "exit on ^C timer"))) + (thread-start! th2) + (thread-start! th1) + (thread-join! th2)))) + +(define (zmq-transport:client-launch) + (set-signal-handler! signal/int zmq-transport:client-signal-handler) + (if (zmq-transport:client-setup) + (debug:print-info 2 "connected as client") + (begin + (debug:print 0 "ERROR: Failed to connect as client") + (exit)))) + +;;====================================================================== +;; Defunct functions +;;====================================================================== + +;; ping a server and return number of clients or #f (if no response) +;; NOT IN USE! +(define (zmq-transport:ping host port #!key (secs 10)(return-socket #f)) + (cdb:use-non-blocking-mode + (lambda () + (let* ((res #f) + (th1 (make-thread + (lambda () + (let* ((zmq-context (make-context 1)) + (zmq-socket (zmq-transport:client-connect host port context: zmq-context))) + (if zmq-socket + (if (zmq-transport:client-login zmq-socket) + (let ((numclients (cdb:num-clients zmq-socket))) + (if (not return-socket) + (begin + (zmq-transport:client-logout zmq-socket) + (close-socket zmq-socket))) + (set! res (list #t numclients (if return-socket zmq-socket #f)))) + (begin + ;; (close-socket zmq-socket) + (set! res (list #f "CAN'T LOGIN" #f)))) + (set! res (list #f "CAN'T CONNECT" #f))))) + "Ping: th1")) + (th2 (make-thread + (lambda () + (let loop ((count 1)) + (debug:print-info 1 "Ping " count " server on " host " at port " port) + (thread-sleep! 2) + (if (< count (/ secs 2)) + (loop (+ count 1)))) + ;; (thread-terminate! th1) + (set! res (list #f "TIMED OUT" #f))) + "Ping: th2"))) + (thread-start! th2) + (thread-start! th1) + (handle-exceptions + exn + (set! res (list #f "TIMED OUT" #f)) + (thread-join! th1 secs)) + res)))) + +;; (define (zmq-transport:self-ping server-info) +;; ;; server-info: server-id interface pullport pubport +;; (let ((iface (list-ref server-info 1)) +;; (pullport (list-ref server-info 2)) +;; (pubport (list-ref server-info 3))) +;; (zmq-transport:client-connect iface pullport pubport) +;; (let loop () +;; (thread-sleep! 2) +;; (cdb:client-call *runremote* 'ping #t) +;; (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!") +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *server-loop-heart-beat* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*) +;; (loop)))) + +(define (zmq-transport:reply pubsock target query-sig success/fail result) + (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result) + (send-message pubsock target send-more: #t) + (send-message pubsock (db:obj->string (vector success/fail query-sig result)))) + Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -41,21 +41,22 @@ (let* ((run-id (if rid rid 0)) (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) - (thread-sleep! 1) (let ((res (client:setup run-id))) (if res (hash-table-ref *runremote* run-id) ;; client:setup filled this in (hopefully) (if (> numtries 0) - (loop (- numtries 1)) + (begin + (thread-sleep! 10) + (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) (jparams (db:obj->string params)) - (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) + (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") (rmt:send-receive cmd run-id params))))) @@ -83,10 +84,20 @@ ;;====================================================================== ;; ;; A C T U A L A P I C A L L S ;; ;;====================================================================== + +;;====================================================================== +;; S E R V E R +;;====================================================================== + +(define (rmt:kill-server run-id) + (rmt:send-receive 'kill-server run-id (list run-id))) + +(define (rmt:start-server run-id) + (rmt:send-receive 'start-server 0 (list run-id))) ;;====================================================================== ;; M I S C ;;====================================================================== @@ -96,13 +107,10 @@ ;; This login does no retries under the hood - it acts a bit like a ping. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) -(define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) - ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) @@ -120,12 +128,12 @@ ;;====================================================================== ;; K E Y S ;;====================================================================== -;; These should not require run-id but it is more consistent to have it. -;; run-id can theoretically be #f but how to handle that is not yet done. +;; These require run-id because the values come from the run! +;; (define (rmt:get-key-val-pairs run-id) (rmt:send-receive 'get-key-val-pairs run-id (list run-id))) (define (rmt:get-keys) (rmt:send-receive 'get-keys #f '())) @@ -182,12 +190,12 @@ (rmt:send-receive 'delete-test-records run-id (list run-id test-id))) (define (rmt:test-set-status-state run-id test-id status state msg) (rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg))) -(define (rmt:get-previous-test-run-record run-id test-name item-path) - (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) +;; (define (rmt:get-previous-test-run-record run-id test-name item-path) +;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path))) (define (rmt:get-matching-previous-test-run-records run-id test-name item-path) (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path))) (define (rmt:test-get-logfile-info run-id test-name) @@ -200,17 +208,21 @@ (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id))) (define (rmt:test-set-log! run-id test-id logf) (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id))) +(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt) + (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt))) + ;; NOTE: This will open and access ALL run databases. ;; (define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname) - (let ((run-ids (rmt:get-all-run-ids))) ;; (rmt:get-run-ids-matching keynames target res))) - (apply append (lambda (run-id) - (rmt:send-receive 'test-get-paths-matching-keynames-target-new (list keynames target res testpatt statepatt statuspatt runname))) - run-ids))) + (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt))) + (apply append + (map (lambda (run-id) + (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname))) + run-ids)))) (define (rmt:get-run-ids-matching keynames target res) (rmt:send-receive #f 'get-run-ids-matching (list keynames target res))) (define (rmt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal)) @@ -260,18 +272,52 @@ (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts))) (define (rmt:get-all-run-ids) (rmt:send-receive 'get-all-run-ids #f '())) +(define (rmt:get-prev-run-ids run-id) + (rmt:send-receive 'get-prev-run-ids #f (list run-id))) + (define (rmt:lock/unlock-run run-id lock unlock user) (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user))) (define (rmt:update-run-event_time run-id) (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) + +;;====================================================================== +;; M U L T I R U N Q U E R I E S +;;====================================================================== + +;; get the previous record for when this test was run where all keys match but runname +;; returns #f if no such test found, returns a single test record if found +;; +;; Run this at the client end since we have to connect to multiple run-id dbs +;; +(define (rmt:get-previous-test-run-record run-id test-name item-path) + (let* ((keyvals (rmt:get-key-val-pairs run-id)) + (keys (rmt:get-keys)) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))) + (if (not keyvals) + #f + (let ((prev-run-ids (rmt:get-prev-run-ids run-id))) + ;; for each run starting with the most recent look to see if there is a matching test + ;; if found then return that matching test record + (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) + (if (null? prev-run-ids) #f + (let loop ((hed (car prev-run-ids)) + (tal (cdr prev-run-ids))) + (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f))) + (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results) + (if (and (null? results) + (not (null? tal))) + (loop (car tal)(cdr tal)) + (if (null? results) #f + (car results)))))))))) ;;====================================================================== ;; S T E P S ;;====================================================================== Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -29,11 +29,11 @@ (debug:print 4 "Using key=\"" thekey "\"") (if change-env (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. (lambda (keyval) - (setenv (car keyval)(cadr keyval))) + (safe-setenv (car keyval)(cadr keyval))) keyvals)) (for-each (lambda (section) (let ((section-dat (hash-table-ref/default confdat section #f))) @@ -43,11 +43,11 @@ (let ((val (cadr (assoc envvar section-dat)))) (hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1)) (if (and (string? envvar) (string? val) change-env) - (setenv envvar val)) + (safe-setenv envvar val)) (hash-table-set! finaldat envvar val))) (map car section-dat))))) sections) (if already-seen (begin Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -84,11 +84,11 @@ (exit 1))) ;; Now have runconfigs data loaded, set environment vars (for-each (lambda (section) (for-each (lambda (varval) (set! envdat (append envdat (list varval))) - (setenv (car varval)(cadr varval))) + (safe-setenv (car varval)(cadr varval))) (configf:get-section runconfig section))) (list "default" target)) (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) (define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) @@ -110,14 +110,11 @@ ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) (debug:print 2 "setenv " key " " val) - (if (and (string? key) - (string? val)) - (setenv key val) - (debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val)))) + (safe-setenv key val))) (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target)) (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment (let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id)))) (if runname @@ -1154,11 +1151,11 @@ (if (> (- (current-seconds)(+ (db:test-get-event_time testdat) (db:test-get-run_duration testdat))) 600) ;; i.e. no update for more than 600 seconds (begin (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD") - (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f)) + (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f)) (debug:print 2 "NOTE: " test-name " is already running"))) (else (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -46,48 +46,19 @@ ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id) - ;; (if (server:check-if-running run-id) - ;; a server is already running - ;; (exit) (http-transport:launch run-id)) -;; (define (server:launch-no-exit run-id) -;; (if (server:check-if-running run-id) -;; #t ;; if running -;; (http-transport:launch run-id))) - ;;====================================================================== ;; Q U E U E M A N A G E M E N T ;;====================================================================== ;; We don't want to flush the queue if it was just flushed (define *server:last-write-flush* (current-milliseconds)) -;; Flush the queue every third of a second. Can we assume that setup-for-run -;; has already been done? -;; (define (server:write-queue-handler) -;; (if (setup-for-run) -;; (let ((db (open-db))) -;; (let loop () -;; (let ((last-write-flush-time #f)) -;; (mutex-lock! *incoming-mutex*) -;; (set! last-write-flush-time *server:last-write-flush*) -;; (mutex-unlock! *incoming-mutex*) -;; (if (> (- (current-milliseconds) last-write-flush-time) 10) -;; (begin -;; (mutex-lock! *db:process-queue-mutex*) -;; (db:process-cached-writes db) -;; (mutex-unlock! *db:process-queue-mutex*) -;; (thread-sleep! 0.005)))) -;; (loop))) -;; (begin -;; (debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler") -;; (exit 1)))) - ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;; Generate a unique signature for this server @@ -103,39 +74,73 @@ ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) -;; > file 2>&1 -(define (server:try-running run-id) - (let* ((rand-name (random 100)) - (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") - " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id - ".log 2>&1 &"))) - ;; ".log &" ))) +;; Given a run id start a server process ### NOTE ### > file 2>&1 +;; if the run-id is zero and the target-host is set +;; try running on that host +;; +(define (server:run run-id) + (let* ((target-host (configf:lookup *configdat* "server" "homehost" )) + (cmdln (conc (common:get-megatest-exe) + " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &"))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) - (system cmdln) + (if target-host + (begin + (set-environment-variable "TARGETHOST" target-host) + (system (conc "nbfake " cmdln))) + (system cmdln)) (pop-directory))) + +;; kind start up of servers, wait 40 seconds before allowing another server for a given +;; run-id to be launched +(define (server:kind-run run-id) + (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f))) + (if (or (not last-run-time) + (> (- (current-seconds) last-run-time) 40)) + (begin + (server:run run-id) + (hash-table-set! *server-kind-run* run-id (current-seconds)))))) + +;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 +;; +(define (server:try-running run-id) + (if (eq? run-id 0) + (server:run run-id) + (rmt:start-server run-id))) (define (server:check-if-running run-id) (let loop ((server-info (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) - (thread-sleep! 2) (if server-info ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; - (let ((res (http-transport:client-connect + (let ((res (server:ping-server run-id (vector-ref server 1)(vector-ref server 0)))) run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info)))) ;; if the server didn't respond we must remove the record (if res - res + #t (begin - (debug:print 0 "WARNING: running server not reachable, removing record: " server-info) - (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id) + (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id + " server:check-if-running") res))) #f))) + +(define (server:ping-server run-id iface port) + (with-input-from-pipe + (conc (common:get-megatest-exe) " -run-id " run-id " -ping " (conc iface ":" port)) + (lambda () + (let loop ((inl (read-line)) + (res "NOREPLY")) + (if (eof-object? inl) + (case (string->symbol res) + ((NOREPLY) #f) + ((LOGIN_OK) #t) + (else #f)) + (loop (read-line) inl)))))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -91,11 +91,11 @@ (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) - (tasks:server-clean-out-old-records-for-run-id mdb run-id) + (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 0.2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) @@ -123,58 +123,79 @@ (let ((res 0)) (sqlite3:for-each-row (lambda (num-in-queue) (set! res num-in-queue)) mdb - "SELECT count(id) FROM servers WHERE run_id=?;" + "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) -(define (tasks:server-clean-out-old-records-for-run-id mdb run-id) - (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 100 AND run_id=?;" run-id)) - -(define (tasks:server-force-clean-running-records-for-run-id mdb run-id) - (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id)) - -(define (tasks:server-force-clean-run-record mdb run-id iface port) - (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" - run-id iface port)) +(define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-run-record mdb run-id iface port tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" + (conc "defunct" tag) run-id iface port)) + +(define (tasks:server-delete-records-for-this-pid mdb tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" + (conc "defunct" tag) (get-host-name) (current-process-id))) + +(define (tasks:server-delete-record mdb server-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" + (conc "defunct" tag) server-id) + ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder) + (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;") + (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;") + ) (define (tasks:server-set-state! mdb server-id state) - (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id)) - -(define (tasks:server-delete-record! mdb server-id) - (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id)) - -(define (tasks:server-delete-records-for-this-pid mdb) - (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id))) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) (define (tasks:server-set-interface-port mdb server-id interface port) - (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id)) + (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id)) +;; Get random port not used in long time +;; (define (tasks:server-get-next-port mdb) - (let ((res #f) - (port-param (if (and (args:get-arg "-port") - (string->number (args:get-arg "-port"))) - (string->number (args:get-arg "-port")) - #f)) - (config-port (if (and (config-lookup *configdat* "server" "port") - (string->number (config-lookup *configdat* "server" "port"))) - (string->number (config-lookup *configdat* "server" "port")) - #f))) + (let* ((lownum 30000) + (highnum 64000) + (used-ports '()) + (get-rand-port (lambda () + (+ lownum (random (- highnum lownum))))) + (port-param (if (and (args:get-arg "-port") + (string->number (args:get-arg "-port"))) + (string->number (args:get-arg "-port")) + #f)) + ;; (config-port (if (and (config-lookup *configdat* "server" "port") + ;; (string->number (config-lookup *configdat* "server" "port"))) + ;; (string->number (config-lookup *configdat* "server" "port")) + ;; #f)) + ) (sqlite3:for-each-row (lambda (port) - (set! res (+ port 1))) ;; set to next + (set! used-ports (cons port used-ports))) mdb - "SELECT max(port) FROM servers;") + "SELECT port FROM servers;") (cond ((and port-param res) (if (> res port-param) res port-param)) (port-param port-param) - ((and config-port res) (if (> res config-port) res config-port)) - (config-port config-port) - ((and res (> res 8080)) res) - (else (+ 5000 (random 1001)))))) + ;; ((and config-port res) (if (> res config-port) res config-port)) + ;; (config-port config-port) + (else + (let loop ((port (get-rand-port)) + (remtries 100)) + (if (member port used-ports) + (if (> remtries 0) + (loop (get-rand-port)(- remtries 1)) + (get-rand-port)) + port)))))) (define (tasks:server-am-i-the-server? mdb run-id) (let* ((all (tasks:server-get-servers-vying-for-run-id mdb run-id)) (first (if (null? all) (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") @@ -205,11 +226,11 @@ (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb - (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;") + (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running') ORDER BY start_time DESC;") run-id) (vector header res))) (define (tasks:get-server mdb run-id) (let ((res #f) @@ -226,14 +247,15 @@ res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row - (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport) - (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res))) + (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb - "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;") + "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) (define (tasks:kill-server status hostname port pid) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port @@ -253,11 +275,11 @@ ) ;; local machine, send sig term (begin ;;(debug:print-info 1 "Stopping remote servers not yet supported.")))) (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") (let ((serverdat (list hostname port))) - (http-transport:client-connect hostname port) + (hash-table-set! *runremote* run-id (http-transport:client-connect hostname port)) (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide (begin (if status (if (equal? hostname (get-host-name)) (begin Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -11,25 +11,19 @@ ;;====================================================================== ;; Database access ;;====================================================================== -(require-extension (srfi 18) extras tcp) ;; rpc) -;; (import (prefix rpc rpc:)) - +(require-extension (srfi 18) extras tcp) (use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) -;; Note, try to remove this dependency -;; (use zmq) - (declare (unit tdb)) (declare (uses common)) (declare (uses keys)) (declare (uses ods)) -(declare (uses fs-transport)) (declare (uses client)) (declare (uses mt)) (include "common_records.scm") (include "db_records.scm") Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -64,15 +64,15 @@ # NOTE: Only one instance can be a server test5 : cleanprep @echo "WARNING: No longer running fullprep, test converage may be lessened" cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & - cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & - cd fullrun;sleep 5;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & - cd fullrun;sleep 8;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & -# cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & -# cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & a + cd fullrun;sleep 3;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & + cd fullrun;sleep 6;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & + cd fullrun;sleep 9;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & + cd fullrun;sleep 12;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & + cd fullrun;sleep 15;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & # MUST ADD THIS BACK IN ASAP!!!! # cd fullrun;sleep 10;$(MEGATEST) -run-wait -target $(TARGET) :runname % -testpatt % :state RUNNING,LAUNCHED,NOT_STARTED,REMOTEHOSTSTART;echo ALL DONE test6: fullprep Index: tests/fdktestqa/fdk.config ================================================================== --- tests/fdktestqa/fdk.config +++ tests/fdktestqa/fdk.config @@ -2,11 +2,11 @@ SYSTEM TEXT RELEASE TEXT [setup] # Adjust max_concurrent_jobs to limit how much you load your machines -max_concurrent_jobs 150 +max_concurrent_jobs 500 # This is your link path, you can move it but it is generally better to keep it stable linktree #{shell readlink -f #{getenv PWD}/../simplelinks} [include testqa/configs/megatest.abc.config] Index: tests/fdktestqa/testqa/Makefile ================================================================== --- tests/fdktestqa/testqa/Makefile +++ tests/fdktestqa/testqa/Makefile @@ -5,25 +5,23 @@ all : $(MEGATEST) -remove-runs -target a/b :runname c -testpatt %/% $(MEGATEST) -runtests % -target a/b :runname c bigbig : - $(MEGATEST) -server - -daemonize ; sleep 3 for tn in a b c d;do \ ($(MEGATEST) -runtests % -target a/b :runname $tn & ) ; \ done bigrun : - $(MEGATEST) -runtests bigrun -target a/bigrun :runname a + $(MEGATEST) -runtests bigrun -target a/bigrun :runname a$(shell date +%V) bigrun2 : - $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a -transport http + $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a$(shell date +%V) dashboard : $(DASHBOARD) -rows 20 & compile : - $(MEGATEST) -stop-server 0 (cd ../../..;make && make install) clean : - rm -rf ../simple*/*/* megatest.db + rm -rf ../simple*/*/* megatest.db db/* Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,10 +1,11 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log -runqueue 20 -transport http launchwait no +[jobtools] +launcher loadrunner + [include ../fdk.config] [server] port 9080 Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -13,11 +13,11 @@ # launcher nbfake launcher loadrunner # launcher echo # launcher nbfind # launcher nodanggood -launcher nbload +# launcher nbload ## use "xterm -e csi -- " as a launcher to examine the launch environment. ## exit with (exit) ## get a shell with (system "bash") # launcher xterm -e csi -- Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -22,11 +22,11 @@ # this may save a few milliseconds on launching tests # launchwait no waivercommentpatt ^WW\d+ [a-z].* # Use http instead of direct filesystem access -transport http +# transport http # transport fs # If set to "default" the old code is used. Otherwise defaults to 200 or uses # numeric value given. # @@ -115,11 +115,12 @@ # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours -timeout 0.025 +# timeout 0.025 +timeout 0.25 ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area Index: tests/watch-monitor.sh ================================================================== --- tests/watch-monitor.sh +++ tests/watch-monitor.sh @@ -1,8 +1,10 @@ #!/bin/bash +if [ -e fullrun/db/monitor.db ];then sqlite3 fullrun/db/monitor.db << EOF .header on .mode column -select * from servers; +select * from servers order by start_time desc; .q EOF +fi Index: utils/installall.sh ================================================================== --- utils/installall.sh +++ utils/installall.sh @@ -1,10 +1,10 @@ #! /usr/bin/env bash # set -x -# Copyright 2007-2010, Matthew Welland. +# Copyright 2007-2014, Matthew Welland. # # This program is made available under the GNU GPL version 2.0 or # greater. See the accompanying file COPYING for details. # # This program is distributed WITHOUT ANY WARRANTY; without even the @@ -14,10 +14,11 @@ echo You may need to do the following first: echo sudo apt-get install libreadline-dev echo sudo apt-get install libwebkitgtk-dev echo sudo apt-get install libmotif3 -OR- set KTYPE=26g4 echo KTYPE can be 26, 26g4, or 32 +echo echo KTYPE=$KTYPE echo You are using PREFIX=$PREFIX echo You are using proxy="$proxy" echo echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :" @@ -58,14 +59,20 @@ export KTYPE=26 else echo Using KTYPE=$KTYPE fi +# Put all the downloaded tar files in tgz +mkdir -p tgz + +# http://code.call-cc.org/releases/4.8.0/chicken-4.8.0.5.tar.gz export CHICKEN_VERSION=4.8.0.5 export CHICKEN_BASEVER=4.8.0 -if ! [[ -e chicken-${CHICKEN_VERSION}.tar.gz ]]; then - wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/chicken-${CHICKEN_VERSION}.tar.gz +chicken_targz=chicken-${CHICKEN_VERSION}.tar.gz +if ! [[ -e tgz/$chicken_targz ]]; then + wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/${chicken_targz} + mv $chicken_targz tgz fi BUILDHOME=$PWD DEPLOYTARG=$BUILDHOME/deploy @@ -82,11 +89,11 @@ echo PATH=$PATH echo LD_LIBRARY_PATH=$LD_LIBRARY_PATH if ! [[ -e $PREFIX/bin/csi ]]; then - tar xfvz chicken-${CHICKEN_VERSION}.tar.gz + tar xfvz tgz/$chicken_targz cd chicken-${CHICKEN_VERSION} # make PLATFORM=linux PREFIX=$PREFIX spotless make PLATFORM=linux PREFIX=$PREFIX make PLATFORM=linux PREFIX=$PREFIX install cd $BUILDHOME @@ -113,56 +120,65 @@ export LIBPATH=$PREFIX/lib$ADDITIONAL_LIBPATH export LD_LIBRARY_PATH=$LIBPATH export SQLITE3_VERSION=3071401 echo Install sqlite3 -if ! [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then - wget http://www.sqlite.org/sqlite-autoconf-$SQLITE3_VERSION.tar.gz +sqlite3_tgz=sqlite-autoconf-$SQLITE3_VERSION.tar.gz +if ! [[ -e tgz/$sqlite3_tgz ]]; then + wget http://www.sqlite.org/$sqlite3_tgz + mv $sqlite3_tgz tgz fi if ! [[ -e $PREFIX/bin/sqlite3 ]] ; then - if [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then - tar xfz sqlite-autoconf-$SQLITE3_VERSION.tar.gz + if [[ -e tgz/sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then + tar xfz tgz/sqlite-autoconf-$SQLITE3_VERSION.tar.gz (cd sqlite-autoconf-$SQLITE3_VERSION;./configure --prefix=$PREFIX;make;make install) # CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL -prefix $DEPLOYTARG -deploy $PROX sqlite3 CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX sqlite3 fi fi # $CHICKEN_INSTALL $PROX sqlite3 + +# IUP versions +CDVER=5.7 +IUPVER=3.8 +IMVER=3.8 if [[ `uname -a | grep x86_64` == "" ]]; then export ARCHSIZE='' else export ARCHSIZE=64_ fi # export files="cd-5.4.1_Linux${KTYPE}_lib.tar.gz im-3.6.3_Linux${KTYPE}_lib.tar.gz iup-3.5_Linux${KTYPE}_lib.tar.gz" if [[ x$USEOLDIUP == "x" ]];then - export files="cd-5.5.1_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-3.8_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-3.6_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz" + export files="cd-${CDVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-${IMVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-${IUPVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz" else echo WARNING: Using old IUP libraries export files="cd-5.4.1_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-3.6.3_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-3.5_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz" fi mkdir -p $PREFIX/iuplib for a in `echo $files` ; do - if ! [[ -e $a ]] ; then + if ! [[ -e tgz/$a ]] ; then wget http://www.kiatoa.com/matt/iup/$a fi - echo Untarring $a into $BUILDHOME/lib - (cd $PREFIX/lib;tar xfvz $BUILDHOME/$a;mv include/* ../include) + mv $a tgz/$a + echo Untarring tgz/$a into $BUILDHOME/lib + (cd $PREFIX/lib;tar xfvz $BUILDHOME/tgz/$a;mv include/* ../include) # (cd $DEPLOYTARG;tar xfvz $BUILDHOME/$a) done # ffcall obtained from: # cvs -z3 -d:pserver:anonymous@cvs.savannah.gnu.org:/sources/libffcall co ffcall -if ! [[ -e ffcall.tar.gz ]] ; then +if ! [[ -e tgz/ffcall.tar.gz ]] ; then wget http://www.kiatoa.com/matt/iup/ffcall.tar.gz + mv ffcall.tar.gz tgz fi -tar xfvz ffcall.tar.gz +tar xfvz tgz/ffcall.tar.gz cd ffcall ./configure --prefix=$PREFIX --enable-shared make make install @@ -174,137 +190,12 @@ # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -feature disable-iup-web -deploy -prefix $DEPLOYTARG iup # iup:1.0.2 CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks canvas-draw # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -deploy -prefix $DEPLOYTARG canvas-draw -# disabled zmq # #====================================================================== -# disabled zmq # # Note uuid needed only for zmq 2.x series -# disabled zmq # #====================================================================== -# disabled zmq # -# disabled zmq # # http://download.zeromq.org/zeromq-3.2.1-rc2.tar.gz -# disabled zmq # # zpatchlev=-rc2 -# disabled zmq # # http://download.zeromq.org/zeromq-2.2.0.tar.gz -# disabled zmq # -# disabled zmq # if [[ -e /usr/lib/libzmq.so ]]; then -# disabled zmq # echo "Using system installed zmq library" -# disabled zmq # $CHICKEN_INSTALL zmq -# disabled zmq # else -# disabled zmq # ZEROMQ=zeromq-2.2.0 -# disabled zmq # # ZEROMQ=zeromq-3.2.2 -# disabled zmq # -# disabled zmq # # wget http://www.kernel.org/pub/linux/utils/util-linux/v2.22/util-linux-2.22.tar.gz -# disabled zmq # UTIL_LINUX=2.21 -# disabled zmq # # UTIL_LINUX=2.20.1 -# disabled zmq # if ! [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then -# disabled zmq # # wget http://www.kiatoa.com/matt/util-linux-2.20.1.tar.gz -# disabled zmq # wget http://www.kernel.org/pub/linux/utils/util-linux/v${UTIL_LINUX}/util-linux-${UTIL_LINUX}.tar.gz -# disabled zmq # fi -# disabled zmq # -# disabled zmq # if [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then -# disabled zmq # tar xfz util-linux-${UTIL_LINUX}.tar.gz -# disabled zmq # cd util-linux-${UTIL_LINUX} -# disabled zmq # mkdir -p build -# disabled zmq # cd build -# disabled zmq # if [[ $UTIL_LINUX = "2.22" ]] ; then -# disabled zmq # ../configure --prefix=$PREFIX \ -# disabled zmq # --enable-shared \ -# disabled zmq # --disable-use-tty-group \ -# disabled zmq # --disable-makeinstall-chown \ -# disabled zmq # --disable-makeinstall-setuid \ -# disabled zmq # --disable-libtool-lock \ -# disabled zmq # --disable-login \ -# disabled zmq # --disable-sulogin \ -# disabled zmq # --disable-su \ -# disabled zmq # --disable-schedutils \ -# disabled zmq # --disable-libmount \ -# disabled zmq # --disable-mount \ -# disabled zmq # --disable-losetup \ -# disabled zmq # --disable-fsck \ -# disabled zmq # --disable-partx \ -# disabled zmq # --disable-mountpoint \ -# disabled zmq # --disable-fallocate \ -# disabled zmq # --disable-unshare \ -# disabled zmq # --disable-eject \ -# disabled zmq # --disable-agetty \ -# disabled zmq # --disable-cramfs \ -# disabled zmq # --disable-switch_root \ -# disabled zmq # --disable-pivot_root \ -# disabled zmq # --disable-kill \ -# disabled zmq # --disable-libblkid \ -# disabled zmq # --disable-utmpdump \ -# disabled zmq # --disable-rename \ -# disabled zmq # --disable-chsh-only-listed \ -# disabled zmq # --disable-wall \ -# disabled zmq # --disable-pg-bell \ -# disabled zmq # --disable-require-password \ -# disabled zmq # --disable-libtool-lock \ -# disabled zmq # --disable-nls \ -# disabled zmq # --disable-dmesg \ -# disabled zmq # --without-ncurses -# disabled zmq # else -# disabled zmq # ../configure --prefix=$PREFIX \ -# disabled zmq # --enable-shared \ -# disabled zmq # --disable-mount \ -# disabled zmq # --disable-fsck \ -# disabled zmq # --disable-partx \ -# disabled zmq # --disable-largefile \ -# disabled zmq # --disable-tls \ -# disabled zmq # --disable-libmount \ -# disabled zmq # --disable-mountpoint \ -# disabled zmq # --disable-nls \ -# disabled zmq # --disable-rpath \ -# disabled zmq # --disable-agetty \ -# disabled zmq # --disable-cramfs \ -# disabled zmq # --disable-switch_root \ -# disabled zmq # --disable-pivot_root \ -# disabled zmq # --disable-fallocate \ -# disabled zmq # --disable-unshare \ -# disabled zmq # --disable-rename \ -# disabled zmq # --disable-schedutils \ -# disabled zmq # --disable-libblkid \ -# disabled zmq # --disable-wall CFLAGS='-fPIC' -# disabled zmq # -# disabled zmq # # --disable-makeinstall-chown \ -# disabled zmq # # --disable-makeinstall-setuid \ -# disabled zmq # -# disabled zmq # # --disable-chsh-only-listed -# disabled zmq # # --disable-pg-bell let pg not ring the bell on invalid keys -# disabled zmq # # --disable-require-password -# disabled zmq # # --disable-use-tty-group do not install wall and write setgid tty -# disabled zmq # # --disable-makeinstall-chown -# disabled zmq # # --disable-makeinstall-setuid -# disabled zmq # fi -# disabled zmq # -# disabled zmq # (cd libuuid;make install) -# disabled zmq # # make -# disabled zmq # # make install -# disabled zmq # cp $PREFIX/include/uuid/uuid.h $PREFIX/include/uuid.h -# disabled zmq # fi -# disabled zmq # -# disabled zmq # -# disabled zmq # cd $BUILDHOME -# disabled zmq # -# disabled zmq # if ! [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then -# disabled zmq # wget http://download.zeromq.org/${ZEROMQ}${zpatchlev}.tar.gz -# disabled zmq # fi -# disabled zmq # -# disabled zmq # if [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then -# disabled zmq # tar xfz ${ZEROMQ}.tar.gz -# disabled zmq # cd ${ZEROMQ} -# disabled zmq # ln -s $PREFIX/include/uuid src -# disabled zmq # # LDFLAGS=-L$PREFIX/lib ./configure --prefix=$PREFIX -# disabled zmq # -# disabled zmq # ./configure --enable-static --prefix=$PREFIX --with-uuid=$PREFIX LDFLAGS="-L$PREFIX/lib" CPPFLAGS="-fPIC -I$PREFIX/include" LIBS="-lgcc" -# disabled zmq # # --disable-shared CPPFLAGS="-fPIC -# disabled zmq # # LDFLAGS="-L/usr/lib64 -L$PREFIX/lib" ./configure --enable-static --prefix=$PREFIX -# disabled zmq # make -# disabled zmq # make install -# disabled zmq # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX zmq -# disabled zmq # # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -deploy -prefix $DEPLOYTARG zmq -# disabled zmq # fi -# disabled zmq # fi # if zmq is in /usr/lib -# disabled zmq # +# NB// Removed bunch of zmq compiling tricks. Look at older versions of this file if you need to recreate... + cd $BUILDHOME git clone https://bitbucket.org/DerGuteMoritz/zmq/commits/branch/3.2 zmq-3.2 cd zmq-3.2 chicken-install Index: utils/loadrunner ================================================================== --- utils/loadrunner +++ utils/loadrunner @@ -23,7 +23,7 @@ echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD %" echo "Starting command: \"$@\"" nbfake "$@" else # echo "Load too high: lperc=$lperc, max_load=$max_load, waiting two minutes before trying to run command: \"$@\"" - echo "nbload $@" | at now + 2 minutes 2> /dev/null + echo "loadrunner $@" | at now + 2 minutes 2> /dev/null fi DELETED zmq-transport.scm Index: zmq-transport.scm ================================================================== --- zmq-transport.scm +++ /dev/null @@ -1,494 +0,0 @@ -;;====================================================================== -;; Copyright 2006-2012, Matthew Welland. -;; -;; This program is made available under the GNU GPL version 2.0 or -;; greater. See the accompanying file COPYING for details. -;; -;; This program is distributed WITHOUT ANY WARRANTY; without even the -;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -;; PURPOSE. -;;====================================================================== - -(require-extension (srfi 18) extras tcp s11n) - -(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) -(import (prefix sqlite3 sqlite3:)) - -(use zmq) - -(declare (unit zmq-transport)) - -(declare (uses common)) -(declare (uses db)) -(declare (uses tests)) -(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. -(declare (uses server)) - -(include "common_records.scm") -(include "db_records.scm") - -;; Transition to pub --> sub with pull <-- push -;; -;; 1. client sends request to server via push to the pull port -;; 2. server puts request in queue or processes immediately as appropriate -;; 3. server puts responses from completed requests into pub port -;; -;; TODO -;; -;; Done Tested -;; [x] [ ] 1. Add columns pullport pubport to servers table -;; [x] [ ] 2. Add rm of monitor.db if older than 11/12/2012 -;; [x] [ ] 3. Add create of pullport and pubport with finding of available ports -;; [x] [ ] 4. Add client compose of request -;; [x] [ ] - name of client: testname/itempath-test_id-hostname -;; [x] [ ] - name of request: callname, params -;; [x] [ ] - request key: f(clientname, callname, params) -;; [x] [ ] 5. Add processing of subscription hits -;; [x] [ ] - done when get key -;; [x] [ ] - return results -;; [x] [ ] 6. Add timeout processing -;; [x] [ ] - after 60 seconds -;; [ ] [ ] i. check server alive, connect to new if necessary -;; [ ] [ ] ii. resend request -;; [ ] [ ] 7. Turn self ping back on - -(define (zmq-transport:make-server-url hostport) - (if (not hostport) - #f - (conc "tcp://" (car hostport) ":" (cadr hostport)))) - -(define *server-loop-heart-beat* (current-seconds)) -(define *heartbeat-mutex* (make-mutex)) - -;;====================================================================== -;; S E R V E R -;;====================================================================== - -(define-inline (zmqsock:get-pub dat)(vector-ref dat 0)) -(define-inline (zmqsock:get-pull dat)(vector-ref dat 1)) -(define-inline (zmqsock:set-pub! dat s)(vector-set! dat s 0)) -(define-inline (zmqsock:set-pull! dat s)(vector-set! dat s 0)) - -(define (zmq-transport:run hostn) - (debug:print 2 "Attempting to start the server ...") - (if (not *toppath*) - (if (not (setup-for-run)) - (begin - (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting") - (exit)))) - (let* ((db (open-db)) ;; here we *do not* want to be opening and closing the db - (zmq-sdat1 #f) - (zmq-sdat2 #f) - (pull-socket #f) - (pub-socket #f) - (p1 #f) - (p2 #f) - (zmq-sockets-dat #f) - (iface (if (string=? "-" hostn) - "*" ;; (get-host-name) - hostn)) - (hostname (get-host-name)) - (ipaddrstr (let ((ipstr (if (string=? "-" hostn) - (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") - #f))) - (if ipstr ipstr hostname))) - (last-run 0)) - (set! zmq-sockets-dat (zmq-transport:setup-ports ipaddrstr (if (args:get-arg "-port") - (string->number (args:get-arg "-port")) - (+ 5000 (random 1001))))) - - (set! zmq-sdat1 (car zmq-sockets-dat)) - (set! pull-socket (cadr zmq-sdat1)) ;; (iface s port) - (set! p1 (caddr zmq-sdat1)) - - (set! zmq-sdat2 (cadr zmq-sockets-dat)) - (set! pub-socket (cadr zmq-sdat2)) - (set! p2 (caddr zmq-sdat2)) - - (set! *cache-on* #t) - - (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!? - - ;; what to do when we quit - ;; -;; (on-exit (lambda () -;; (if (and *toppath* *server-info*) -;; (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*)) -;; (let loop () -;; (let ((queue-len 0)) -;; (thread-sleep! (random 5)) -;; (mutex-lock! *incoming-mutex*) -;; (set! queue-len (length *incoming-data*)) -;; (mutex-unlock! *incoming-mutex*) -;; (if (> queue-len 0) -;; (begin -;; (debug:print-info 0 "Queue not flushed, waiting ...") -;; (loop)))))))) - - ;; The heavy lifting - ;; - ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime - ;; - (debug:print-info 11 "Server setup complete, start listening for messages") - (let loop ((queue-lst '())) - (let* ((rawmsg (receive-message* pull-socket)) - (packet (db:string->obj rawmsg)) - (qtype (cdb:packet-get-qtype packet))) - (debug:print-info 12 "server=> received packet=" packet) - (if (not (member qtype '(sync ping))) - (begin - (mutex-lock! *heartbeat-mutex*) - (set! *last-db-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*))) - (if #t ;; (cdb:packet-get-immediate packet) ;; process immediately or put in queue - (begin - (db:process-queue-item db packet) - ;; (open-run-close db:process-queue #f pub-socket (cons packet queue-lst)) - - (loop '())) - (loop (cons packet queue-lst))))))) - -;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (zmq-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (let* ((server-info (let loop () - (let ((sdat #f)) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat - (begin - (debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again") - (sleep 4) - (loop)))))) - (iface (cadr server-info)) - (pullport (caddr server-info)) - (pubport (cadddr server-info)) ;; id interface pullport pubport) - ;; (zmq-sockets (zmq-transport:client-connect iface pullport pubport)) - (last-access 0)) - (debug:print-info 11 "heartbeat started for zmq server on " iface " " pullport " " pubport) - (let loop ((count 0)) - (thread-sleep! 4) ;; no need to do this very often - ;; NB// sync currently does NOT return queue-length - ;; GET REAL QUEUE LENGTH FROM THE VARIABLE - (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1))) - ;; (print "Server running, count is " count) - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1))) - - ;; NOTE: Get rid of this mechanism! It really is not needed... - (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)) - - ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access - (mutex-lock! *heartbeat-mutex*) - (set! last-access *last-db-access*) - (mutex-unlock! *heartbeat-mutex*) - (if (> (+ last-access - ;; (* 50 60 60) ;; 48 hrs - ;; 60 ;; one minute - ;; (* 60 60) ;; one hour - (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. - ) - (current-seconds)) - (begin - (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (loop 0)) - (begin - (debug:print-info 0 "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) - (set! *time-to-exit* #t) - (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name)) - (thread-sleep! 1) - (debug:print-info 0 "Max cached queries was " *max-cache-size*) - (debug:print-info 0 "Server shutdown complete. Exiting") - (exit))))))) - -(define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50)) - (let ((s (if s s (make-socket stype))) - (p (if (number? port) port 5555)) - (old-handler (current-exception-handler))) - (handle-exceptions - exn - (begin - (debug:print 0 "Failed to bind to port " p ", trying next port") - (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - ;; (old-handler) - ;; (print-call-chain) - (if (> trynum 0) - (zmq-transport:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1)) - (debug:print-info 0 "Tried ports up to " p - " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use")) - (exit)) ;; To exit or not? That is the question. - (let ((zmq-url (conc "tcp://" iface ":" p))) - (debug:print 2 "Trying to start server on " zmq-url) - (bind-socket s zmq-url) - (list iface s port))))) - -(define (zmq-transport:setup-ports ipaddrstr startport) - (let* ((s1 (zmq-transport:find-free-port-and-open ipaddrstr #f startport 'pull)) - (p1 (caddr s1)) - (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub)) - (p2 (caddr s2))) - (set! *runremote* #f) - (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2) - (mutex-lock! *heartbeat-mutex*) - (set! *server-info* (open-run-close tasks:server-register - tasks:open-db - (current-process-id) - ipaddrstr p1 - 0 - 'live - 'zmq - pubport: p2)) - (debug:print-info 11 "*server-info* set to " *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (list s1 s2))) - -(define (zmq-transport:mk-signature) - (message-digest-string (md5-primitive) - (with-output-to-string - (lambda () - (write (list (current-directory) - (argv))))))) - -;;====================================================================== -;; S E R V E R U T I L I T I E S -;;====================================================================== - -;;====================================================================== -;; C L I E N T S -;;====================================================================== - -;; -(define (zmq-transport:client-socket-connect iface port #!key (context #f)(type 'req)(subscriptions '())) - (debug:print-info 3 "client-connect " iface ":" port ", type=" type ", subscriptions=" subscriptions) - (let ((connect-ok #f) - (zmq-socket (if context - (make-socket type context) - (make-socket type))) - (conurl (zmq-transport:make-server-url (list iface port)))) - (if (socket? zmq-socket) - (begin - ;; first apply subscriptions - (for-each (lambda (subscription) - (debug:print 2 "Subscribing to " subscription) - (socket-option-set! zmq-socket 'subscribe subscription)) - subscriptions) - (connect-socket zmq-socket conurl) - zmq-socket) - (begin - (debug:print 0 "ERROR: Failed to open socket to " conurl) - #f)))) - -(define (zmq-transport:client-connect iface pullport pubport) - (let* ((push-socket (zmq-transport:client-socket-connect iface pullport type: 'push)) - (sub-socket (zmq-transport:client-socket-connect iface pubport - type: 'sub - subscriptions: (list (client:get-signature) "all"))) - (zmq-sockets (vector push-socket sub-socket)) - (login-res #f)) - (debug:print-info 11 "zmq-transport:client-connect started. Next is login") - (set! login-res (client:login serverdat zmq-sockets)) - (if (and (not (null? login-res)) - (car login-res)) - (begin - (debug:print-info 2 "Logged in and connected to " iface ":" pullport "/" pubport ".") - (set! *runremote* zmq-sockets) - zmq-sockets) - (begin - (debug:print-info 2 "Failed to login or connect to " conurl) - (set! *runremote* #f) - #f)))) - -;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (zmq-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (let* ((server-info (let loop () - (let ((sdat #f)) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *runremote*) - (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat - (begin - (sleep 4) - (loop)))))) - (iface (car server-info)) - (port (cadr server-info)) - (last-access 0) - (tdb (tasks:open-db)) - (spid (tasks:server-get-server-id tdb #f iface port #f))) - (print "Keep-running got server pid " spid ", using iface " iface " and port " port) - (let loop ((count 0)) - (thread-sleep! 4) ;; no need to do this very often - ;; NB// sync currently does NOT return queue-length - (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) - ;; (print "Server running, count is " count) - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1))) - - ;; NOTE: Get rid of this mechanism! It really is not needed... - (tasks:server-update-heartbeat tdb spid) - - ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access - (mutex-lock! *heartbeat-mutex*) - (set! last-access *last-db-access*) - (mutex-unlock! *heartbeat-mutex*) - (if (> (+ last-access - ;; (* 50 60 60) ;; 48 hrs - ;; 60 ;; one minute - ;; (* 60 60) ;; one hour - (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. - ) - (current-seconds)) - (begin - (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (loop 0)) - (begin - (debug:print-info 0 "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) - (set! *time-to-exit* #t) - (tasks:server-deregister-self tdb (get-host-name)) - (thread-sleep! 1) - (debug:print-info 0 "Max cached queries was " *max-cache-size*) - (debug:print-info 0 "Server shutdown complete. Exiting") - (exit))))))) - -;; all routes though here end in exit ... -(define (zmq-transport:launch) - (if (not *toppath*) - (if (not (setup-for-run)) - (begin - (debug:print 0 "ERROR: cannot find megatest.config, exiting") - (exit)))) - (debug:print-info 2 "Starting zmq server") - (if *toppath* - (let* (;; (th1 (make-thread (lambda () - ;; (let ((server-info #f)) - ;; ;; wait for the server to be online and available - ;; (let loop () - ;; (debug:print-info 2 "Waiting for the server to come online before starting heartbeat") - ;; (thread-sleep! 2) - ;; (mutex-lock! *heartbeat-mutex*) - ;; (set! server-info *server-info* ) - ;; (mutex-unlock! *heartbeat-mutex*) - ;; (if (not server-info)(loop))) - ;; (debug:print 2 "Server alive, starting self-ping") - ;; (zmq-transport:self-ping server-info) - ;; )) - ;; "Self ping")) - (th2 (make-thread (lambda () - (zmq-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-"))) "Server run")) - ;; (th3 (make-thread (lambda ()(zmq-transport:keep-running)) "Keep running")) - ) - (set! *client-non-blocking-mode* #t) - ;; (thread-start! th1) - (thread-start! th2) - ;; (thread-start! th3) - (set! *didsomething* #t) - ;; (thread-join! th3) - (thread-join! th2) - ) - (debug:print 0 "ERROR: Failed to setup for megatest"))) - -(define (zmq-transport:client-signal-handler signum) - (handle-exceptions - exn - (debug:print " ... exiting ...") - (let ((th1 (make-thread (lambda () - (if (not *received-response*) - (receive-message* *runremote*))) ;; flush out last call if applicable - "eat response")) - (th2 (make-thread (lambda () - (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") - (thread-sleep! 3) ;; give the flush three seconds to do it's stuff - (debug:print 0 " Done.") - (exit 4)) - "exit on ^C timer"))) - (thread-start! th2) - (thread-start! th1) - (thread-join! th2)))) - -(define (zmq-transport:client-launch) - (set-signal-handler! signal/int zmq-transport:client-signal-handler) - (if (zmq-transport:client-setup) - (debug:print-info 2 "connected as client") - (begin - (debug:print 0 "ERROR: Failed to connect as client") - (exit)))) - -;;====================================================================== -;; Defunct functions -;;====================================================================== - -;; ping a server and return number of clients or #f (if no response) -;; NOT IN USE! -(define (zmq-transport:ping host port #!key (secs 10)(return-socket #f)) - (cdb:use-non-blocking-mode - (lambda () - (let* ((res #f) - (th1 (make-thread - (lambda () - (let* ((zmq-context (make-context 1)) - (zmq-socket (zmq-transport:client-connect host port context: zmq-context))) - (if zmq-socket - (if (zmq-transport:client-login zmq-socket) - (let ((numclients (cdb:num-clients zmq-socket))) - (if (not return-socket) - (begin - (zmq-transport:client-logout zmq-socket) - (close-socket zmq-socket))) - (set! res (list #t numclients (if return-socket zmq-socket #f)))) - (begin - ;; (close-socket zmq-socket) - (set! res (list #f "CAN'T LOGIN" #f)))) - (set! res (list #f "CAN'T CONNECT" #f))))) - "Ping: th1")) - (th2 (make-thread - (lambda () - (let loop ((count 1)) - (debug:print-info 1 "Ping " count " server on " host " at port " port) - (thread-sleep! 2) - (if (< count (/ secs 2)) - (loop (+ count 1)))) - ;; (thread-terminate! th1) - (set! res (list #f "TIMED OUT" #f))) - "Ping: th2"))) - (thread-start! th2) - (thread-start! th1) - (handle-exceptions - exn - (set! res (list #f "TIMED OUT" #f)) - (thread-join! th1 secs)) - res)))) - -;; (define (zmq-transport:self-ping server-info) -;; ;; server-info: server-id interface pullport pubport -;; (let ((iface (list-ref server-info 1)) -;; (pullport (list-ref server-info 2)) -;; (pubport (list-ref server-info 3))) -;; (zmq-transport:client-connect iface pullport pubport) -;; (let loop () -;; (thread-sleep! 2) -;; (cdb:client-call *runremote* 'ping #t) -;; (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!") -;; (mutex-lock! *heartbeat-mutex*) -;; (set! *server-loop-heart-beat* (current-seconds)) -;; (mutex-unlock! *heartbeat-mutex*) -;; (loop)))) - -(define (zmq-transport:reply pubsock target query-sig success/fail result) - (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result) - (send-message pubsock target send-more: #t) - (send-message pubsock (db:obj->string (vector success/fail query-sig result)))) -