Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -4,11 +4,11 @@
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
ods.scm runconfig.scm server.scm configf.scm \
db.scm keys.scm margs.scm megatest-version.scm \
process.scm runs.scm tasks.scm tests.scm genexample.scm \
- fs-transport.scm http-transport.scm filedb.scm \
+ http-transport.scm filedb.scm \
client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
tree.scm ezsteps.scm lock-queue.scm sdb.scm \
rmt.scm api.scm tdb.scm
GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm
@@ -62,10 +62,11 @@
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
+client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm
# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm
megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -15,10 +15,30 @@
;; These are called by the server on recipt of /api calls
(define (api:execute-requests dbstruct cmd params)
(case (string->symbol cmd)
+ ;; SERVERS
+ ((start-server) (apply server:kind-run params))
+ ;; ((kill-server)
+ ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*)
+ ;; (let ((hostname (car *runremote*))
+ ;; (port (cadr *runremote*))
+ ;; (pid (if (null? params) #f (car params)))
+ ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
+ ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
+ ;; (debug:print-info 1 "current pid=" (current-process-id))
+ ;; (open-run-close tasks:server-deregister tasks:open-db
+ ;; hostname
+ ;; port: port)
+ ;; (set! *server-run* #f)
+ ;; (thread-sleep! 3)
+ ;; (if pid
+ ;; (process-signal pid signal/kill)
+ ;; (thread-start! th1))
+ ;; '(#t "exit process started")))
+
;; KEYS
((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params))
((get-keys) (db:get-keys dbstruct))
;; TESTS
@@ -51,10 +71,12 @@
((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params))
((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params))
((delete-run) (apply db:delete-run dbstruct params))
((get-runs) (apply db:get-runs dbstruct params))
((get-all-run-ids) (db:get-all-run-ids dbstruct))
+ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params))
+ ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params))
((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params))
((lock/unlock-run) (apply db:lock/unlock-run dbstruct params))
((update-run-event_time) (apply db:update-run-event_time dbstruct params))
;; STEPS
@@ -72,27 +94,10 @@
(realparams (cddr params)))
(db:with-db dbstruct run-id #t ;; these are all for modifying the db
(lambda (db)
(db:general-call db stmtname realparams)))))
((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t))
- ;; ((kill-server)
- ;; (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*) ;; (db:sync-to *inmemdb* *db*)
- ;; (let ((hostname (car *runremote*))
- ;; (port (cadr *runremote*))
- ;; (pid (if (null? params) #f (car params)))
- ;; (th1 (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
- ;; (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
- ;; (debug:print-info 1 "current pid=" (current-process-id))
- ;; (open-run-close tasks:server-deregister tasks:open-db
- ;; hostname
- ;; port: port)
- ;; (set! *server-run* #f)
- ;; (thread-sleep! 3)
- ;; (if pid
- ;; (process-signal pid signal/kill)
- ;; (thread-start! th1))
- ;; '(#t "exit process started")))
((sdb-qry) (apply sdb:qry params))
;; TESTMETA
((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
((testmeta-add-record) (apply db:testmeta-add-record dbstruct params))
Index: client.scm
==================================================================
--- client.scm
+++ client.scm
@@ -53,62 +53,86 @@
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
-(define (client:setup run-id #!key (remaining-tries 10))
+(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0))
(if (<= remaining-tries 0)
(begin
(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
(exit 1))
- (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
+ (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
+ (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries)
(thread-sleep! 1) ;; try to avoid race conditons
- (if server-dat
- (let ((new-dat (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
- (car server-dat)
- (cadr server-dat))))
- (if new-dat ;; sucessful login?
- new-dat
- (begin ;; login failed
- (debug:print 0 "INFO: login failed in client:setup with existing server-dat: " server-dat ", new-dat: " new-dat ", cleaning out records and then trying again")
- (hash-table-delete! *runremote* run-id)
- (open-run-close tasks:server-force-clean-run-record
- tasks:open-db
- run-id
- (car server-dat)
- (cadr server-dat))
- (thread-sleep! 5)
- (client:setup run-id remaining-tries: (- remaining-tries 1)))))
- (let* ((server-info (open-run-close tasks:get-server tasks:open-db run-id)))
- (if server-info
- (let ((new-dat (http-transport:client-connect run-id
- (tasks:hostinfo-get-interface server-info)
- (tasks:hostinfo-get-port server-info))))
+ (if host-info
+ (let* ((iface (car host-info))
+ (port (cadr host-info))
+ (start-res (http-transport:client-connect iface port))
+ ;; (ping-res (server:ping-server run-id iface port))
+ (ping-res (rmt:login-no-auto-client-setup start-res run-id)))
+ (if ping-res ;; sucessful login?
+ start-res) ;; return the server info
+ (if (member remaining-tries '(3 4 6))
+ (begin ;; login failed
+ (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)
+ (hash-table-delete! *runremote* run-id)
+ (open-run-close tasks:server-force-clean-run-record
+ tasks:open-db
+ run-id
+ (car host-info)
+ (cadr host-info)
+ " client:setup (host-info=#t)")
+ (thread-sleep! 5)
+ (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
+ (begin
+ (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
+ (thread-sleep! 5)
+ (client:setup run-id remaining-tries: (- remaining-tries 1))))))
+ ;; YUK: rename server-dat here
(if new-dat
new-dat
- (begin ;; login failed
- (debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again")
- (hash-table-delete! *runremote* run-id)
- (open-run-close tasks:server-force-clean-run-record
- tasks:open-db
- run-id
- (tasks:hostinfo-get-interface server-dat)
- (tasks:hostinfo-get-port server-dat))
- ;; (thread-sleep! 2)
- (server:try-running run-id)
- (thread-sleep! 5) ;; give server a little time to start up
- (client:setup run-id remaining-tries: (- remaining-tries 1)))))
+ (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
+ (let* ((iface (tasks:hostinfo-get-interface server-dat))
+ (port (tasks:hostinfo-get-port server-dat))
+ (start-res (http-transport:client-connect iface port))
+ ;; (ping-res (server:ping-server run-id iface port))
+ (ping-res (rmt:login-no-auto-client-setup start-res run-id)))
+ (if (member remaining-tries '(2 5))
+ (begin ;; login failed
+ (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
+ ;;(debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again")
+ (hash-table-delete! *runremote* run-id)
+ (open-run-close tasks:server-force-clean-run-record
+ tasks:open-db
+ run-id
+ (tasks:hostinfo-get-interface server-dat)
+ (tasks:hostinfo-get-port server-dat)
+ " client:setup (server-dat = #t)")
+ (server:try-running run-id)
+ (thread-sleep! 10) ;; give server a little time to start up
+ (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
+ (begin
+ (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
+ (thread-sleep! 5)
+ (client:setup run-id remaining-tries: (- remaining-tries 1))))))
(begin ;; no server registered
- ;; (thread-sleep! 2)
- (server:try-running run-id)
- (thread-sleep! 5) ;; give server a little time to start up
- (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))
+ (if (eq? remaining-tries 2)
+ (begin
+ ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)")
+ (client:setup run-id remaining-tries: 10))
+ (begin
+ (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat)
+ (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3)
+ (begin
+ ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)")
+ (server:try-running run-id)))
+ (thread-sleep! 10) ;; give server a little time to start up
+ (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))))
;; keep this as a function to ease future
(define (client:start run-id server-info)
- (http-transport:client-connect run-id
- (tasks:hostinfo-get-interface server-info)
+ (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info)))
;; client:signal-handler
(define (client:signal-handler signum)
(handle-exceptions
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -23,10 +23,17 @@
;; (require-library margs)
;; (include "margs.scm")
(define getenv get-environment-variable)
+(define (safe-setenv key val)
+ (if (and (string? val)(string? key))
+ (handle-exceptions
+ exn
+ (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" val)
+ (setenv key val))
+ (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" val)))
(define home (getenv "HOME"))
(define user (getenv "USER"))
;; GLOBAL GLETCHES
@@ -60,10 +67,11 @@
(define *default-numtries* 10)
(define *server-run* #t)
(define *db-write-access* #t)
(define *inmemdb* #f)
(define *run-id* #f)
+(define *server-kind-run* (make-hash-table))
(define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys* (make-hash-table)) ;; cache the keys here
(define *keyvals* (make-hash-table))
(define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here
@@ -99,10 +107,17 @@
;; Generic string database (normalization of sorts)
(define sdb:qry #f) ;; (make-sdb:qry)) ;; 'init #f)
;; Generic path database (normalization of sorts)
(define *fdb* #f)
+
+;;======================================================================
+;; U S E F U L S T U F F
+;;======================================================================
+
+(define (common:get-megatest-exe)
+ (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest"))
;;======================================================================
;; S T A T E S A N D S T A T U S E S
;;======================================================================
Index: common_records.scm
==================================================================
--- common_records.scm
+++ common_records.scm
@@ -6,28 +6,33 @@
;;
;; This program is distributed WITHOUT ANY WARRANTY; without even the
;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;; PURPOSE.
;;======================================================================
+
+(use trace)
(define (debug:calc-verbosity vstr)
(cond
- (vstr
- (let ((debugvals (string-split vstr ",")))
- (if (> (length debugvals) 1)
- (map string->number debugvals)
- (string->number (car debugvals)))))
- ((args:get-arg "-v") 2)
+ ((number? vstr) vstr)
+ ((not (string? vstr)) 1)
+ ;; ((string-match "^\\s*$" vstr) 1)
+ (vstr (let ((debugvals (filter number? (map string->number (string-split vstr ",")))))
+ (cond
+ ((> (length debugvals) 1) debugvals)
+ ((> (length debugvals) 0)(car debugvals))
+ (else 1))))
+ ((args:get-arg "-v") 2)
((args:get-arg "-q") 0)
(else 1)))
;; check verbosity, #t is ok
(define (debug:check-verbosity verbosity vstr)
(if (not (or (number? verbosity)
(list? verbosity)))
(begin
- (print "ERROR: Invalid debug value " vstr)
+ (print "ERROR: Invalid debug value \"" vstr "\"")
#f)
#t))
(define (debug:debug-mode n)
(or (and (number? *verbosity*)
@@ -38,10 +43,12 @@
(define (debug:setup)
(let ((debugstr (or (args:get-arg "-debug")
(getenv "MT_DEBUG_MODE"))))
(set! *verbosity* (debug:calc-verbosity debugstr))
(debug:check-verbosity *verbosity* debugstr)
+ ;; if we were handed a bad verbosity rule then we will override it with 1 and continue
+ (if (not *verbosity*)(set! *verbosity* 1))
(if (or (args:get-arg "-debug")
(not (getenv "MT_DEBUG_MODE")))
(setenv "MT_DEBUG_MODE" (if (list? *verbosity*)
(string-intersperse (map conc *verbosity*) ",")
(conc *verbosity*))))))
@@ -51,10 +58,11 @@
(if (debug:debug-mode n)
(with-output-to-port (current-error-port)
(lambda ()
(if *logging*
(db:log-event (apply conc params))
+ ;; (apply print "pid:" (current-process-id) " " params)
(apply print params)
)))))
(define (debug:print-info n . params)
(if (debug:debug-mode n)
@@ -61,13 +69,14 @@
(with-output-to-port (current-error-port)
(lambda ()
(let ((res (format#format #f "INFO: (~2d) ~a" n (apply conc params))))
(if *logging*
(db:log-event res)
+ ;; (apply print "pid:" (current-process-id) " " "INFO: (" n ") " params) ;; res)
(apply print "INFO: (" n ") " params) ;; res)
))))))
;; if a value is printable (i.e. string or number) return the value
;; else return an empty string
(define-inline (printable val)
(if (or (number? val)(string? val)) val ""))
Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -217,17 +217,11 @@
(envar (and environ-patt (string-search (regexp environ-patt) curr-section-name)))
(realval (if envar
(config:eval-string-in-environment val)
val)))
(debug:print-info 6 "read-config env setting, envar: " envar " realval: " realval " val: " val " key: " key " curr-section-name: " curr-section-name)
- (if envar
- (if (and (string? realval)(string? key))
- (handle-exceptions
- exn
- (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" realval)
- (setenv key realval))
- (debug:print 0 "ERROR: bad value for setenv, key=" key ", value=" realval)))
+ (if envar (safe-setenv key realval))
(hash-table-set! res curr-section-name
(config:assoc-safe-add alist key realval))
(loop (configf:read-line inp res allow-system) curr-section-name key #f)))
(configf:key-no-val ( x key val) (let* ((alist (hash-table-ref/default res curr-section-name '())))
(hash-table-set! res curr-section-name
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -88,20 +88,10 @@
(print "Failed to find megatest.config, exiting")
(exit 1)))
(define *dbstruct-local* (make-dbr:dbstruct path: *toppath* local: #t))
-;; (define sdb:qry (make-sdb:qry)) ;; 'init #f)
-
-;; (if (args:get-arg "-host")
-;; (begin
-;; (set! *runremote* (string-split (args:get-arg "-host" ":")))
-;; (client:launch))
-;; (if (not (args:get-arg "-use-server"))
-;; (set! *transport-type* 'fs) ;; force fs access
-;; (client:launch)))
-
;; HACK ALERT: this is a hack, please fix.
(define *read-only* (not (file-read-access? (conc *toppath* "db/main.db"))))
;; (client:setup *dbstruct-local*)
(define toplevel #f)
@@ -990,17 +980,21 @@
;; General info about the run(s) and megatest area
(define (dashboard:summary db)
(let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string)))
(iup:vbox
(iup:split
- ;; #:value 500
+ #:value 500
(iup:frame
#:title "General Info"
- (iup:hbox
- (dcommon:keys-matrix rawconfig)
- (dcommon:general-info)
- ))
+ (iup:vbox
+ (iup:hbox
+ (iup:label "Area Path")
+ (iup:textbox #:value *toppath* #:expand "HORIZONTAL"))
+ (iup:hbox
+ (dcommon:keys-matrix rawconfig)
+ (dcommon:general-info)
+ )))
(iup:frame
#:title "Server"
(dcommon:servers-table)))
(iup:frame
#:title "Megatest config settings"
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -11,29 +11,21 @@
;;======================================================================
;; Database access
;;======================================================================
-(require-extension (srfi 18) extras tcp) ;; rpc)
-;; (import (prefix rpc rpc:))
-
+(require-extension (srfi 18) extras tcp)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))
-;; Note, try to remove this dependency
-;; (use zmq)
-
(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
-(declare (uses fs-transport))
(declare (uses client))
(declare (uses mt))
-;; (declare (uses sdb))
-;; (declare (uses filedb))
(include "common_records.scm")
(include "db_records.scm")
(include "key_records.scm")
(include "run_records.scm")
@@ -77,14 +69,22 @@
;; (db:with-db dbstruct run-id sqlite3:exec "select blah from blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
- (let* ((db (db:get-db dbstruct run-id)))
+ (let* ((db (db:get-db dbstruct run-id))
+ )
+ ;; (proc2 (lambda ()
(let ((res (apply proc db params)))
(db:done-with dbstruct run-id r/w)
res)))
+;; (handle-exceptions
+;; exn
+;; (begin
+;; (thread-sleep! 10)
+;; (proc2))
+;; (proc2))))
;;======================================================================
;; K E E P F I L E D B I N dbstruct
;;======================================================================
@@ -460,12 +460,13 @@
(apply open-run-close-no-exception-handling proc idb params))
(apply open-run-close-no-exception-handling proc idb params)))
;; (define open-run-close
(define open-run-close ;; (if (debug:debug-mode 2)
- open-run-close-no-exception-handling)
- ;; open-run-close-exception-handling))
+ ;; open-run-close-no-exception-handling
+ open-run-close-exception-handling)
+;;)
(define (db:initialize-main-db db)
(let* ((configdat (car *configinfo*)) ;; tut tut, global warning...
(keys (keys:config-get-fields configdat))
(havekeys (> (length keys) 0))
@@ -642,11 +643,11 @@
(deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
(deadtime (if (and deadtime-str
(string->number deadtime-str))
(string->number deadtime-str)
7200)) ;; two hours
- (run-ids (db:get-run-ids db))) ;; iterate over runs to divy up the calls
+ (run-ids (db:get-all-run-ids db))) ;; iterate over runs to divy up the calls
(if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
(for-each
(lambda (run-id)
;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
@@ -978,11 +979,11 @@
(define (db:get-targets dbstruct)
(let* ((res '())
(keys (db:get-keys dbstruct))
(header keys) ;; (map key:get-fieldname keys))
(keystr (keys->keystr keys))
- (qrystr (conc "SELECT " keystr " FROM runs;"))
+ (qrystr (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';"))
(seen (make-hash-table)))
(sqlite3:for-each-row
(lambda (a . x)
(let ((targ (cons a x)))
(if (not (hash-table-ref/default seen targ #f))
@@ -1011,11 +1012,11 @@
(sqlite3:for-each-row
(lambda (run-id)
(set! run-ids (cons run-id run-ids)))
(db:get-db dbstruct #f)
"SELECT id FROM runs WHERE state != 'deleted';")
- run-ids))
+ (reverse run-ids)))
;; get some basic run stats
;;
;; ( (runname (( state count ) ... ))
;; ( ...
@@ -1026,11 +1027,11 @@
;; First get all the runname/run-ids
(sqlite3:for-each-row
(lambda (run-id runname)
(set! runs-info (cons (list run-id runname) runs-info)))
(db:get-db dbstruct #f)
- "SELECT id,runname FROM runs;")
+ "SELECT id,runname FROM runs WHERE state != 'deleted';")
;; for each run get stats data
(for-each
(lambda (run-info)
(let ((run-id (car run-info))
(run-name (cadr run-info)))
@@ -1138,26 +1139,24 @@
(sqlite3:execute (db:get-db dbstruct #f) "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
(sqlite3:execute (db:get-db dbstruct #f) "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);"
user (conc newlockval " " run-id))
(debug:print-info 1 "" newlockval " run number " run-id)))
-(define (db:get-all-run-ids dbstruct)
- (let ((res '()))
- (sqlite3:for-each-row
- (lambda (run-id)
- (set! res (cons run-id res)))
- (db:get-db dbstruct #f)
- "SELECT id FROM runs;")
- (reverse res)))
-
-(define (db:get-run-ids db)
- (let ((res '()))
- (sqlite3:for-each-row
- (lambda (id)
- (set! res (cons id res)))
+(define (db:set-run-status db run-id status #!key (msg #f))
+ (if msg
+ (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
+ (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))
+
+(define (db:get-run-status db run-id)
+ (let ((res "n/a"))
+ (sqlite3:for-each-row
+ (lambda (status)
+ (set! res status))
db
- "SELECT id FROM runs;")))
+ "SELECT status FROM runs WHERE id=?;"
+ run-id)
+ res))
;;======================================================================
;; K E Y S
;;======================================================================
@@ -1177,36 +1176,46 @@
keys)
(reverse res)))
;; get key vals for a given run-id
(define (db:get-key-vals dbstruct run-id)
- (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f)))
- (if mykeyvals
- mykeyvals
- (let* ((keys (db:get-keys dbstruct))
- (res '()))
- (for-each
- (lambda (key)
- (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
- (sqlite3:for-each-row
- (lambda (key-val)
- (set! res (cons key-val res)))
- (db:get-db dbstruct #f) qry run-id)))
- keys)
- (let ((final-res (reverse res)))
- (hash-table-set! *keyvals* run-id final-res)
- final-res)))))
+ (let* ((keys (db:get-keys dbstruct))
+ (res '()))
+ (for-each
+ (lambda (key)
+ (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
+ (sqlite3:for-each-row
+ (lambda (key-val)
+ (set! res (cons key-val res)))
+ (db:get-db dbstruct #f) qry run-id)))
+ keys)
+ (let ((final-res (reverse res)))
+ (hash-table-set! *keyvals* run-id final-res)
+ final-res)))
;; The target is keyval1/keyval2..., cached in *target* as it is used often
(define (db:get-target dbstruct run-id)
- (let ((mytarg (hash-table-ref/default *target* run-id #f)))
- (if mytarg
- mytarg
- (let* ((keyvals (db:get-key-vals dbstruct run-id))
- (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
- (hash-table-set! *target* run-id thekey)
- thekey))))
+ (let* ((keyvals (db:get-key-vals dbstruct run-id))
+ (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
+ thekey))
+
+;; Get run-ids for runs with same target but different runnames and NOT run-id
+;;
+(define (db:get-prev-run-ids dbstruct run-id)
+ (let* ((keyvals (rmt:get-key-val-pairs run-id))
+ (kvalues (map cadr keyvals))
+ (keys (rmt:get-keys))
+ (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+ (let ((prev-run-ids '()))
+ (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db
+ (lambda (db)
+ (apply sqlite3:for-each-row
+ (lambda (id)
+ (set! prev-run-ids (cons id prev-run-ids)))
+ db
+ (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") (append kvalues (list run-id)))))
+ prev-run-ids)))
;;======================================================================
;; T E S T S
;;======================================================================
@@ -1487,11 +1496,11 @@
(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir)
;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
(set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir)
res)))
(db:get-db dbstruct run-id)
- (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE run_id=?;")
+ (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;")
run-id)
res))
(define (db:replace-test-records dbstruct run-id testrecs)
(db:with-db dbstruct run-id #t
@@ -1683,34 +1692,36 @@
;;======================================================================
;; Misc. test related queries
;;======================================================================
-(define (db:test-get-paths-matching-keynames-target-new dbstruct keynames target res testpatt statepatt statuspatt runname)
+(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
(let* ((row-ids '())
(keystr (string-intersperse
(map (lambda (key val)
(conc key " like '" val "'"))
keynames
(string-split target "/"))
" AND "))
- (testqry (tests:match->sqlqry testpatt))
- (runsqry (sqlite3:prepare (db:get-db dbstruct #f)(conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';")))
- (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
- (debug:print 8 "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" tstsqry)
+ ;; (testqry (tests:match->sqlqry testpatt))
+ (runsqry (sqlite3:prepare (db:get-db dbstruct #f)(conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';"))))
+ ;; (debug:print 8 "db:test-get-paths-matching-keynames-target-new\n runsqry=" runsqry "\n tstsqry=" testqry)
(sqlite3:for-each-row
(lambda (rid)
(set! row-ids (cons rid row-ids)))
runsqry)
(sqlite3:finalize! runsqry)
- (for-each (lambda (rid)
- (sqlite3:for-each-row
- (lambda (p)
- (set! res (cons p res)))
- (db:get-db dbstruct rid)
- tstsqry))
- row-ids)
+ row-ids))
+
+(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname)
+ (let* ((testqry (tests:match->sqlqry testpatt))
+ (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
+ (sqlite3:for-each-row
+ (lambda (p)
+ (set! res (cons p res)))
+ (db:get-db dbstruct run-id)
+ tstsqry)
res))
;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================
@@ -1878,49 +1889,10 @@
db:queries)))
(if q (car q) #f))))
(apply sqlite3:execute db query params)
#t))
-;; get the previous record for when this test was run where all keys match but runname
-;; returns #f if no such test found, returns a single test record if found
-;;
-;; Run this server-side
-;;
-(define (db:get-previous-test-run-record dbstruct run-id test-name item-path)
- (let* ((db (db:get-db dbstruct #f)) ;;
- (keys (db:get-keys db))
- (selstr (string-intersperse keys ","))
- (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))
- (keyvals #f))
- ;; first look up the key values from the run selected by run-id
- (sqlite3:for-each-row
- (lambda (a . b)
- (set! keyvals (cons a b)))
- db
- (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id)
- (if (not keyvals)
- #f
- (let ((prev-run-ids '()))
- (apply sqlite3:for-each-row
- (lambda (id)
- (set! prev-run-ids (cons id prev-run-ids)))
- db
- (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id)))
- ;; for each run starting with the most recent look to see if there is a matching test
- ;; if found then return that matching test record
- (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
- (if (null? prev-run-ids) #f
- (let loop ((hed (car prev-run-ids))
- (tal (cdr prev-run-ids)))
- (let ((results (db:get-tests-for-run db hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f)))
- (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
- (if (and (null? results)
- (not (null? tal)))
- (loop (car tal)(cdr tal))
- (if (null? results) #f
- (car results))))))))))
-
;; get the previous records for when these tests were run where all keys match but runname
;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests
;; can use wildcards. Also can likely be factored in with get test paths?
;;
;; Run this remotely!!
Index: dcommon.scm
==================================================================
--- dcommon.scm
+++ dcommon.scm
@@ -358,24 +358,24 @@
(define (dcommon:general-info)
(let ((general-matrix (iup:matrix
#:alignment1 "ALEFT"
#:expand "YES" ;; "HORIZONTAL"
#:numcol 1
- #:numlin 3
+ #:numlin 2
#:numcol-visible 1
- #:numlin-visible 3)))
- (iup:attribute-set! general-matrix "WIDTH1" "200")
+ #:numlin-visible 2)))
+ (iup:attribute-set! general-matrix "WIDTH1" "150")
(iup:attribute-set! general-matrix "0:1" "About this Megatest area")
;; User (this is not always obvious - it is common to run as a different user
(iup:attribute-set! general-matrix "1:0" "User")
(iup:attribute-set! general-matrix "1:1" (current-user-name))
;; Megatest area
- (iup:attribute-set! general-matrix "2:0" "Area")
- (iup:attribute-set! general-matrix "2:1" *toppath*)
+ ;; (iup:attribute-set! general-matrix "2:0" "Area")
+ ;; (iup:attribute-set! general-matrix "2:1" *toppath*)
;; Megatest version
- (iup:attribute-set! general-matrix "3:0" "Version")
- (iup:attribute-set! general-matrix "3:1" megatest-version)
+ (iup:attribute-set! general-matrix "2:0" "Version")
+ (iup:attribute-set! general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4)))
general-matrix))
(define (dcommon:run-stats dbstruct)
(let* ((stats-matrix (iup:matrix expand: "YES"))
@@ -445,13 +445,13 @@
(let* ((colnum 0)
(rownum 0)
(servers-matrix (iup:matrix #:expand "YES"
#:numcol 7
#:numcol-visible 7
- #:numlin-visible 3
+ #:numlin-visible 5
))
- (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "State" "Transport"))
+ (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
(updater (lambda ()
(let ((servers (open-run-close tasks:get-all-servers tasks:open-db)))
(iup:attribute-set! servers-matrix "NUMLIN" (length servers))
;; (set! colnum 0)
;; (for-each (lambda (colname)
@@ -466,25 +466,27 @@
(let* ((vals (list (vector-ref server 0) ;; Id
(vector-ref server 9) ;; MT-Ver
(vector-ref server 1) ;; Pid
(vector-ref server 2) ;; Hostname
(conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port
- (vector-ref server 5) ;; Pubport
+ (seconds->hr-min-sec (- (current-seconds)(vector-ref server 6)))
+ ;; (vector-ref server 5) ;; Pubport
;; (vector-ref server 10) ;; Last beat
;; (vector-ref server 6) ;; Start time
;; (vector-ref server 7) ;; Priority
;; (vector-ref server 8) ;; State
- (if (< (vector-ref server 10) 20) ;; Status (Please redo this properly!)
- "alive"
- "dead")
- (vector-ref server 11) ;; Transport
+ (vector-ref server 8) ;; State
+ (vector-ref server 12) ;; RunId
)))
(for-each (lambda (val)
- ;; (print "rownum: " rownum " colnum: " colnum " val: " val)
- (iup:attribute-set! servers-matrix (conc rownum ":" colnum) val)
- (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))
- (set! colnum (+ 1 colnum)))
+ (let* ((row-col (conc rownum ":" colnum))
+ (curr-val (iup:attribute servers-matrix row-col)))
+ (if (not (equal? (conc val) curr-val))
+ (begin
+ (iup:attribute-set! servers-matrix row-col val)
+ (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))))
+ (set! colnum (+ 1 colnum))))
vals)
(set! rownum (+ rownum 1)))
(iup:attribute-set! servers-matrix "REDRAW" "ALL"))
servers)))))
(set! colnum 0)
@@ -493,39 +495,41 @@
(iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum))
(set! colnum (+ colnum 1)))
colnames)
(set! dashboard:update-servers-table updater)
;; (iup:attribute-set! servers-matrix "WIDTHDEF" "40")
- (iup:hbox
- (iup:vbox
- (iup:button "Start"
- ;; #:size "50x"
- #:expand "YES"
- #:action (lambda (obj)
- (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
- "megatest -server - &")))
- ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
- (system cmd))))
- (iup:button "Stop"
- #:expand "YES"
- ;; #:size "50x"
- #:action (lambda (obj)
- (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
- "megatest -stop-server 0 &")))
- ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
- (system cmd))))
- (iup:button "Restart"
- #:expand "YES"
- ;; #:size "50x"
- #:action (lambda (obj)
- (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
- "megatest -stop-server 0;megatest -server - &")))
- ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
- (system cmd)))))
- servers-matrix
- )))
-
+ ;; (iup:hbox
+ ;; (iup:vbox
+ ;; (iup:button "Start"
+ ;; ;; #:size "50x"
+ ;; #:expand "YES"
+ ;; #:action (lambda (obj)
+ ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
+ ;; "megatest -server - &")))
+ ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
+ ;; (system cmd))))
+ ;; (iup:button "Stop"
+ ;; #:expand "YES"
+ ;; ;; #:size "50x"
+ ;; #:action (lambda (obj)
+ ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
+ ;; "megatest -stop-server 0 &")))
+ ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
+ ;; (system cmd))))
+ ;; (iup:button "Restart"
+ ;; #:expand "YES"
+ ;; ;; #:size "50x"
+ ;; #:action (lambda (obj)
+ ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \""
+ ;; "megatest -stop-server 0;megatest -server - &")))
+ ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &")))
+ ;; (system cmd)))))
+ ;; servers-matrix
+ ;; )))
+ servers-matrix
+ ))
+
;; The main menu
(define (dcommon:main-menu)
(iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top)
(iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options
(iup:menu-item "Open" action: (lambda (obj)
Index: docs/manual/howto.txt
==================================================================
--- docs/manual/howto.txt
+++ docs/manual/howto.txt
@@ -5,10 +5,33 @@
Tricks
------
This section is a compendium of a various useful tricks for debugging,
configuring and generally getting the most out of Megatest.
+
+Limiting your running jobs
+--------------------------
+
+The following example will limit a test in the jobgroup "group1" to no more than 10 tests simultaneously.
+
+In your testconfig:
+
+----------------
+[test_meta]
+jobgroup group1
+----------------
+
+In your megatest.config:
+
+---------------
+[jobgroups]
+group1 10
+custdes 4
+---------------
+
+
+
Debugging Tricks
----------------
Examining The Environment
@@ -34,5 +57,13 @@
-------------------
runscript main.csh
-------------------
+Debugging Server Problems
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+----------------
+sudo lsof -i
+sudo netstat -lptu
+sudo netstat -tulpn
+----------------
Index: docs/manual/megatest_manual.html
==================================================================
--- docs/manual/megatest_manual.html
+++ docs/manual/megatest_manual.html
@@ -1,1227 +1,1196 @@
-
-
-
-
-
-The Megatest Users Manual
-
-
-
-
-
-
-
-
Preface
-
-
This book is organised as three sub-books; getting started, writing tests and reference.
-
-
Why Megatest?
-
The Megatest project was started for two reasons, the first was an
-immediate and pressing need for a generalized tool to manage a suite
-of regression tests and the second was the fact that the author had
-written or maintained several such tools at different companies over
-the years and it seemed a good thing to have a single open source
-tool, flexible enough to meet the needs of any team doing continuous
-integrating and or running a complex suite of tests for release
-qualification.
-
-
-
Megatest Design Philosophy
-
Megatest is intended to provide the minimum needed resources to make
-writing a suite of tests and tasks for implementing continuous build
-for software, design engineering or process control (via owlfs for
-example) without being specialized for any specific problem
-space. Megatest in of itself does not know what constitutes a PASS or
-FAIL of a test. In most cases megatest is best used in conjunction
-with logpro or a similar tool to parse, analyze and decide on the test
-outcome.
-
-
-
Megatest Architecture
-
All data to specify the tests and configure the system is stored in
-plain text files. All system state is stored in an sqlite3
-database. Tests are launched using the launching system available for
-the distributed compute platform in use. A template script is provided
-which can launch jobs on local and remote Linux hosts. Currently
-megatest uses the network filesystem to call home to your master
-sqlite3 database.
-
-
-
-
Road Map
-
Note 1: This road-map is tentative and subject to change without notice.
-
Note 2: Starting over. Old plan is commented out.
-
-
Current Items
-
-
-
ww05 - migrate to inmem-db
-
Keep as much the same as possible. Add internal reference to almost
-eliminate contention on db(s).
-
--
-
-Add internal reference db
-
-
--
-
-Verify that actions are accessing correct db
-
-
--
-
--runtests - inmem
-
-
--
-
--list-runs - local (but not megatest.db)
-
-
--
-
-dashboard - local (but not megatest.db)
-
-
-
-
--
-
-Mirror db to /var/tmp…
-
-
--
-
-Dashboard read db from per-run db.
-
-
--
-
-Dashboard read db from /var/tmp
-
-
--
-
-Runs register in tasks table in monitor.db
-
-
--
-
-Server polls tasks table for next action (in addition?)
-
-
--
-
-Change run loop to execute in server, triggered by call to polling of tasks table
-
-
-
-
-
-
-
Getting Started
-
-
Getting started with Megatest
-
-
How to install Megatest and set it up for running your regressions and continuous integration process.
-
-
-
Installation
-
-
-
Dependencies
-
Chicken scheme and a number of "eggs" are required for building
-Megatest. See the script installall.sch in the utils directory of the
-distribution for a mostly automated way to install everything needed
-for building Megatest on Linux.
-
-
And now for something completely different: monkeys, lions and
-tigers (Bengal and Siberian) using the alternative syntax index
-entries.
-
-
-
-Note that multi-entry terms generate separate index entries.
-
Here are a couple of image examples: an
-
-
-example inline image followed by an example block image:
-
-
-
-
-
Figure 1. Tiger block image
-
-
Followed by an example table:
-
-
-Table 1. An example table
-
-
-
-
- Option |
- Description |
-
-
-
-
--a USER GROUP |
-Add USER to GROUP. |
-
-
--R GROUP |
-Disables access to GROUP. |
-
-
-
-
-
-
Example 1. An example example
-
-
-
-
Sub-section with Anchor
-
-
-
Chapter Sub-section
-
-
-
Chapter Sub-section
-
-
This is the maximum sub-section depth supported by the distributed
-AsciiDoc configuration.
-
-
-
-
-
-
-
-
The Second Chapter
-
-
-
An example link to a bibliography entry [taoup].
-
-
-
Writing Tests
-
-
The First Chapter of the Second Part
-
-
Chapters grouped into book parts are at level 1 and can contain
-sub-sections.
-
-
-
How To Do Things
-
-
Tricks
-
-
This section is a compendium of a various useful tricks for debugging,
-configuring and generally getting the most out of Megatest.
-
-
-
-
Debugging Tricks
-
-
-
Examining The Environment
-
-
During Config File Processing
-
-
-
Organising Your Tests and Tasks
-
-
-
[tests-paths]
-1 #{get misc parent}/simplerun/tests
-
-
-
The runscript method is a brute force way to run scripts where the
-user is responsible for setting STATE and STATUS
-
-
-
-
-
-
Reference
-
-
The First Chapter of the Second Part
-
-
Chapters grouped into book parts are at level 1 and can contain
-sub-sections.
-
-
-
-
The testconfig File
-
-
-
Setup section
-
-
-
-
The runscript method is a brute force way to run scripts where the
-user is responsible for setting STATE and STATUS
-
-
-
-
-
Requirements section
-
-
-
Wait on Other Tests
-
-
-
# A normal waiton waits for the prior tests to be COMPLETED
-# and PASS, CHECK or WAIVED
-waiton test1 test2
-
-
-
-
Mode
-
The default (i.e. if mode is not specified) is normal. All pre-dependent tests
-must be COMPLETED and PASS, CHECK or WAIVED before the test will start
-
-
The toplevel mode requires only that the prior tests are COMPLETED.
-
-
A item based waiton will start items in a test when the
-same-named item is COMPLETED and PASS, CHECK or WAIVED
-in the prior test
-
-
-
-
# With a toplevel test you may wish to generate your list
-# of tests to run dynamically
-#
-# waiton #{shell get-valid-tests-to-run.sh}
-
-
-
-
Run time limit
-
-
-
runtimelim 1h 2m 3s # this will automatically kill the test if it runs for more than 1h 2m and 3s
-
-
-
-
Skip
-
-
-
-
Skip on Still-running Tests
-
-
-
# NB// If the prevrunning line exists with *any* value the test will
-# automatically SKIP if the same-named test is currently RUNNING
-
-prevrunning x
-
-
-
-
Skip if a File Exists
-
-
-
fileexists /path/to/a/file # skip if /path/to/a/file exists
-
-
-
-
Controlled waiver propagation
-
If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
-If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED
-
Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)
-
-
-
###### EXAMPLE FROM testconfig #########
-# matching file(s) will be diff'd with previous run and logpro applied
-# if PASS or WARN result from logpro then WAIVER state is set
-#
-[waivers]
-# logpro_file rulename input_glob
-waiver_1 logpro lookittmp.log
-
-[waiver_rules]
-
-# This builtin rule is the default if there is no <waivername>.logpro file
-# diff diff %file1% %file2%
-
-# This builtin rule is applied if a <waivername>.logpro file exists
-# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
-
-
-
-
-
Ezsteps
-
To transfer the environment to the next step you can do the following:
-
-
-
$MT_MEGATEST -env2file .ezsteps/${stepname}
-
-
-
-
Megatest Internals
-
-
-
-
-
-
-
-
-
-
Appendix A: Example Appendix
-
-
One or more optional appendixes go here at section level zero.
-
-
Appendix Sub-section
-
-
-
- Note
- |
-Preface and appendix subsections start out of sequence at level
-2 (level 1 is skipped). This only applies to multi-part book
-documents. |
-
-
-
-
-
-
-
Example Bibliography
-
-
The bibliography list is a style of AsciiDoc bulleted list.
-
--
-
-[taoup] Eric Steven Raymond. The Art of Unix
- Programming. Addison-Wesley. ISBN 0-13-142901-9.
-
-
--
-
-[walsh-muellner] Norman Walsh & Leonard Muellner.
- DocBook - The Definitive Guide. O’Reilly & Associates. 1999.
- ISBN 1-56592-580-7.
-
-
-
-
-
-
-
Example Glossary
-
-
Glossaries are optional. Glossaries entries are an example of a style
-of AsciiDoc labeled lists.
-
--
-A glossary term
-
--
-
- The corresponding (indented) definition.
-
-
--
-A second glossary term
-
--
-
- The corresponding (indented) definition.
-
-
-
-
-
-
-
Example Colophon
-
-
Text at the end of a book describing facts about its production.
-
-
-
-
-
-
-
-
+
+
+
+
+
+The Megatest Users Manual
+
+
+
+
+
+
+
+
Preface
+
+
This book is organised as three sub-books; getting started, writing tests and reference.
+
+
Why Megatest?
+
The Megatest project was started for two reasons, the first was an
+immediate and pressing need for a generalized tool to manage a suite
+of regression tests and the second was the fact that the author had
+written or maintained several such tools at different companies over
+the years and it seemed a good thing to have a single open source
+tool, flexible enough to meet the needs of any team doing continuous
+integrating and or running a complex suite of tests for release
+qualification.
+
+
+
Megatest Design Philosophy
+
Megatest is intended to provide the minimum needed resources to make
+writing a suite of tests and tasks for implementing continuous build
+for software, design engineering or process control (via owlfs for
+example) without being specialized for any specific problem
+space. Megatest in of itself does not know what constitutes a PASS or
+FAIL of a test. In most cases megatest is best used in conjunction
+with logpro or a similar tool to parse, analyze and decide on the test
+outcome.
+
+
+
Megatest Architecture
+
All data to specify the tests and configure the system is stored in
+plain text files. All system state is stored in an sqlite3
+database. Tests are launched using the launching system available for
+the distributed compute platform in use. A template script is provided
+which can launch jobs on local and remote Linux hosts. Currently
+megatest uses the network filesystem to call home to your master
+sqlite3 database.
+
+
+
+
Road Map
+
Note 1: This road-map is tentative and subject to change without notice.
+
Note 2: Starting over. Old plan is commented out.
+
+
Current Items
+
+
+
ww05 - migrate to inmem-db
+
Keep as much the same as possible. Add internal reference to almost
+eliminate contention on db(s).
+
+-
+
+Add internal reference db
+
+
+-
+
+Verify that actions are accessing correct db
+
+
+-
+
+-runtests - inmem
+
+
+-
+
+-list-runs - local (but not megatest.db)
+
+
+-
+
+dashboard - local (but not megatest.db)
+
+
+
+
+-
+
+Mirror db to /var/tmp…
+
+
+-
+
+Dashboard read db from per-run db.
+
+
+-
+
+Dashboard read db from /var/tmp
+
+
+-
+
+Runs register in tasks table in monitor.db
+
+
+-
+
+Server polls tasks table for next action (in addition?)
+
+
+-
+
+Change run loop to execute in server, triggered by call to polling of tasks table
+
+
+
+
+
+
+
Getting Started
+
+
Getting started with Megatest
+
+
How to install Megatest and set it up for running your regressions and continuous integration process.
+
+
+
Installation
+
+
+
Dependencies
+
Chicken scheme and a number of "eggs" are required for building
+Megatest. See the script installall.sch in the utils directory of the
+distribution for a mostly automated way to install everything needed
+for building Megatest on Linux.
+
+
And now for something completely different: monkeys, lions and
+tigers (Bengal and Siberian) using the alternative syntax index
+entries.
+
+
+
+Note that multi-entry terms generate separate index entries.
+
Here are a couple of image examples: an
+
+
+example inline image followed by an example block image:
+
+
+
+
+
Figure 1. Tiger block image
+
+
Followed by an example table:
+
+
+Table 1. An example table
+
+
+
+
+ Option |
+ Description |
+
+
+
+
+-a USER GROUP |
+Add USER to GROUP. |
+
+
+-R GROUP |
+Disables access to GROUP. |
+
+
+
+
+
+
Example 1. An example example
+
+
+
+
Sub-section with Anchor
+
+
+
Chapter Sub-section
+
+
+
Chapter Sub-section
+
+
This is the maximum sub-section depth supported by the distributed
+AsciiDoc configuration.
+
+
+
+
+
+
+
+
The Second Chapter
+
+
+
An example link to a bibliography entry [taoup].
+
+
+
Writing Tests
+
+
The First Chapter of the Second Part
+
+
Chapters grouped into book parts are at level 1 and can contain
+sub-sections.
+
+
+
How To Do Things
+
+
Tricks
+
+
This section is a compendium of a various useful tricks for debugging,
+configuring and generally getting the most out of Megatest.
+
+
+
+
Debugging Tricks
+
+
+
Examining The Environment
+
+
During Config File Processing
+
+
+
Organising Your Tests and Tasks
+
+
+
[tests-paths]
+1 #{get misc parent}/simplerun/tests
+
+
+
The runscript method is a brute force way to run scripts where the
+user is responsible for setting STATE and STATUS
+
+
+
+
+
+
+
Tricks
+
+
This section is a compendium of a various useful tricks for debugging,
+configuring and generally getting the most out of Megatest.
+
+
+
+
Debugging Tricks
+
+
+
Examining The Environment
+
+
During Config File Processing
+
+
+
Organising Your Tests and Tasks
+
/nfs/ch/disks/ch_unienv_disk005/qa_mrwellan/interim/src/megatest/tests/fdktestqa/testqa
+
+
+
[tests-paths]
+1 #{get misc parent}/simplerun/tests
+
+
+
The runscript method is a brute force way to run scripts where the
+user is responsible for setting STATE and STATUS
+
+
ww30.2
+cellname/LVS/cellname.LAYOUT_ERRORS
+
+
ww31.3
+cellname/LVS/cellname.LAYOUT_ERRORS
+
Error: text open
+Reference
+
+
+
Chapters grouped into book parts are at level 1 and can contain
+sub-sections.
+
+
The runscript method is a brute force way to run scripts where the
+user is responsible for setting STATE and STATUS
+
+
+
+
+
# A normal waiton waits for the prior tests to be COMPLETED
+# and PASS, CHECK or WAIVED
+waiton test1 test2
+
+
The default (i.e. if mode is not specified) is normal. All pre-dependent tests
+must be COMPLETED and PASS, CHECK or WAIVED before the test will start
+
+
The toplevel mode requires only that the prior tests are COMPLETED.
+
+
A item based waiton will start items in a test when the
+same-named item is COMPLETED and PASS, CHECK or WAIVED
+in the prior test
+
+
+
+
# With a toplevel test you may wish to generate your list
+# of tests to run dynamically
+#
+# waiton #{shell get-valid-tests-to-run.sh}
+
+
+
+
runtimelim 1h 2m 3s # this will automatically kill the test if it runs for more than 1h 2m and 3s
+
+
+
+
+
# NB// If the prevrunning line exists with *any* value the test will
+# automatically SKIP if the same-named test is currently RUNNING
+
+prevrunning x
+
+
+
+
fileexists /path/to/a/file # skip if /path/to/a/file exists
+
+
If test is FAIL and previous test in run with same MT_TARGET is WAIVED then apply the following rules from the testconfig:
+If a waiver check is specified in the testconfig apply the check and if it passes then set this FAIL to WAIVED
+
Waiver check has two parts, 1) a list of waiver, rulename, filepatterns and 2) the rulename script spec (note that "diff" and "logpro" are predefined)
+
+
+
###### EXAMPLE FROM testconfig #########
+# matching file(s) will be diff'd with previous run and logpro applied
+# if PASS or WARN result from logpro then WAIVER state is set
+#
+[waivers]
+# logpro_file rulename input_glob
+waiver_1 logpro lookittmp.log
+
+[waiver_rules]
+
+# This builtin rule is the default if there is no <waivername>.logpro file
+# diff diff %file1% %file2%
+
+# This builtin rule is applied if a <waivername>.logpro file exists
+# logpro diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html
+
+
To transfer the environment to the next step you can do the following:
+
+
+
$MT_MEGATEST -env2file .ezsteps/${stepname}
+
+
+
Megatest Internals
+
+
+
+
+
+
+
One or more optional appendixes go here at section level zero.
+
+
+
+ Note
+ |
+Preface and appendix subsections start out of sequence at level
+2 (level 1 is skipped). This only applies to multi-part book
+documents. |
+
+
+
The bibliography list is a style of AsciiDoc bulleted list.
+
+-
+
+[taoup] Eric Steven Raymond. The Art of Unix
+ Programming. Addison-Wesley. ISBN 0-13-142901-9.
+
+
+-
+
+[walsh-muellner] Norman Walsh & Leonard Muellner.
+ DocBook - The Definitive Guide. O’Reilly & Associates. 1999.
+ ISBN 1-56592-580-7.
+
+
+
+
Glossaries are optional. Glossaries entries are an example of a style
+of AsciiDoc labeled lists.
+
+-
+A glossary term
+
+-
+
+ The corresponding (indented) definition.
+
+
+-
+A second glossary term
+
+-
+
+ The corresponding (indented) definition.
+
+
+
+
Text at the end of a book describing facts about its production.
+
+
+
+
+
+
+
+
+
+
Index: docs/manual/server.png
==================================================================
--- docs/manual/server.png
+++ docs/manual/server.png
cannot compute difference between binary files
Index: example/cfg/machines.dat
==================================================================
--- example/cfg/machines.dat
+++ example/cfg/machines.dat
@@ -1,16 +1,16 @@
[]
[maxload]
zeus 0.40000000000000002
xena 0.20000000000000001
-myth1 0.01
+myth2 0.01
hades 1
[minfree]
zeus 1000
xena 20000
-myth1 300000
+myth2 300000
hades 4000000
[reqprocs]
zeus mfsmount mythbackend mfschunkserver
xena mfsmount
-myth1 mfsmount mythfrontend mfschunkserver
+myth2 mfsmount mythfrontend mfschunkserver
hades mfsmount mfsmetalogger mfschunkserver
Index: example/cfg/sxml/_sheets.sxml
==================================================================
--- example/cfg/sxml/_sheets.sxml
+++ example/cfg/sxml/_sheets.sxml
@@ -26,12 +26,13 @@
(http://www.gnumeric.org/v10.dtd:name "WorkbookView::is_protected")
(http://www.gnumeric.org/v10.dtd:value "FALSE")))
(urn:oasis:names:tc:opendocument:xmlns:office:1.0:document-meta
(@ (urn:oasis:names:tc:opendocument:xmlns:office:1.0:version "1.2"))
(urn:oasis:names:tc:opendocument:xmlns:office:1.0:meta
+ (http://purl.org/dc/elements/1.1/:date "2014-02-14T06:16:26Z")
(urn:oasis:names:tc:opendocument:xmlns:meta:1.0:creation-date
- "2014-02-14T06:12:52Z")))
+ "2014-02-14T06:16:17Z")))
(http://www.gnumeric.org/v10.dtd:Calculation
(@ (MaxIterations "100")
(ManualRecalc "0")
(IterationTolerance "0.001")
(FloatRadix "2")
Index: example/cfg/sxml/machines.sxml
==================================================================
--- example/cfg/sxml/machines.sxml
+++ example/cfg/sxml/machines.sxml
@@ -88,13 +88,13 @@
(http://www.gnumeric.org/v10.dtd:Rows
(@ (DefaultSizePts "12.75"))
(http://www.gnumeric.org/v10.dtd:RowInfo
(@ (Unit "13.5") (No "0") (Count "5"))))
(http://www.gnumeric.org/v10.dtd:Selections
- (@ (CursorRow "3") (CursorCol "0"))
+ (@ (CursorRow "4") (CursorCol "0"))
(http://www.gnumeric.org/v10.dtd:Selection
- (@ (startRow "3") (startCol "0") (endRow "3") (endCol "0"))))
+ (@ (startRow "4") (startCol "0") (endRow "4") (endCol "0"))))
(http://www.gnumeric.org/v10.dtd:SheetLayout (@ (TopLeft "A1")))
(http://www.gnumeric.org/v10.dtd:Solver
(@ (ProgramR "0")
(ProblemType "0")
(NonNeg "1")
DELETED fs-transport.scm
Index: fs-transport.scm
==================================================================
--- fs-transport.scm
+++ /dev/null
@@ -1,44 +0,0 @@
-
-;; Copyright 2006-2012, Matthew Welland.
-;;
-;; This program is made available under the GNU GPL version 2.0 or
-;; greater. See the accompanying file COPYING for details.
-;;
-;; This program is distributed WITHOUT ANY WARRANTY; without even the
-;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-;; PURPOSE.
-
-(require-extension (srfi 18) extras tcp s11n)
-
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
-(import (prefix sqlite3 sqlite3:))
-
-(use spiffy uri-common intarweb http-client spiffy-request-vars)
-
-(tcp-buffer-size 2048)
-
-(declare (unit fs-transport))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-
-;;======================================================================
-;; F S T R A N S P O R T S E R V E R
-;;======================================================================
-
-;; There is no "server" per se but a convience routine to make it non
-;; necessary to be reopening the db over and over again.
-;;
-
-(define (fs:process-queue-item packet)
- (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called
- (set! *megatest-db* (open-db)))
- (debug:print-info 11 "fs:process-queue-item called with packet=" packet)
- (db:process-queue-item *megatest-db* packet))
-
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -68,11 +68,11 @@
;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
(server:get-best-guess-address hostname)
#f)))
(if ipstr ipstr hostn))) ;; hostname)))
(start-port (open-run-close tasks:server-get-next-port tasks:open-db))
- (link-tree-path (config-lookup *configdat* "setup" "linktree")))
+ (link-tree-path (configf:lookup *configdat* "setup" "linktree")))
(set! db *inmemdb*)
(root-path (if link-tree-path
link-tree-path
(current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
(handle-directory spiffy-directory-listing)
@@ -127,15 +127,15 @@
((equal? (uri-path (request-uri (current-request)))
'(/ "hey"))
(send-response body: "hey there!\n"
headers: '((content-type text/plain))))
(else (continue))))))))
- (http-transport:try-start-server ipaddrstr start-port server-id)))
+ (http-transport:try-start-server run-id ipaddrstr start-port server-id)))
;; This is recursively run by http-transport:run until sucessful
;;
-(define (http-transport:try-start-server ipaddrstr portnum server-id)
+(define (http-transport:try-start-server run-id ipaddrstr portnum server-id)
(handle-exceptions
exn
(begin
(print-error-message exn)
(if (< portnum 9000)
@@ -143,23 +143,26 @@
(debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
(thread-sleep! 0.1)
;; get_next_port goes here
- (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))
- (print "ERROR: Tried and tried but could not start the server")))
+ (http-transport:try-start-server run-id ipaddrstr (+ portnum 1) server-id))
+ (begin
+ (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server")
+ (print "ERROR: Tried and tried but could not start the server"))))
;; any error in following steps will result in a retry
(set! *server-info* (list ipaddrstr portnum))
(open-run-close tasks:server-set-interface-port
tasks:open-db
server-id
ipaddrstr portnum)
(debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
;; This starts the spiffy server
;; NEED WAY TO SET IP TO #f TO BIND ALL
- (start-server bind-address: ipaddrstr port: portnum)
- (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
+ ;; (start-server bind-address: ipaddrstr port: portnum)
+ (start-server port: portnum)
+ (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id ipaddrstr portnum " http-transport:try-start-server")
(debug:print 1 "INFO: server has been stopped")))
;;======================================================================
;; S E R V E R U T I L I T I E S
;;======================================================================
@@ -219,18 +222,23 @@
;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 30))
(let* ((fullurl (if (list? serverdat)
- (cadddr serverdat) ;; this is the uri for /api
+ (list-ref serverdat 4) ;; (cadddr serverdat) ;; this is the uri for /api
(begin
(debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
(exit 1))))
(res #f))
(handle-exceptions
exn
- #f
+ (if (> numretries 0)
+ (begin
+ (mutex-unlock! *http-mutex*)
+ (thread-sleep! 10)
+ (http-transport:client-api-send-receive run-id serverdat cmd params (- numretries 1)))
+ #f)
(begin
(debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
;; set up the http-client here
(max-retry-attempts 5)
;; consider all requests indempotent
@@ -264,25 +272,29 @@
res)))))
;;
;; connect
;;
-(define (http-transport:client-connect run-id iface port)
- (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
- (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api"))))
- (server-dat (list iface port uri-dat uri-api-dat))
- (login-res (rmt:login-no-auto-client-setup server-dat run-id)))
- ;; (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ...
- (if (and (list? login-res)
- (car login-res))
- (begin
- (debug:print-info 2 "Logged in and connected to " iface ":" port)
+(define (http-transport:client-connect iface port)
+ (let* ((api-url (conc "http://" iface ":" port "/api"))
+ (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
+ ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl"))))
+ (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api"))))
+ ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api"))))
+ (server-dat (list iface port uri-dat uri-api-dat api-url)))
+;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id)))
+ server-dat))
+;; (if (and (list? login-res)
(hash-table-set! *runremote* run-id server-dat)
server-dat)
- (begin
- (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
- #f))))
+;; (hash-table-set! *runremote* run-id server-dat)
+;; (debug:print-info 2 "Logged in and connected to " iface ":" port)
+;; (hash-table-set! *runremote* run-id server-dat)
+;; server-dat)
+;; (begin
+;; (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port)
+;; #f))))
;; run http-transport:keep-running in a parallel thread to monitor that the db is being
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
@@ -307,39 +319,43 @@
sdat))))))
(iface (car server-info))
(port (cadr server-info))
(last-access 0)
(tdb (tasks:open-db))
- (server-timeout (let ((tmo (config-lookup *configdat* "server" "timeout")))
+ (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(string->number tmo))
(* 60 60 (string->number tmo))
;; (* 3 24 60 60) ;; default to three days
- (* 60 60) ;; default to one hour
+ ;; (* 60 1) ;; default to one minute
+ (* 60 60 25) ;; default to 25 hours
))))
- ;;
- ;; set_running
- ;;
- (tasks:server-set-state! tdb server-id "running")
- (let loop ((count 0))
+ (let loop ((count 0)
+ (server-state 'available))
;; Use this opportunity to sync the inmemdb to db
(let ((start-time (current-milliseconds))
(sync-time #f)
(rem-time #f))
+
(if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
(set! sync-time (- (current-milliseconds) start-time))
(set! rem-time (quotient (- 4000 sync-time) 1000))
(debug:print 0 "SYNC: time= " sync-time ", rem-time=" rem-time)
- (if (and (<= rem-time 4)
- (> rem-time 0))
- (thread-sleep! rem-time)
- (thread-sleep! 4))) ;; fallback for if the math is changed ...
+
+ ;;
+ ;; set_running after our first pass through
+ ;;
+ (if (eq? server-state 'available)
+ (tasks:server-set-state! tdb server-id "running"))
- ;; (thread-sleep! 4) ;; no need to do this very often
-
+ (if (and (<= rem-time 4)
+ (> rem-time 0))
+ (thread-sleep! rem-time)
+ (thread-sleep! 4))) ;; fallback for if the math is changed ...
+
(if (< count 1) ;; 3x3 = 9 secs aprox
- (loop (+ count 1)))
+ (loop (+ count 1) 'running))
;; Check that iface and port have not changed (can happen if server port collides)
(mutex-lock! *heartbeat-mutex*)
(set! sdat *server-info*)
(mutex-unlock! *heartbeat-mutex*)
@@ -363,11 +379,18 @@
(if (and *server-run*
(> (+ last-access server-timeout)
(current-seconds)))
(begin
(debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
- (loop 0))
+ ;;
+ ;; Consider implementing some smarts here to re-insert the record or kill self is
+ ;; the db indicates so
+ ;;
+ ;; (if (tasks:server-am-i-the-server? tdb run-id)
+ ;; (tasks:server-set-state! tdb server-id "running"))
+ ;;
+ (loop 0 server-state))
(begin
(debug:print-info 0 "Starting to shutdown the server.")
;; need to delete only *my* server entry (future use)
(set! *time-to-exit* #t)
(if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
@@ -390,11 +413,11 @@
"n/a (no queries)"
(/ *total-non-write-delay*
*number-non-write-queries*))
" ms")
(debug:print-info 0 "Server shutdown complete. Exiting")
- (tasks:server-delete-record! tdb server-id)
+ (tasks:server-delete-record tdb server-id " http-transport:keep-running")
(exit))))))
;; all routes though here end in exit ...
;;
;; start_server?
@@ -405,16 +428,23 @@
(daemon:ize))
(if (server:check-if-running run-id)
(begin
(debug:print 0 "INFO: Server for run-id " run-id " already running")
(exit 0)))
- (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)))
+ (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))
+ (remtries 4))
(if (not server-id)
- (begin
- ;; since we didn't get the server lock we are going to clean up and bail out
- (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
- (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db))
+ (if (> remtries 0)
+ (begin
+ (thread-sleep! 2)
+ (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id)
+ (- remtries 1)))
+ (begin
+ ;; since we didn't get the server lock we are going to clean up and bail out
+ (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
+ (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch")
+ ))
(let* ((th2 (make-thread (lambda ()
(http-transport:run
(if (args:get-arg "-server")
(args:get-arg "-server")
"-")
Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -90,14 +90,10 @@
fulln
runscript))))) ;; assume it is on the path
(rollup-status 0))
(change-directory top-path)
(debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
- ;; Setup the *runremote* global var
- (if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time"))
- ;; (set! *runremote* runremote)
- ;; (set! *transport-type* (string->symbol transport))
(set! keys (rmt:get-keys))
(set! keyvals (keys:target->keyval keys target))
;; apply pre-overrides before other variables. The pre-override vars must not
;; clobbers things from the official sources such as megatest.config and runconfigs.config
(if (string? set-vars)
@@ -318,11 +314,12 @@
(begin
(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
#t)
#f)))))
;; open-run-close not needed for test-set-meta-info
- (tests:set-partial-meta-info test-id run-id minutes work-area)
+ (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
+ ;; (tests:set-partial-meta-info test-id run-id minutes work-area)
(if kill-job?
(begin
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
;; section and the runit section? Or add a loop that tries three times with a 1/4 second
@@ -369,11 +366,11 @@
(thread-join! th1)
(thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
(mutex-lock! m)
(let* ((item-path (item-list->path itemdat))
;; only state and status needed - use lazy routine
- (testinfo (rmt:get-testinfo-state-status run-id test-id))) ;;;(cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
+ (testinfo (rmt:get-testinfo-state-status run-id test-id)))
;; Am I completed?
(if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
(let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
;; "COMPLETED"
;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test
@@ -394,14 +391,10 @@
new-state
new-status
(args:get-arg "-m") #f)
;; need to update the top test record if PASS or FAIL and this is a subtest
;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
- ;; (if (not (equal? item-path ""))
- ;; (begin
- ;; (thread-sleep! 0.1) ;; give other processes an opportunity to access the db as rollup is lower priority
- ;; (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status)))
))
;; for automated creation of the rollup html file this is a good place...
(if (not (equal? item-path ""))
(tests:summarize-items run-id test-id test-name #f))) ;; don't force - just update if no
(mutex-unlock! m)
@@ -500,11 +493,10 @@
(lnkbase (conc linktree "/" target "/" runname))
(lnkpath (conc lnkbase "/" testname))
(lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))
;; Update the rundir path in the test record for all
- ;; (cdb:test-set-rundir-by-test-id *runremote* test-id (filedb:register-path *fdb* lnkpathf))
(rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path)
(debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
(if (not (file-exists? linktree))
(begin
@@ -564,11 +556,10 @@
;; (rmt:sdb-qry 'getstr
(db:test-get-rundir testinfo) ;; ) ;; )
#f)))
(hash-table-set! *toptest-paths* testname curr-test-path)
;; NB// Was this for the test or for the parent in an iterated test?
- ;;(cdb:test-set-rundir! *runremote* run-id testname "" (filedb:register-path *fdb* lnkpath)) ;; toptest-path)
(rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
(if (file-exists? lnkpath)
(resolve-pathname lnkpath)
lnkpath)
testname "")
@@ -700,11 +691,10 @@
(debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
(set! cmdparms (base64:base64-encode
(with-output-to-string
(lambda () ;; (list 'hosts hosts)
(write (list (list 'testpath test-path)
- ;; (list 'runremote *runremote*)
(list 'transport (conc *transport-type*))
(list 'serverinf *server-info*)
(list 'toppath *toppath*)
(list 'work-area work-area)
(list 'test-name test-name)
Index: lock-queue.scm
==================================================================
--- lock-queue.scm
+++ lock-queue.scm
@@ -48,24 +48,32 @@
CONSTRAINT runlock_constraint UNIQUE (run_lock));")))
(sqlite3:set-busy-handler! db handler)
db))
(define (lock-queue:set-state db test-id newstate)
- (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
- newstate
- test-id))
+ (handle-exceptions
+ exn
+ (thread-sleep! 30)
+ (lock-queue:set-state db test-id newstate)
+ (sqlite3:execute db "UPDATE queue SET state=? WHERE test_id=?;"
+ newstate
+ test-id)))
(define (lock-queue:any-younger? db mystart test-id)
- (let ((res #f))
- (sqlite3:for-each-row
- (lambda (tid)
- ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as
- (if (not (equal? tid test-id))
- (set! res tid)))
- db
- "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
- res))
+ (handle-exceptions
+ exn
+ (thread-sleep! 30)
+ (lock-queue:any-younger? db mystart test-id)
+ (let ((res #f))
+ (sqlite3:for-each-row
+ (lambda (tid)
+ ;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as
+ (if (not (equal? tid test-id))
+ (set! res tid)))
+ db
+ "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
+ res)))
(define (lock-queue:get-lock db test-id)
(let ((res #f)
(lckqry (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
(mklckqry (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -8,11 +8,11 @@
;; PURPOSE.
;; (include "common.scm")
;; (include "megatest-version.scm")
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client) ;; (srfi 18) extras)
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils) ;; (srfi 18) extras)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))
;; (use zmq)
@@ -44,10 +44,13 @@
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
(if (file-exists? debugcontrolf)
(load debugcontrolf)))
+;; Disabled help items
+;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s)
+;; from prior runs with same keys
(define help (conc "
Megatest, documentation at http://www.kiatoa.com/fossils/megatest
version " megatest-version "
license GPL, Copyright Matt Welland 2006-2012
@@ -62,14 +65,14 @@
-runtests tst1,tst2 ... : run tests
-remove-runs : remove the data for a run, requires :runname and -testpatt
Optionally use :state and :status
-set-state-status X,Y : set state to X and status to Y, requires controls per -remove-runs
-rerun FAIL,WARN... : force re-run for tests with specificed status(s)
- -rollup : (currently disabled) fill run (set by :runname) with latest test(s)
- from prior runs with same keys
-lock : lock run specified by target and runname
-unlock : unlock run specified by target and runname
+ -set-run-status status : sets status for run to status, requires -target and :runname
+ -get-run-status : gets status for run specified by target and runname
-run-wait : wait on run specified by target and runname
Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.)
-target key1/key2/... : run for key1, key2, etc.
-reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig
@@ -113,10 +116,11 @@
-show-runconfig : dump the internal representation of the runconfigs.config file
-dumpmode json : dump in json format instead of sexpr
-show-cmdinfo : dump the command info for a test (run in test environment)
Misc
+ -start-dir path : switch to this directory before running megatest
-rebuild-db : bring the database schema up to date
-cleanup-db : remove any orphan records, vacuum the db
-update-meta : update the tests metadata for all tests
-env2file fname : write the environment to fname.csh and fname.sh
-setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
@@ -128,10 +132,11 @@
-stop-server id : stop server specified by id (see output of -list-servers), use
0 to kill all
-repl : start a repl (useful for extending megatest)
-load file.scm : load and run file.scm
-mark-incompletes : find and mark incomplete tests
+ -ping run-id|host:port : ping server, exit with 0 if found
Spreadsheet generation
-extract-ods fname.ods : extract an open document spreadsheet from the database
-pathmod path : insert path, i.e. path/runame/itempath/logfile.html
will clear the field if no rundir/testname/itempath/logfile
@@ -186,25 +191,28 @@
":value"
":expected"
":tol"
":units"
;; misc
+ "-start-dir"
"-server"
"-stop-server"
"-port"
"-extract-ods"
"-pathmod"
"-env2file"
"-setvars"
"-set-state-status"
+ "-set-run-status"
"-debug" ;; for *verbosity* > 2
"-gen-megatest-test"
"-override-timeout"
"-test-files" ;; -test-paths is for listing all
"-load" ;; load and exectute a scheme file
"-dumpmode"
"-run-id"
+ "-ping"
)
(list "-h"
"-version"
"-force"
"-xterm"
@@ -229,10 +237,12 @@
"-list-targets"
"-list-db-targets"
"-show-runconfig"
"-show-config"
"-show-cmdinfo"
+ "-get-run-status"
+
;; queries
"-test-paths" ;; get path(s) to a test, ordered by youngest first
"-runall" ;; run all tests
"-remove-runs"
@@ -256,10 +266,17 @@
(if (args:get-arg "-h")
(begin
(print help)
(exit)))
+
+(if (args:get-arg "-start-dir")
+ (if (file-exists? (args:get-arg "-start-dir"))
+ (change-directory (args:get-arg "-start-dir"))
+ (begin
+ (debug:print 0 "ERROR: non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.")
+ (exit 1))))
(if (args:get-arg "-version")
(begin
(print megatest-version)
(exit)))
@@ -325,10 +342,38 @@
x
" => "))
(common:get-disks) )
"\n"))
(set! *didsomething* #t)))
+
+(if (args:get-arg "-ping")
+ (let* ((run-id (string->number (args:get-arg "-run-id")))
+ (host-port (let ((slst (string-split (args:get-arg "-ping") ":")))
+ (if (eq? (length slst) 2)
+ (list (car slst)(string->number (cadr slst)))
+ #f)))
+ (toppath (setup-for-run)))
+ (if (not run-id)
+ (begin
+ (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n")
+ (print "ERROR: No run-id")
+ (exit 1))
+ (if (not host-port)
+ (begin
+ (debug:print 0 "ERROR: argument to -ping is host:port, got " (args:get-arg "-ping"))
+ (print "ERROR: bad host:port")
+ (exit 1))
+ (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port)))
+ (login-res (rmt:login-no-auto-client-setup server-dat run-id)))
+ (if (and (list? login-res)
+ (car login-res))
+ (begin
+ (print "LOGIN_OK")
+ (exit 0))
+ (begin
+ (print "LOGIN_FAILED")
+ (exit 1))))))))
;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;; we start the server if not running else start the client thread
;;======================================================================
@@ -453,33 +498,38 @@
(read-config "runconfigs.config" #f #t sections: sections))))
data))
(if (args:get-arg "-show-runconfig")
- (let ((data (full-runconfigs-read)))
- ;; keep this one local
- (cond
- ((not (args:get-arg "-dumpmode"))
- (pp (hash-table->alist data)))
- ((string=? (args:get-arg "-dumpmode") "json")
+ (let ((tl (setup-for-run)))
+ (push-directory *toppath*)
+ (let ((data (full-runconfigs-read)))
+ ;; keep this one local
+ (cond
+ ((not (args:get-arg "-dumpmode"))
+ (pp (hash-table->alist data)))
+ ((string=? (args:get-arg "-dumpmode") "json")
(json-write data))
- (else
- (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
- (set! *didsomething* #t)))
+ (else
+ (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
+ (set! *didsomething* #t))
+ (pop-directory)))
(if (args:get-arg "-show-config")
(let ((tl (setup-for-run))
(data *configdat*)) ;; (read-config "megatest.config" #f #t)))
+ (push-directory *toppath*)
;; keep this one local
(cond
((not (args:get-arg "-dumpmode"))
(pp (hash-table->alist data)))
((string=? (args:get-arg "-dumpmode") "json")
(json-write data))
(else
(debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
- (set! *didsomething* #t)))
+ (set! *didsomething* #t)
+ (pop-directory)))
(if (args:get-arg "-show-cmdinfo")
(if (getenv "MT_CMDINFO")
(let ((data (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))))
(if (equal? (args:get-arg "-dumpmode") "json")
@@ -534,10 +584,31 @@
(general-run-call
"-set-state-status"
"set state and status"
(lambda (target runname keys keyvals)
(operate-on 'set-state-status))))
+
+(if (or (args:get-arg "-set-run-status")
+ (args:get-arg "-get-run-status"))
+ (general-run-call
+ "-set-run-status"
+ "set run status"
+ (lambda (target runname keys keyvals)
+ (let* ((runsdat (cdb:remote-run db:get-runs-by-patt #f keys runname (or (args:get-arg "-target")
+ (args:get-arg "-reqtarg")) #f #f))
+ (header (vector-ref runsdat 0))
+ (rows (vector-ref runsdat 1)))
+ (if (null? rows)
+ (begin
+ (debug:print-info 0 "No matching run found.")
+ (exit 1))
+ (let* ((row (car (vector-ref runsdat 1)))
+ (run-id (db:get-value-by-header row header "id")))
+ (if (args:get-arg "-set-run-status")
+ (cdb:remote-run db:set-run-status #f run-id (args:get-arg "-set-run-status") msg: (args:get-arg "-m"))
+ (print (open-run-close db:get-run-status #f run-id))
+ )))))))
;;======================================================================
;; Query runs
;;======================================================================
@@ -738,11 +809,10 @@
(state (args:get-arg ":state"))
(status (args:get-arg ":status"))
(target (args:get-arg "-target"))
(toppath (assoc/default 'toppath cmdinfo)))
(change-directory toppath)
- ;; (set! *runremote* runremote)
(if (not target)
(begin
(debug:print 0 "ERROR: -target is required.")
(exit 1)))
(if (not (setup-for-run))
@@ -786,11 +856,10 @@
(itemdat (assoc/default 'itemdat cmdinfo))
(state (args:get-arg ":state"))
(status (args:get-arg ":status"))
(target (args:get-arg "-target")))
(change-directory testpath)
- ;; (set! *runremote* runremote)
(if (not target)
(begin
(debug:print 0 "ERROR: -target is required.")
(exit 1)))
(if (not (setup-for-run))
@@ -866,11 +935,10 @@
(test-id (assoc/default 'test-id cmdinfo))
(itemdat (assoc/default 'itemdat cmdinfo))
(work-area (assoc/default 'work-area cmdinfo))
(db #f))
(change-directory testpath)
- ;; (set! *runremote* runremote)
(if (not (setup-for-run))
(begin
(debug:print 0 "Failed to setup, exiting")
(exit 1)))
(if (and state status)
@@ -915,11 +983,10 @@
(itemdat (assoc/default 'itemdat cmdinfo))
(work-area (assoc/default 'work-area cmdinfo))
(db #f) ;; (open-db))
(state (args:get-arg ":state"))
(status (args:get-arg ":status")))
- ;; (set! *runremote* runremote)
(if (not (setup-for-run))
(begin
(debug:print 0 "Failed to setup, exiting")
(exit 1)))
@@ -932,18 +999,17 @@
;; has sub commands that are rdb:
;; DO NOT put this one into either cdb:remote-run or open-run-close
(tdb:load-test-data run-id test-id))
(if (args:get-arg "-setlog")
(let ((logfname (args:get-arg "-setlog")))
- ;; (cdb:test-set-log! *runremote* test-id (sdb:qry 'getid logfname))))
(rmt:test-set-log! run-id test-id logfname)))
(if (args:get-arg "-set-toplog")
;; DO NOT run remote
(tests:test-set-toplog! run-id test-name (args:get-arg "-set-toplog")))
(if (args:get-arg "-summarize-items")
;; DO NOT run remote
- (tests:summarize-items db run-id test-id test-name #t)) ;; do force here
+ (tests:summarize-items run-id test-id test-name #t)) ;; do force here
(if (args:get-arg "-runstep")
(if (null? remargs)
(begin
(debug:print 0 "ERROR: nothing specified to run!")
(if db (sqlite3:finalize! db))
@@ -978,11 +1044,10 @@
(debug:print-info 2 "running \"" cmd "\"")
(change-directory startingdir)
(set! exitstat (system cmd))
(set! *globalexitstatus* exitstat) ;; no necessary
(change-directory testpath)
- ;; (cdb:test-set-log! *runremote* test-id (sdb:qry 'getid htmllogfile))))
(rmt:test-set-log! run-id test-id htmllogfile)))
(let ((msg (args:get-arg "-m")))
(rmt:teststep-set-status! run-id test-id stepname "end" exitstat msg logfile))
)))
(if (or (args:get-arg "-test-status")
@@ -1165,11 +1230,11 @@
(if (args:get-arg "-import-megatest.db")
(let* ((toppath (setup-for-run))
(dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f))
(mtdb (if toppath (db:open-megatest-db)))
- (run-ids (if toppath (db:get-run-ids mtdb))))
+ (run-ids (if toppath (db:get-all-run-ids mtdb))))
;; sync runs, test_meta etc.
(db:sync-tables (db:sync-main-list mtdb) mtdb (db:get-db dbstruct #f))
(for-each
(lambda (run-id)
(let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
@@ -1185,23 +1250,13 @@
;; Exit and clean up
;;======================================================================
(if *runremote* (close-all-connections!))
-;; this is the socket if we are a client
-;; (if (and *runremote*
-;; (socket? *runremote*))
-;; (close-socket *runremote*))
-
-;; (if sdb:qry (sdb:qry 'finalize #f))
-;; (if *fdb* (filedb:finalize-db! *fdb*))
-
(if (not *didsomething*)
(debug:print 0 help))
-;; (if *runremote* (rpc:close-all-connections!))
-
(if (not (eq? *globalexitstatus* 0))
(if (or (args:get-arg "-runtests")(args:get-arg "-runall"))
(begin
(debug:print 0 "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
(exit 0))
ADDED minimal/megatest.config
Index: minimal/megatest.config
==================================================================
--- /dev/null
+++ minimal/megatest.config
@@ -0,0 +1,12 @@
+[fields]
+RUNTYPE text
+
+[setup]
+linktree #{getenv PWD}/linktree
+max_concurrent_jobs 20
+
+[jobtools]
+launcher nbfake
+
+[disks]
+disk0 #{getenv PWD}/runs
ADDED minimal/runconfigs.config
Index: minimal/runconfigs.config
==================================================================
--- /dev/null
+++ minimal/runconfigs.config
@@ -0,0 +1,3 @@
+[default]
+EXAMPLEVAR 1
+
ADDED minimal/tests/tmpspace/testconfig
Index: minimal/tests/tmpspace/testconfig
==================================================================
--- /dev/null
+++ minimal/tests/tmpspace/testconfig
@@ -0,0 +1,27 @@
+[ezsteps]
+
+df [ `df -m /tmp | grep /tmp | awk '{print $3}'` -gt 200000 ]
+
+[items]
+TARGETHOST chlr10722 \
+ chlr15003 \
+ chlr13406 \
+ chlr12539 \
+ chlr12713 \
+ chlr11407 \
+ chlr14713 \
+ chlr11440 \
+ chlr11417 \
+ chlr14709 \
+ chlr11722 \
+ chlr11445 \
+ chlr11421 \
+ chlr11404
+
+[test_meta]
+author mrwellan
+owner mrwellan
+description Check for available space in /tmp
+tags Utility
+reviewed ww50.3
+
ADDED oldsrc/fs-transport.scm
Index: oldsrc/fs-transport.scm
==================================================================
--- /dev/null
+++ oldsrc/fs-transport.scm
@@ -0,0 +1,44 @@
+
+;; Copyright 2006-2012, Matthew Welland.
+;;
+;; This program is made available under the GNU GPL version 2.0 or
+;; greater. See the accompanying file COPYING for details.
+;;
+;; This program is distributed WITHOUT ANY WARRANTY; without even the
+;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;; PURPOSE.
+
+(require-extension (srfi 18) extras tcp s11n)
+
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
+(import (prefix sqlite3 sqlite3:))
+
+(use spiffy uri-common intarweb http-client spiffy-request-vars)
+
+(tcp-buffer-size 2048)
+
+(declare (unit fs-transport))
+
+(declare (uses common))
+(declare (uses db))
+(declare (uses tests))
+(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+
+(include "common_records.scm")
+(include "db_records.scm")
+
+
+;;======================================================================
+;; F S T R A N S P O R T S E R V E R
+;;======================================================================
+
+;; There is no "server" per se but a convience routine to make it non
+;; necessary to be reopening the db over and over again.
+;;
+
+(define (fs:process-queue-item packet)
+ (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called
+ (set! *megatest-db* (open-db)))
+ (debug:print-info 11 "fs:process-queue-item called with packet=" packet)
+ (db:process-queue-item *megatest-db* packet))
+
ADDED oldsrc/zmq-transport.scm
Index: oldsrc/zmq-transport.scm
==================================================================
--- /dev/null
+++ oldsrc/zmq-transport.scm
@@ -0,0 +1,494 @@
+;;======================================================================
+;; Copyright 2006-2012, Matthew Welland.
+;;
+;; This program is made available under the GNU GPL version 2.0 or
+;; greater. See the accompanying file COPYING for details.
+;;
+;; This program is distributed WITHOUT ANY WARRANTY; without even the
+;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;; PURPOSE.
+;;======================================================================
+
+(require-extension (srfi 18) extras tcp s11n)
+
+(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
+(import (prefix sqlite3 sqlite3:))
+
+(use zmq)
+
+(declare (unit zmq-transport))
+
+(declare (uses common))
+(declare (uses db))
+(declare (uses tests))
+(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
+(declare (uses server))
+
+(include "common_records.scm")
+(include "db_records.scm")
+
+;; Transition to pub --> sub with pull <-- push
+;;
+;; 1. client sends request to server via push to the pull port
+;; 2. server puts request in queue or processes immediately as appropriate
+;; 3. server puts responses from completed requests into pub port
+;;
+;; TODO
+;;
+;; Done Tested
+;; [x] [ ] 1. Add columns pullport pubport to servers table
+;; [x] [ ] 2. Add rm of monitor.db if older than 11/12/2012
+;; [x] [ ] 3. Add create of pullport and pubport with finding of available ports
+;; [x] [ ] 4. Add client compose of request
+;; [x] [ ] - name of client: testname/itempath-test_id-hostname
+;; [x] [ ] - name of request: callname, params
+;; [x] [ ] - request key: f(clientname, callname, params)
+;; [x] [ ] 5. Add processing of subscription hits
+;; [x] [ ] - done when get key
+;; [x] [ ] - return results
+;; [x] [ ] 6. Add timeout processing
+;; [x] [ ] - after 60 seconds
+;; [ ] [ ] i. check server alive, connect to new if necessary
+;; [ ] [ ] ii. resend request
+;; [ ] [ ] 7. Turn self ping back on
+
+(define (zmq-transport:make-server-url hostport)
+ (if (not hostport)
+ #f
+ (conc "tcp://" (car hostport) ":" (cadr hostport))))
+
+(define *server-loop-heart-beat* (current-seconds))
+(define *heartbeat-mutex* (make-mutex))
+
+;;======================================================================
+;; S E R V E R
+;;======================================================================
+
+(define-inline (zmqsock:get-pub dat)(vector-ref dat 0))
+(define-inline (zmqsock:get-pull dat)(vector-ref dat 1))
+(define-inline (zmqsock:set-pub! dat s)(vector-set! dat s 0))
+(define-inline (zmqsock:set-pull! dat s)(vector-set! dat s 0))
+
+(define (zmq-transport:run hostn)
+ (debug:print 2 "Attempting to start the server ...")
+ (if (not *toppath*)
+ (if (not (setup-for-run))
+ (begin
+ (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
+ (exit))))
+ (let* ((db (open-db)) ;; here we *do not* want to be opening and closing the db
+ (zmq-sdat1 #f)
+ (zmq-sdat2 #f)
+ (pull-socket #f)
+ (pub-socket #f)
+ (p1 #f)
+ (p2 #f)
+ (zmq-sockets-dat #f)
+ (iface (if (string=? "-" hostn)
+ "*" ;; (get-host-name)
+ hostn))
+ (hostname (get-host-name))
+ (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
+ (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
+ #f)))
+ (if ipstr ipstr hostname)))
+ (last-run 0))
+ (set! zmq-sockets-dat (zmq-transport:setup-ports ipaddrstr (if (args:get-arg "-port")
+ (string->number (args:get-arg "-port"))
+ (+ 5000 (random 1001)))))
+
+ (set! zmq-sdat1 (car zmq-sockets-dat))
+ (set! pull-socket (cadr zmq-sdat1)) ;; (iface s port)
+ (set! p1 (caddr zmq-sdat1))
+
+ (set! zmq-sdat2 (cadr zmq-sockets-dat))
+ (set! pub-socket (cadr zmq-sdat2))
+ (set! p2 (caddr zmq-sdat2))
+
+ (set! *cache-on* #t)
+
+ (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!?
+
+ ;; what to do when we quit
+ ;;
+;; (on-exit (lambda ()
+;; (if (and *toppath* *server-info*)
+;; (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*))
+;; (let loop ()
+;; (let ((queue-len 0))
+;; (thread-sleep! (random 5))
+;; (mutex-lock! *incoming-mutex*)
+;; (set! queue-len (length *incoming-data*))
+;; (mutex-unlock! *incoming-mutex*)
+;; (if (> queue-len 0)
+;; (begin
+;; (debug:print-info 0 "Queue not flushed, waiting ...")
+;; (loop))))))))
+
+ ;; The heavy lifting
+ ;;
+ ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime
+ ;;
+ (debug:print-info 11 "Server setup complete, start listening for messages")
+ (let loop ((queue-lst '()))
+ (let* ((rawmsg (receive-message* pull-socket))
+ (packet (db:string->obj rawmsg))
+ (qtype (cdb:packet-get-qtype packet)))
+ (debug:print-info 12 "server=> received packet=" packet)
+ (if (not (member qtype '(sync ping)))
+ (begin
+ (mutex-lock! *heartbeat-mutex*)
+ (set! *last-db-access* (current-seconds))
+ (mutex-unlock! *heartbeat-mutex*)))
+ (if #t ;; (cdb:packet-get-immediate packet) ;; process immediately or put in queue
+ (begin
+ (db:process-queue-item db packet)
+ ;; (open-run-close db:process-queue #f pub-socket (cons packet queue-lst))
+
+ (loop '()))
+ (loop (cons packet queue-lst)))))))
+
+;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being
+;; used and to shutdown after sometime if it is not.
+;;
+(define (zmq-transport:keep-running)
+ ;; if none running or if > 20 seconds since
+ ;; server last used then start shutdown
+ ;; This thread waits for the server to come alive
+ (let* ((server-info (let loop ()
+ (let ((sdat #f))
+ (mutex-lock! *heartbeat-mutex*)
+ (set! sdat *server-info*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if sdat sdat
+ (begin
+ (debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again")
+ (sleep 4)
+ (loop))))))
+ (iface (cadr server-info))
+ (pullport (caddr server-info))
+ (pubport (cadddr server-info)) ;; id interface pullport pubport)
+ ;; (zmq-sockets (zmq-transport:client-connect iface pullport pubport))
+ (last-access 0))
+ (debug:print-info 11 "heartbeat started for zmq server on " iface " " pullport " " pubport)
+ (let loop ((count 0))
+ (thread-sleep! 4) ;; no need to do this very often
+ ;; NB// sync currently does NOT return queue-length
+ ;; GET REAL QUEUE LENGTH FROM THE VARIABLE
+ (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1)))
+ ;; (print "Server running, count is " count)
+ (if (< count 1) ;; 3x3 = 9 secs aprox
+ (loop (+ count 1)))
+
+ ;; NOTE: Get rid of this mechanism! It really is not needed...
+ (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info))
+
+ ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
+ (mutex-lock! *heartbeat-mutex*)
+ (set! last-access *last-db-access*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if (> (+ last-access
+ ;; (* 50 60 60) ;; 48 hrs
+ ;; 60 ;; one minute
+ ;; (* 60 60) ;; one hour
+ (* 45 60) ;; 45 minutes, until the db deletion bug is fixed.
+ )
+ (current-seconds))
+ (begin
+ (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+ (loop 0))
+ (begin
+ (debug:print-info 0 "Starting to shutdown the server.")
+ ;; need to delete only *my* server entry (future use)
+ (set! *time-to-exit* #t)
+ (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
+ (thread-sleep! 1)
+ (debug:print-info 0 "Max cached queries was " *max-cache-size*)
+ (debug:print-info 0 "Server shutdown complete. Exiting")
+ (exit)))))))
+
+(define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50))
+ (let ((s (if s s (make-socket stype)))
+ (p (if (number? port) port 5555))
+ (old-handler (current-exception-handler)))
+ (handle-exceptions
+ exn
+ (begin
+ (debug:print 0 "Failed to bind to port " p ", trying next port")
+ (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
+ ;; (old-handler)
+ ;; (print-call-chain)
+ (if (> trynum 0)
+ (zmq-transport:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1))
+ (debug:print-info 0 "Tried ports up to " p
+ " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))
+ (exit)) ;; To exit or not? That is the question.
+ (let ((zmq-url (conc "tcp://" iface ":" p)))
+ (debug:print 2 "Trying to start server on " zmq-url)
+ (bind-socket s zmq-url)
+ (list iface s port)))))
+
+(define (zmq-transport:setup-ports ipaddrstr startport)
+ (let* ((s1 (zmq-transport:find-free-port-and-open ipaddrstr #f startport 'pull))
+ (p1 (caddr s1))
+ (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub))
+ (p2 (caddr s2)))
+ (set! *runremote* #f)
+ (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2)
+ (mutex-lock! *heartbeat-mutex*)
+ (set! *server-info* (open-run-close tasks:server-register
+ tasks:open-db
+ (current-process-id)
+ ipaddrstr p1
+ 0
+ 'live
+ 'zmq
+ pubport: p2))
+ (debug:print-info 11 "*server-info* set to " *server-info*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (list s1 s2)))
+
+(define (zmq-transport:mk-signature)
+ (message-digest-string (md5-primitive)
+ (with-output-to-string
+ (lambda ()
+ (write (list (current-directory)
+ (argv)))))))
+
+;;======================================================================
+;; S E R V E R U T I L I T I E S
+;;======================================================================
+
+;;======================================================================
+;; C L I E N T S
+;;======================================================================
+
+;;
+(define (zmq-transport:client-socket-connect iface port #!key (context #f)(type 'req)(subscriptions '()))
+ (debug:print-info 3 "client-connect " iface ":" port ", type=" type ", subscriptions=" subscriptions)
+ (let ((connect-ok #f)
+ (zmq-socket (if context
+ (make-socket type context)
+ (make-socket type)))
+ (conurl (zmq-transport:make-server-url (list iface port))))
+ (if (socket? zmq-socket)
+ (begin
+ ;; first apply subscriptions
+ (for-each (lambda (subscription)
+ (debug:print 2 "Subscribing to " subscription)
+ (socket-option-set! zmq-socket 'subscribe subscription))
+ subscriptions)
+ (connect-socket zmq-socket conurl)
+ zmq-socket)
+ (begin
+ (debug:print 0 "ERROR: Failed to open socket to " conurl)
+ #f))))
+
+(define (zmq-transport:client-connect iface pullport pubport)
+ (let* ((push-socket (zmq-transport:client-socket-connect iface pullport type: 'push))
+ (sub-socket (zmq-transport:client-socket-connect iface pubport
+ type: 'sub
+ subscriptions: (list (client:get-signature) "all")))
+ (zmq-sockets (vector push-socket sub-socket))
+ (login-res #f))
+ (debug:print-info 11 "zmq-transport:client-connect started. Next is login")
+ (set! login-res (client:login serverdat zmq-sockets))
+ (if (and (not (null? login-res))
+ (car login-res))
+ (begin
+ (debug:print-info 2 "Logged in and connected to " iface ":" pullport "/" pubport ".")
+ (set! *runremote* zmq-sockets)
+ zmq-sockets)
+ (begin
+ (debug:print-info 2 "Failed to login or connect to " conurl)
+ (set! *runremote* #f)
+ #f))))
+
+;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being
+;; used and to shutdown after sometime if it is not.
+;;
+(define (zmq-transport:keep-running)
+ ;; if none running or if > 20 seconds since
+ ;; server last used then start shutdown
+ ;; This thread waits for the server to come alive
+ (let* ((server-info (let loop ()
+ (let ((sdat #f))
+ (mutex-lock! *heartbeat-mutex*)
+ (set! sdat *runremote*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if sdat sdat
+ (begin
+ (sleep 4)
+ (loop))))))
+ (iface (car server-info))
+ (port (cadr server-info))
+ (last-access 0)
+ (tdb (tasks:open-db))
+ (spid (tasks:server-get-server-id tdb #f iface port #f)))
+ (print "Keep-running got server pid " spid ", using iface " iface " and port " port)
+ (let loop ((count 0))
+ (thread-sleep! 4) ;; no need to do this very often
+ ;; NB// sync currently does NOT return queue-length
+ (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
+ ;; (print "Server running, count is " count)
+ (if (< count 1) ;; 3x3 = 9 secs aprox
+ (loop (+ count 1)))
+
+ ;; NOTE: Get rid of this mechanism! It really is not needed...
+ (tasks:server-update-heartbeat tdb spid)
+
+ ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
+ (mutex-lock! *heartbeat-mutex*)
+ (set! last-access *last-db-access*)
+ (mutex-unlock! *heartbeat-mutex*)
+ (if (> (+ last-access
+ ;; (* 50 60 60) ;; 48 hrs
+ ;; 60 ;; one minute
+ ;; (* 60 60) ;; one hour
+ (* 45 60) ;; 45 minutes, until the db deletion bug is fixed.
+ )
+ (current-seconds))
+ (begin
+ (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+ (loop 0))
+ (begin
+ (debug:print-info 0 "Starting to shutdown the server.")
+ ;; need to delete only *my* server entry (future use)
+ (set! *time-to-exit* #t)
+ (tasks:server-deregister-self tdb (get-host-name))
+ (thread-sleep! 1)
+ (debug:print-info 0 "Max cached queries was " *max-cache-size*)
+ (debug:print-info 0 "Server shutdown complete. Exiting")
+ (exit)))))))
+
+;; all routes though here end in exit ...
+(define (zmq-transport:launch)
+ (if (not *toppath*)
+ (if (not (setup-for-run))
+ (begin
+ (debug:print 0 "ERROR: cannot find megatest.config, exiting")
+ (exit))))
+ (debug:print-info 2 "Starting zmq server")
+ (if *toppath*
+ (let* (;; (th1 (make-thread (lambda ()
+ ;; (let ((server-info #f))
+ ;; ;; wait for the server to be online and available
+ ;; (let loop ()
+ ;; (debug:print-info 2 "Waiting for the server to come online before starting heartbeat")
+ ;; (thread-sleep! 2)
+ ;; (mutex-lock! *heartbeat-mutex*)
+ ;; (set! server-info *server-info* )
+ ;; (mutex-unlock! *heartbeat-mutex*)
+ ;; (if (not server-info)(loop)))
+ ;; (debug:print 2 "Server alive, starting self-ping")
+ ;; (zmq-transport:self-ping server-info)
+ ;; ))
+ ;; "Self ping"))
+ (th2 (make-thread (lambda ()
+ (zmq-transport:run
+ (if (args:get-arg "-server")
+ (args:get-arg "-server")
+ "-"))) "Server run"))
+ ;; (th3 (make-thread (lambda ()(zmq-transport:keep-running)) "Keep running"))
+ )
+ (set! *client-non-blocking-mode* #t)
+ ;; (thread-start! th1)
+ (thread-start! th2)
+ ;; (thread-start! th3)
+ (set! *didsomething* #t)
+ ;; (thread-join! th3)
+ (thread-join! th2)
+ )
+ (debug:print 0 "ERROR: Failed to setup for megatest")))
+
+(define (zmq-transport:client-signal-handler signum)
+ (handle-exceptions
+ exn
+ (debug:print " ... exiting ...")
+ (let ((th1 (make-thread (lambda ()
+ (if (not *received-response*)
+ (receive-message* *runremote*))) ;; flush out last call if applicable
+ "eat response"))
+ (th2 (make-thread (lambda ()
+ (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
+ (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
+ (debug:print 0 " Done.")
+ (exit 4))
+ "exit on ^C timer")))
+ (thread-start! th2)
+ (thread-start! th1)
+ (thread-join! th2))))
+
+(define (zmq-transport:client-launch)
+ (set-signal-handler! signal/int zmq-transport:client-signal-handler)
+ (if (zmq-transport:client-setup)
+ (debug:print-info 2 "connected as client")
+ (begin
+ (debug:print 0 "ERROR: Failed to connect as client")
+ (exit))))
+
+;;======================================================================
+;; Defunct functions
+;;======================================================================
+
+;; ping a server and return number of clients or #f (if no response)
+;; NOT IN USE!
+(define (zmq-transport:ping host port #!key (secs 10)(return-socket #f))
+ (cdb:use-non-blocking-mode
+ (lambda ()
+ (let* ((res #f)
+ (th1 (make-thread
+ (lambda ()
+ (let* ((zmq-context (make-context 1))
+ (zmq-socket (zmq-transport:client-connect host port context: zmq-context)))
+ (if zmq-socket
+ (if (zmq-transport:client-login zmq-socket)
+ (let ((numclients (cdb:num-clients zmq-socket)))
+ (if (not return-socket)
+ (begin
+ (zmq-transport:client-logout zmq-socket)
+ (close-socket zmq-socket)))
+ (set! res (list #t numclients (if return-socket zmq-socket #f))))
+ (begin
+ ;; (close-socket zmq-socket)
+ (set! res (list #f "CAN'T LOGIN" #f))))
+ (set! res (list #f "CAN'T CONNECT" #f)))))
+ "Ping: th1"))
+ (th2 (make-thread
+ (lambda ()
+ (let loop ((count 1))
+ (debug:print-info 1 "Ping " count " server on " host " at port " port)
+ (thread-sleep! 2)
+ (if (< count (/ secs 2))
+ (loop (+ count 1))))
+ ;; (thread-terminate! th1)
+ (set! res (list #f "TIMED OUT" #f)))
+ "Ping: th2")))
+ (thread-start! th2)
+ (thread-start! th1)
+ (handle-exceptions
+ exn
+ (set! res (list #f "TIMED OUT" #f))
+ (thread-join! th1 secs))
+ res))))
+
+;; (define (zmq-transport:self-ping server-info)
+;; ;; server-info: server-id interface pullport pubport
+;; (let ((iface (list-ref server-info 1))
+;; (pullport (list-ref server-info 2))
+;; (pubport (list-ref server-info 3)))
+;; (zmq-transport:client-connect iface pullport pubport)
+;; (let loop ()
+;; (thread-sleep! 2)
+;; (cdb:client-call *runremote* 'ping #t)
+;; (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!")
+;; (mutex-lock! *heartbeat-mutex*)
+;; (set! *server-loop-heart-beat* (current-seconds))
+;; (mutex-unlock! *heartbeat-mutex*)
+;; (loop))))
+
+(define (zmq-transport:reply pubsock target query-sig success/fail result)
+ (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result)
+ (send-message pubsock target send-more: #t)
+ (send-message pubsock (db:obj->string (vector success/fail query-sig result))))
+
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -41,21 +41,22 @@
(let* ((run-id (if rid rid 0))
(connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
(if cinfo
cinfo
(let loop ((numtries 100))
- (thread-sleep! 1)
(let ((res (client:setup run-id)))
(if res
(hash-table-ref *runremote* run-id) ;; client:setup filled this in (hopefully)
(if (> numtries 0)
- (loop (- numtries 1))
+ (begin
+ (thread-sleep! 10)
+ (loop (- numtries 1)))
(begin
(debug:print 0 "ERROR: 100 tries and no server, giving up")
(exit 1)))))))))
(jparams (db:obj->string params))
- (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
+ (res (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
(if res
(db:string->obj res) ;; (rmt:json-str->dat res)
(let ((new-connection-info (client:setup run-id)))
(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
(rmt:send-receive cmd run-id params)))))
@@ -83,10 +84,20 @@
;;======================================================================
;;
;; A C T U A L A P I C A L L S
;;
;;======================================================================
+
+;;======================================================================
+;; S E R V E R
+;;======================================================================
+
+(define (rmt:kill-server run-id)
+ (rmt:send-receive 'kill-server run-id (list run-id)))
+
+(define (rmt:start-server run-id)
+ (rmt:send-receive 'start-server 0 (list run-id)))
;;======================================================================
;; M I S C
;;======================================================================
@@ -96,13 +107,10 @@
;; This login does no retries under the hood - it acts a bit like a ping.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
-(define (rmt:kill-server run-id)
- (rmt:send-receive 'kill-server run-id (list run-id)))
-
;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible
;;
(define (rmt:general-call stmtname run-id . params)
(rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))
@@ -120,12 +128,12 @@
;;======================================================================
;; K E Y S
;;======================================================================
-;; These should not require run-id but it is more consistent to have it.
-;; run-id can theoretically be #f but how to handle that is not yet done.
+;; These require run-id because the values come from the run!
+;;
(define (rmt:get-key-val-pairs run-id)
(rmt:send-receive 'get-key-val-pairs run-id (list run-id)))
(define (rmt:get-keys)
(rmt:send-receive 'get-keys #f '()))
@@ -182,12 +190,12 @@
(rmt:send-receive 'delete-test-records run-id (list run-id test-id)))
(define (rmt:test-set-status-state run-id test-id status state msg)
(rmt:send-receive 'test-set-status-state run-id (list run-id test-id status state msg)))
-(define (rmt:get-previous-test-run-record run-id test-name item-path)
- (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
+;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
+;; (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))
(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
(rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))
(define (rmt:test-get-logfile-info run-id test-name)
@@ -200,17 +208,21 @@
(rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))
(define (rmt:test-set-log! run-id test-id logf)
(if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))
+(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
+ (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))
+
;; NOTE: This will open and access ALL run databases.
;;
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
- (let ((run-ids (rmt:get-all-run-ids))) ;; (rmt:get-run-ids-matching keynames target res)))
- (apply append (lambda (run-id)
- (rmt:send-receive 'test-get-paths-matching-keynames-target-new (list keynames target res testpatt statepatt statuspatt runname)))
- run-ids)))
+ (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
+ (apply append
+ (map (lambda (run-id)
+ (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
+ run-ids))))
(define (rmt:get-run-ids-matching keynames target res)
(rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))
(define (rmt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal))
@@ -260,18 +272,52 @@
(rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))
(define (rmt:get-all-run-ids)
(rmt:send-receive 'get-all-run-ids #f '()))
+(define (rmt:get-prev-run-ids run-id)
+ (rmt:send-receive 'get-prev-run-ids #f (list run-id)))
+
(define (rmt:lock/unlock-run run-id lock unlock user)
(rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))
(define (rmt:update-run-event_time run-id)
(rmt:send-receive 'update-run-event_time #f (list run-id)))
(define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit)
(rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit)))
+
+;;======================================================================
+;; M U L T I R U N Q U E R I E S
+;;======================================================================
+
+;; get the previous record for when this test was run where all keys match but runname
+;; returns #f if no such test found, returns a single test record if found
+;;
+;; Run this at the client end since we have to connect to multiple run-id dbs
+;;
+(define (rmt:get-previous-test-run-record run-id test-name item-path)
+ (let* ((keyvals (rmt:get-key-val-pairs run-id))
+ (keys (rmt:get-keys))
+ (selstr (string-intersperse keys ","))
+ (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
+ (if (not keyvals)
+ #f
+ (let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
+ ;; for each run starting with the most recent look to see if there is a matching test
+ ;; if found then return that matching test record
+ (debug:print 4 "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
+ (if (null? prev-run-ids) #f
+ (let loop ((hed (car prev-run-ids))
+ (tal (cdr prev-run-ids)))
+ (let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f)))
+ (debug:print 4 "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
+ (if (and (null? results)
+ (not (null? tal)))
+ (loop (car tal)(cdr tal))
+ (if (null? results) #f
+ (car results))))))))))
;;======================================================================
;; S T E P S
;;======================================================================
Index: runconfig.scm
==================================================================
--- runconfig.scm
+++ runconfig.scm
@@ -29,11 +29,11 @@
(debug:print 4 "Using key=\"" thekey "\"")
(if change-env
(for-each ;; NB// This can be simplified with new content of keyvals having all that is needed.
(lambda (keyval)
- (setenv (car keyval)(cadr keyval)))
+ (safe-setenv (car keyval)(cadr keyval)))
keyvals))
(for-each
(lambda (section)
(let ((section-dat (hash-table-ref/default confdat section #f)))
@@ -43,11 +43,11 @@
(let ((val (cadr (assoc envvar section-dat))))
(hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1))
(if (and (string? envvar)
(string? val)
change-env)
- (setenv envvar val))
+ (safe-setenv envvar val))
(hash-table-set! finaldat envvar val)))
(map car section-dat)))))
sections)
(if already-seen
(begin
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -84,11 +84,11 @@
(exit 1)))
;; Now have runconfigs data loaded, set environment vars
(for-each (lambda (section)
(for-each (lambda (varval)
(set! envdat (append envdat (list varval)))
- (setenv (car varval)(cadr varval)))
+ (safe-setenv (car varval)(cadr varval)))
(configf:get-section runconfig section)))
(list "default" target))
(vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))
(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
@@ -110,14 +110,11 @@
;; from the cached data set the vars
(hash-table-for-each
vals
(lambda (key val)
(debug:print 2 "setenv " key " " val)
- (if (and (string? key)
- (string? val))
- (setenv key val)
- (debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val))))
+ (safe-setenv key val)))
(if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
(alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
;; Lets use this as an opportunity to put MT_RUNNAME in the environment
(let ((runname (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
(if runname
@@ -1154,11 +1151,11 @@
(if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
(db:test-get-run_duration testdat)))
600) ;; i.e. no update for more than 600 seconds
(begin
(debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
- (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
+ (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
(debug:print 2 "NOTE: " test-name " is already running")))
(else
(debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
(case (string->symbol (test:get-state testdat))
((COMPLETED INCOMPLETE)
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -46,48 +46,19 @@
;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
- ;; (if (server:check-if-running run-id)
- ;; a server is already running
- ;; (exit)
(http-transport:launch run-id))
-;; (define (server:launch-no-exit run-id)
-;; (if (server:check-if-running run-id)
-;; #t ;; if running
-;; (http-transport:launch run-id)))
-
;;======================================================================
;; Q U E U E M A N A G E M E N T
;;======================================================================
;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))
-;; Flush the queue every third of a second. Can we assume that setup-for-run
-;; has already been done?
-;; (define (server:write-queue-handler)
-;; (if (setup-for-run)
-;; (let ((db (open-db)))
-;; (let loop ()
-;; (let ((last-write-flush-time #f))
-;; (mutex-lock! *incoming-mutex*)
-;; (set! last-write-flush-time *server:last-write-flush*)
-;; (mutex-unlock! *incoming-mutex*)
-;; (if (> (- (current-milliseconds) last-write-flush-time) 10)
-;; (begin
-;; (mutex-lock! *db:process-queue-mutex*)
-;; (db:process-cached-writes db)
-;; (mutex-unlock! *db:process-queue-mutex*)
-;; (thread-sleep! 0.005))))
-;; (loop)))
-;; (begin
-;; (debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler")
-;; (exit 1))))
-
;;======================================================================
;; S E R V E R U T I L I T I E S
;;======================================================================
;; Generate a unique signature for this server
@@ -103,39 +74,73 @@
;; with spiffy or rpc this simply returns the return data to be returned
;;
(define (server:reply return-addr query-sig success/fail result)
(db:obj->string (vector success/fail query-sig result)))
-;; > file 2>&1
-(define (server:try-running run-id)
- (let* ((rand-name (random 100))
- (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
- " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id
- ".log 2>&1 &")))
- ;; ".log &" )))
+;; Given a run id start a server process ### NOTE ### > file 2>&1
+;; if the run-id is zero and the target-host is set
+;; try running on that host
+;;
+(define (server:run run-id)
+ (let* ((target-host (configf:lookup *configdat* "server" "homehost" ))
+ (cmdln (conc (common:get-megatest-exe)
+ " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &")))
(debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
(push-directory *toppath*)
- (system cmdln)
+ (if target-host
+ (begin
+ (set-environment-variable "TARGETHOST" target-host)
+ (system (conc "nbfake " cmdln)))
+ (system cmdln))
(pop-directory)))
+
+;; kind start up of servers, wait 40 seconds before allowing another server for a given
+;; run-id to be launched
+(define (server:kind-run run-id)
+ (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))
+ (if (or (not last-run-time)
+ (> (- (current-seconds) last-run-time) 40))
+ (begin
+ (server:run run-id)
+ (hash-table-set! *server-kind-run* run-id (current-seconds))))))
+
+;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
+;;
+(define (server:try-running run-id)
+ (if (eq? run-id 0)
+ (server:run run-id)
+ (rmt:start-server run-id)))
(define (server:check-if-running run-id)
(let loop ((server-info (open-run-close tasks:get-server tasks:open-db run-id))
(trycount 0))
- (thread-sleep! 2)
(if server-info
;; note: client:start will set *runremote*. this needs to be changed
;; also, client:start will login to the server, also need to change that.
;;
;; client:start returns #t if login was successful.
;;
- (let ((res (http-transport:client-connect
+ (let ((res (server:ping-server run-id (vector-ref server 1)(vector-ref server 0))))
run-id
(tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info))))
;; if the server didn't respond we must remove the record
(if res
- res
+ #t
(begin
- (debug:print 0 "WARNING: running server not reachable, removing record: " server-info)
- (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id)
+ (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id
+ " server:check-if-running")
res)))
#f)))
+
+(define (server:ping-server run-id iface port)
+ (with-input-from-pipe
+ (conc (common:get-megatest-exe) " -run-id " run-id " -ping " (conc iface ":" port))
+ (lambda ()
+ (let loop ((inl (read-line))
+ (res "NOREPLY"))
+ (if (eof-object? inl)
+ (case (string->symbol res)
+ ((NOREPLY) #f)
+ ((LOGIN_OK) #t)
+ (else #f))
+ (loop (read-line) inl))))))
Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -91,11 +91,11 @@
(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4))
(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5))
(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6))
(define (tasks:server-lock-slot mdb run-id)
- (tasks:server-clean-out-old-records-for-run-id mdb run-id)
+ (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot")
(if (< (tasks:num-in-available-state mdb run-id) 4)
(begin
(tasks:server-set-available mdb run-id)
(thread-sleep! 0.2) ;; Try removing this. It may not be needed.
(tasks:server-am-i-the-server? mdb run-id))
@@ -123,58 +123,79 @@
(let ((res 0))
(sqlite3:for-each-row
(lambda (num-in-queue)
(set! res num-in-queue))
mdb
- "SELECT count(id) FROM servers WHERE run_id=?;"
+ "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';"
run-id)
res))
-(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
- (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 100 AND run_id=?;" run-id))
-
-(define (tasks:server-force-clean-running-records-for-run-id mdb run-id)
- (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=?;" run-id))
-
-(define (tasks:server-force-clean-run-record mdb run-id iface port)
- (sqlite3:execute mdb "DELETE FROM servers WHERE state = 'running' AND run_id=? AND interface=? AND port=?;"
- run-id iface port))
+(define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag)
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;"
+ (conc "defunct" tag) run-id))
+
+(define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag)
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;"
+ (conc "defunct" tag) run-id))
+
+(define (tasks:server-force-clean-run-record mdb run-id iface port tag)
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;"
+ (conc "defunct" tag) run-id iface port))
+
+(define (tasks:server-delete-records-for-this-pid mdb tag)
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;"
+ (conc "defunct" tag) (get-host-name) (current-process-id)))
+
+(define (tasks:server-delete-record mdb server-id tag)
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;"
+ (conc "defunct" tag) server-id)
+ ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder)
+ (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;")
+ (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;")
+ )
(define (tasks:server-set-state! mdb server-id state)
- (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id))
-
-(define (tasks:server-delete-record! mdb server-id)
- (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id))
-
-(define (tasks:server-delete-records-for-this-pid mdb)
- (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id)))
+ (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id))
(define (tasks:server-set-interface-port mdb server-id interface port)
- (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id))
+ (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id))
+;; Get random port not used in long time
+;;
(define (tasks:server-get-next-port mdb)
- (let ((res #f)
- (port-param (if (and (args:get-arg "-port")
- (string->number (args:get-arg "-port")))
- (string->number (args:get-arg "-port"))
- #f))
- (config-port (if (and (config-lookup *configdat* "server" "port")
- (string->number (config-lookup *configdat* "server" "port")))
- (string->number (config-lookup *configdat* "server" "port"))
- #f)))
+ (let* ((lownum 30000)
+ (highnum 64000)
+ (used-ports '())
+ (get-rand-port (lambda ()
+ (+ lownum (random (- highnum lownum)))))
+ (port-param (if (and (args:get-arg "-port")
+ (string->number (args:get-arg "-port")))
+ (string->number (args:get-arg "-port"))
+ #f))
+ ;; (config-port (if (and (config-lookup *configdat* "server" "port")
+ ;; (string->number (config-lookup *configdat* "server" "port")))
+ ;; (string->number (config-lookup *configdat* "server" "port"))
+ ;; #f))
+ )
(sqlite3:for-each-row
(lambda (port)
- (set! res (+ port 1))) ;; set to next
+ (set! used-ports (cons port used-ports)))
mdb
- "SELECT max(port) FROM servers;")
+ "SELECT port FROM servers;")
(cond
((and port-param res) (if (> res port-param) res port-param))
(port-param port-param)
- ((and config-port res) (if (> res config-port) res config-port))
- (config-port config-port)
- ((and res (> res 8080)) res)
- (else (+ 5000 (random 1001))))))
+ ;; ((and config-port res) (if (> res config-port) res config-port))
+ ;; (config-port config-port)
+ (else
+ (let loop ((port (get-rand-port))
+ (remtries 100))
+ (if (member port used-ports)
+ (if (> remtries 0)
+ (loop (get-rand-port)(- remtries 1))
+ (get-rand-port))
+ port))))))
(define (tasks:server-am-i-the-server? mdb run-id)
(let* ((all (tasks:server-get-servers-vying-for-run-id mdb run-id))
(first (if (null? all)
(begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.")
@@ -205,11 +226,11 @@
(res '()))
(sqlite3:for-each-row
(lambda (a . b)
(set! res (cons (apply vector a b) res)))
mdb
- (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;")
+ (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running') ORDER BY start_time DESC;")
run-id)
(vector header res)))
(define (tasks:get-server mdb run-id)
(let ((res #f)
@@ -226,14 +247,15 @@
res))
(define (tasks:get-all-servers mdb)
(let ((res '()))
(sqlite3:for-each-row
- (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport)
- (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res)))
+ (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
+ ;; 0 1 2 3 4 5 6 7 8 9 10 11 12
+ (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
mdb
- "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;")
+ "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
res))
(define (tasks:kill-server status hostname port pid)
(debug:print-info 1 "Removing defunct server record for " hostname ":" port)
(if port
@@ -253,11 +275,11 @@
) ;; local machine, send sig term
(begin
;;(debug:print-info 1 "Stopping remote servers not yet supported."))))
(debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
(let ((serverdat (list hostname port)))
- (http-transport:client-connect hostname port)
+ (hash-table-set! *runremote* run-id (http-transport:client-connect hostname port))
(cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide
(begin
(if status
(if (equal? hostname (get-host-name))
(begin
Index: tdb.scm
==================================================================
--- tdb.scm
+++ tdb.scm
@@ -11,25 +11,19 @@
;;======================================================================
;; Database access
;;======================================================================
-(require-extension (srfi 18) extras tcp) ;; rpc)
-;; (import (prefix rpc rpc:))
-
+(require-extension (srfi 18) extras tcp)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))
-;; Note, try to remove this dependency
-;; (use zmq)
-
(declare (unit tdb))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
-(declare (uses fs-transport))
(declare (uses client))
(declare (uses mt))
(include "common_records.scm")
(include "db_records.scm")
Index: tests/Makefile
==================================================================
--- tests/Makefile
+++ tests/Makefile
@@ -64,15 +64,15 @@
# NOTE: Only one instance can be a server
test5 : cleanprep
@echo "WARNING: No longer running fullprep, test converage may be lessened"
cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log &
- cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log &
- cd fullrun;sleep 5;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log &
- cd fullrun;sleep 8;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log &
-# cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log &
-# cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & a
+ cd fullrun;sleep 3;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log &
+ cd fullrun;sleep 6;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log &
+ cd fullrun;sleep 9;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log &
+ cd fullrun;sleep 12;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log &
+ cd fullrun;sleep 15;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log &
# MUST ADD THIS BACK IN ASAP!!!!
# cd fullrun;sleep 10;$(MEGATEST) -run-wait -target $(TARGET) :runname % -testpatt % :state RUNNING,LAUNCHED,NOT_STARTED,REMOTEHOSTSTART;echo ALL DONE
test6: fullprep
Index: tests/fdktestqa/fdk.config
==================================================================
--- tests/fdktestqa/fdk.config
+++ tests/fdktestqa/fdk.config
@@ -2,11 +2,11 @@
SYSTEM TEXT
RELEASE TEXT
[setup]
# Adjust max_concurrent_jobs to limit how much you load your machines
-max_concurrent_jobs 150
+max_concurrent_jobs 500
# This is your link path, you can move it but it is generally better to keep it stable
linktree #{shell readlink -f #{getenv PWD}/../simplelinks}
[include testqa/configs/megatest.abc.config]
Index: tests/fdktestqa/testqa/Makefile
==================================================================
--- tests/fdktestqa/testqa/Makefile
+++ tests/fdktestqa/testqa/Makefile
@@ -5,25 +5,23 @@
all :
$(MEGATEST) -remove-runs -target a/b :runname c -testpatt %/%
$(MEGATEST) -runtests % -target a/b :runname c
bigbig :
- $(MEGATEST) -server - -daemonize ; sleep 3
for tn in a b c d;do \
($(MEGATEST) -runtests % -target a/b :runname $tn & ) ; \
done
bigrun :
- $(MEGATEST) -runtests bigrun -target a/bigrun :runname a
+ $(MEGATEST) -runtests bigrun -target a/bigrun :runname a$(shell date +%V)
bigrun2 :
- $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a -transport http
+ $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a$(shell date +%V)
dashboard :
$(DASHBOARD) -rows 20 &
compile :
- $(MEGATEST) -stop-server 0
(cd ../../..;make && make install)
clean :
- rm -rf ../simple*/*/* megatest.db
+ rm -rf ../simple*/*/* megatest.db db/*
Index: tests/fdktestqa/testqa/megatest.config
==================================================================
--- tests/fdktestqa/testqa/megatest.config
+++ tests/fdktestqa/testqa/megatest.config
@@ -1,10 +1,11 @@
[setup]
testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
-runqueue 20
-transport http
launchwait no
+[jobtools]
+launcher loadrunner
+
[include ../fdk.config]
[server]
port 9080
Index: tests/fullrun/config/mt_include_1.config
==================================================================
--- tests/fullrun/config/mt_include_1.config
+++ tests/fullrun/config/mt_include_1.config
@@ -13,11 +13,11 @@
# launcher nbfake
launcher loadrunner
# launcher echo
# launcher nbfind
# launcher nodanggood
-launcher nbload
+# launcher nbload
## use "xterm -e csi -- " as a launcher to examine the launch environment.
## exit with (exit)
## get a shell with (system "bash")
# launcher xterm -e csi --
Index: tests/fullrun/megatest.config
==================================================================
--- tests/fullrun/megatest.config
+++ tests/fullrun/megatest.config
@@ -22,11 +22,11 @@
# this may save a few milliseconds on launching tests
# launchwait no
waivercommentpatt ^WW\d+ [a-z].*
# Use http instead of direct filesystem access
-transport http
+# transport http
# transport fs
# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
@@ -115,11 +115,12 @@
# it succeeds
port 8080
# This server will keep running this number of hours after last access.
# Three minutes is 0.05 hours
-timeout 0.025
+# timeout 0.025
+timeout 0.25
## disks are:
## name host:/path/to/area
## -or-
## name /path/to/area
Index: tests/watch-monitor.sh
==================================================================
--- tests/watch-monitor.sh
+++ tests/watch-monitor.sh
@@ -1,8 +1,10 @@
#!/bin/bash
+if [ -e fullrun/db/monitor.db ];then
sqlite3 fullrun/db/monitor.db << EOF
.header on
.mode column
-select * from servers;
+select * from servers order by start_time desc;
.q
EOF
+fi
Index: utils/installall.sh
==================================================================
--- utils/installall.sh
+++ utils/installall.sh
@@ -1,10 +1,10 @@
#! /usr/bin/env bash
# set -x
-# Copyright 2007-2010, Matthew Welland.
+# Copyright 2007-2014, Matthew Welland.
#
# This program is made available under the GNU GPL version 2.0 or
# greater. See the accompanying file COPYING for details.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the
@@ -14,10 +14,11 @@
echo You may need to do the following first:
echo sudo apt-get install libreadline-dev
echo sudo apt-get install libwebkitgtk-dev
echo sudo apt-get install libmotif3 -OR- set KTYPE=26g4
echo KTYPE can be 26, 26g4, or 32
+echo
echo KTYPE=$KTYPE
echo You are using PREFIX=$PREFIX
echo You are using proxy="$proxy"
echo
echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :"
@@ -58,14 +59,20 @@
export KTYPE=26
else
echo Using KTYPE=$KTYPE
fi
+# Put all the downloaded tar files in tgz
+mkdir -p tgz
+
+# http://code.call-cc.org/releases/4.8.0/chicken-4.8.0.5.tar.gz
export CHICKEN_VERSION=4.8.0.5
export CHICKEN_BASEVER=4.8.0
-if ! [[ -e chicken-${CHICKEN_VERSION}.tar.gz ]]; then
- wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/chicken-${CHICKEN_VERSION}.tar.gz
+chicken_targz=chicken-${CHICKEN_VERSION}.tar.gz
+if ! [[ -e tgz/$chicken_targz ]]; then
+ wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/${chicken_targz}
+ mv $chicken_targz tgz
fi
BUILDHOME=$PWD
DEPLOYTARG=$BUILDHOME/deploy
@@ -82,11 +89,11 @@
echo PATH=$PATH
echo LD_LIBRARY_PATH=$LD_LIBRARY_PATH
if ! [[ -e $PREFIX/bin/csi ]]; then
- tar xfvz chicken-${CHICKEN_VERSION}.tar.gz
+ tar xfvz tgz/$chicken_targz
cd chicken-${CHICKEN_VERSION}
# make PLATFORM=linux PREFIX=$PREFIX spotless
make PLATFORM=linux PREFIX=$PREFIX
make PLATFORM=linux PREFIX=$PREFIX install
cd $BUILDHOME
@@ -113,56 +120,65 @@
export LIBPATH=$PREFIX/lib$ADDITIONAL_LIBPATH
export LD_LIBRARY_PATH=$LIBPATH
export SQLITE3_VERSION=3071401
echo Install sqlite3
-if ! [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then
- wget http://www.sqlite.org/sqlite-autoconf-$SQLITE3_VERSION.tar.gz
+sqlite3_tgz=sqlite-autoconf-$SQLITE3_VERSION.tar.gz
+if ! [[ -e tgz/$sqlite3_tgz ]]; then
+ wget http://www.sqlite.org/$sqlite3_tgz
+ mv $sqlite3_tgz tgz
fi
if ! [[ -e $PREFIX/bin/sqlite3 ]] ; then
- if [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then
- tar xfz sqlite-autoconf-$SQLITE3_VERSION.tar.gz
+ if [[ -e tgz/sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then
+ tar xfz tgz/sqlite-autoconf-$SQLITE3_VERSION.tar.gz
(cd sqlite-autoconf-$SQLITE3_VERSION;./configure --prefix=$PREFIX;make;make install)
# CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL -prefix $DEPLOYTARG -deploy $PROX sqlite3
CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX sqlite3
fi
fi
# $CHICKEN_INSTALL $PROX sqlite3
+
+# IUP versions
+CDVER=5.7
+IUPVER=3.8
+IMVER=3.8
if [[ `uname -a | grep x86_64` == "" ]]; then
export ARCHSIZE=''
else
export ARCHSIZE=64_
fi
# export files="cd-5.4.1_Linux${KTYPE}_lib.tar.gz im-3.6.3_Linux${KTYPE}_lib.tar.gz iup-3.5_Linux${KTYPE}_lib.tar.gz"
if [[ x$USEOLDIUP == "x" ]];then
- export files="cd-5.5.1_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-3.8_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-3.6_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz"
+ export files="cd-${CDVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-${IMVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-${IUPVER}_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz"
else
echo WARNING: Using old IUP libraries
export files="cd-5.4.1_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz im-3.6.3_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz iup-3.5_Linux${KTYPE}_${ARCHSIZE}lib.tar.gz"
fi
mkdir -p $PREFIX/iuplib
for a in `echo $files` ; do
- if ! [[ -e $a ]] ; then
+ if ! [[ -e tgz/$a ]] ; then
wget http://www.kiatoa.com/matt/iup/$a
fi
- echo Untarring $a into $BUILDHOME/lib
- (cd $PREFIX/lib;tar xfvz $BUILDHOME/$a;mv include/* ../include)
+ mv $a tgz/$a
+ echo Untarring tgz/$a into $BUILDHOME/lib
+ (cd $PREFIX/lib;tar xfvz $BUILDHOME/tgz/$a;mv include/* ../include)
# (cd $DEPLOYTARG;tar xfvz $BUILDHOME/$a)
done
# ffcall obtained from:
# cvs -z3 -d:pserver:anonymous@cvs.savannah.gnu.org:/sources/libffcall co ffcall
-if ! [[ -e ffcall.tar.gz ]] ; then
+if ! [[ -e tgz/ffcall.tar.gz ]] ; then
wget http://www.kiatoa.com/matt/iup/ffcall.tar.gz
+ mv ffcall.tar.gz tgz
fi
-tar xfvz ffcall.tar.gz
+tar xfvz tgz/ffcall.tar.gz
cd ffcall
./configure --prefix=$PREFIX --enable-shared
make
make install
@@ -174,137 +190,12 @@
# CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -feature disable-iup-web -deploy -prefix $DEPLOYTARG iup
# iup:1.0.2
CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks canvas-draw
# CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -deploy -prefix $DEPLOYTARG canvas-draw
-# disabled zmq # #======================================================================
-# disabled zmq # # Note uuid needed only for zmq 2.x series
-# disabled zmq # #======================================================================
-# disabled zmq #
-# disabled zmq # # http://download.zeromq.org/zeromq-3.2.1-rc2.tar.gz
-# disabled zmq # # zpatchlev=-rc2
-# disabled zmq # # http://download.zeromq.org/zeromq-2.2.0.tar.gz
-# disabled zmq #
-# disabled zmq # if [[ -e /usr/lib/libzmq.so ]]; then
-# disabled zmq # echo "Using system installed zmq library"
-# disabled zmq # $CHICKEN_INSTALL zmq
-# disabled zmq # else
-# disabled zmq # ZEROMQ=zeromq-2.2.0
-# disabled zmq # # ZEROMQ=zeromq-3.2.2
-# disabled zmq #
-# disabled zmq # # wget http://www.kernel.org/pub/linux/utils/util-linux/v2.22/util-linux-2.22.tar.gz
-# disabled zmq # UTIL_LINUX=2.21
-# disabled zmq # # UTIL_LINUX=2.20.1
-# disabled zmq # if ! [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
-# disabled zmq # # wget http://www.kiatoa.com/matt/util-linux-2.20.1.tar.gz
-# disabled zmq # wget http://www.kernel.org/pub/linux/utils/util-linux/v${UTIL_LINUX}/util-linux-${UTIL_LINUX}.tar.gz
-# disabled zmq # fi
-# disabled zmq #
-# disabled zmq # if [[ -e util-linux-${UTIL_LINUX}.tar.gz ]] ; then
-# disabled zmq # tar xfz util-linux-${UTIL_LINUX}.tar.gz
-# disabled zmq # cd util-linux-${UTIL_LINUX}
-# disabled zmq # mkdir -p build
-# disabled zmq # cd build
-# disabled zmq # if [[ $UTIL_LINUX = "2.22" ]] ; then
-# disabled zmq # ../configure --prefix=$PREFIX \
-# disabled zmq # --enable-shared \
-# disabled zmq # --disable-use-tty-group \
-# disabled zmq # --disable-makeinstall-chown \
-# disabled zmq # --disable-makeinstall-setuid \
-# disabled zmq # --disable-libtool-lock \
-# disabled zmq # --disable-login \
-# disabled zmq # --disable-sulogin \
-# disabled zmq # --disable-su \
-# disabled zmq # --disable-schedutils \
-# disabled zmq # --disable-libmount \
-# disabled zmq # --disable-mount \
-# disabled zmq # --disable-losetup \
-# disabled zmq # --disable-fsck \
-# disabled zmq # --disable-partx \
-# disabled zmq # --disable-mountpoint \
-# disabled zmq # --disable-fallocate \
-# disabled zmq # --disable-unshare \
-# disabled zmq # --disable-eject \
-# disabled zmq # --disable-agetty \
-# disabled zmq # --disable-cramfs \
-# disabled zmq # --disable-switch_root \
-# disabled zmq # --disable-pivot_root \
-# disabled zmq # --disable-kill \
-# disabled zmq # --disable-libblkid \
-# disabled zmq # --disable-utmpdump \
-# disabled zmq # --disable-rename \
-# disabled zmq # --disable-chsh-only-listed \
-# disabled zmq # --disable-wall \
-# disabled zmq # --disable-pg-bell \
-# disabled zmq # --disable-require-password \
-# disabled zmq # --disable-libtool-lock \
-# disabled zmq # --disable-nls \
-# disabled zmq # --disable-dmesg \
-# disabled zmq # --without-ncurses
-# disabled zmq # else
-# disabled zmq # ../configure --prefix=$PREFIX \
-# disabled zmq # --enable-shared \
-# disabled zmq # --disable-mount \
-# disabled zmq # --disable-fsck \
-# disabled zmq # --disable-partx \
-# disabled zmq # --disable-largefile \
-# disabled zmq # --disable-tls \
-# disabled zmq # --disable-libmount \
-# disabled zmq # --disable-mountpoint \
-# disabled zmq # --disable-nls \
-# disabled zmq # --disable-rpath \
-# disabled zmq # --disable-agetty \
-# disabled zmq # --disable-cramfs \
-# disabled zmq # --disable-switch_root \
-# disabled zmq # --disable-pivot_root \
-# disabled zmq # --disable-fallocate \
-# disabled zmq # --disable-unshare \
-# disabled zmq # --disable-rename \
-# disabled zmq # --disable-schedutils \
-# disabled zmq # --disable-libblkid \
-# disabled zmq # --disable-wall CFLAGS='-fPIC'
-# disabled zmq #
-# disabled zmq # # --disable-makeinstall-chown \
-# disabled zmq # # --disable-makeinstall-setuid \
-# disabled zmq #
-# disabled zmq # # --disable-chsh-only-listed
-# disabled zmq # # --disable-pg-bell let pg not ring the bell on invalid keys
-# disabled zmq # # --disable-require-password
-# disabled zmq # # --disable-use-tty-group do not install wall and write setgid tty
-# disabled zmq # # --disable-makeinstall-chown
-# disabled zmq # # --disable-makeinstall-setuid
-# disabled zmq # fi
-# disabled zmq #
-# disabled zmq # (cd libuuid;make install)
-# disabled zmq # # make
-# disabled zmq # # make install
-# disabled zmq # cp $PREFIX/include/uuid/uuid.h $PREFIX/include/uuid.h
-# disabled zmq # fi
-# disabled zmq #
-# disabled zmq #
-# disabled zmq # cd $BUILDHOME
-# disabled zmq #
-# disabled zmq # if ! [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
-# disabled zmq # wget http://download.zeromq.org/${ZEROMQ}${zpatchlev}.tar.gz
-# disabled zmq # fi
-# disabled zmq #
-# disabled zmq # if [[ -e ${ZEROMQ}${zpatchlev}.tar.gz ]] ; then
-# disabled zmq # tar xfz ${ZEROMQ}.tar.gz
-# disabled zmq # cd ${ZEROMQ}
-# disabled zmq # ln -s $PREFIX/include/uuid src
-# disabled zmq # # LDFLAGS=-L$PREFIX/lib ./configure --prefix=$PREFIX
-# disabled zmq #
-# disabled zmq # ./configure --enable-static --prefix=$PREFIX --with-uuid=$PREFIX LDFLAGS="-L$PREFIX/lib" CPPFLAGS="-fPIC -I$PREFIX/include" LIBS="-lgcc"
-# disabled zmq # # --disable-shared CPPFLAGS="-fPIC
-# disabled zmq # # LDFLAGS="-L/usr/lib64 -L$PREFIX/lib" ./configure --enable-static --prefix=$PREFIX
-# disabled zmq # make
-# disabled zmq # make install
-# disabled zmq # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX zmq
-# disabled zmq # # CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -deploy -prefix $DEPLOYTARG zmq
-# disabled zmq # fi
-# disabled zmq # fi # if zmq is in /usr/lib
-# disabled zmq #
+# NB// Removed bunch of zmq compiling tricks. Look at older versions of this file if you need to recreate...
+
cd $BUILDHOME
git clone https://bitbucket.org/DerGuteMoritz/zmq/commits/branch/3.2 zmq-3.2
cd zmq-3.2
chicken-install
Index: utils/loadrunner
==================================================================
--- utils/loadrunner
+++ utils/loadrunner
@@ -23,7 +23,7 @@
echo "Load acceptable: lperc=$lperc %, max_load=$max_load %, load=$load, numcpu=$numcpu, MAX_ALLOWED_LOAD=$MAX_ALLOWED_LOAD %"
echo "Starting command: \"$@\""
nbfake "$@"
else
# echo "Load too high: lperc=$lperc, max_load=$max_load, waiting two minutes before trying to run command: \"$@\""
- echo "nbload $@" | at now + 2 minutes 2> /dev/null
+ echo "loadrunner $@" | at now + 2 minutes 2> /dev/null
fi
DELETED zmq-transport.scm
Index: zmq-transport.scm
==================================================================
--- zmq-transport.scm
+++ /dev/null
@@ -1,494 +0,0 @@
-;;======================================================================
-;; Copyright 2006-2012, Matthew Welland.
-;;
-;; This program is made available under the GNU GPL version 2.0 or
-;; greater. See the accompanying file COPYING for details.
-;;
-;; This program is distributed WITHOUT ANY WARRANTY; without even the
-;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-;; PURPOSE.
-;;======================================================================
-
-(require-extension (srfi 18) extras tcp s11n)
-
-(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
-(import (prefix sqlite3 sqlite3:))
-
-(use zmq)
-
-(declare (unit zmq-transport))
-
-(declare (uses common))
-(declare (uses db))
-(declare (uses tests))
-(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-(declare (uses server))
-
-(include "common_records.scm")
-(include "db_records.scm")
-
-;; Transition to pub --> sub with pull <-- push
-;;
-;; 1. client sends request to server via push to the pull port
-;; 2. server puts request in queue or processes immediately as appropriate
-;; 3. server puts responses from completed requests into pub port
-;;
-;; TODO
-;;
-;; Done Tested
-;; [x] [ ] 1. Add columns pullport pubport to servers table
-;; [x] [ ] 2. Add rm of monitor.db if older than 11/12/2012
-;; [x] [ ] 3. Add create of pullport and pubport with finding of available ports
-;; [x] [ ] 4. Add client compose of request
-;; [x] [ ] - name of client: testname/itempath-test_id-hostname
-;; [x] [ ] - name of request: callname, params
-;; [x] [ ] - request key: f(clientname, callname, params)
-;; [x] [ ] 5. Add processing of subscription hits
-;; [x] [ ] - done when get key
-;; [x] [ ] - return results
-;; [x] [ ] 6. Add timeout processing
-;; [x] [ ] - after 60 seconds
-;; [ ] [ ] i. check server alive, connect to new if necessary
-;; [ ] [ ] ii. resend request
-;; [ ] [ ] 7. Turn self ping back on
-
-(define (zmq-transport:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "tcp://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-(define *heartbeat-mutex* (make-mutex))
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-(define-inline (zmqsock:get-pub dat)(vector-ref dat 0))
-(define-inline (zmqsock:get-pull dat)(vector-ref dat 1))
-(define-inline (zmqsock:set-pub! dat s)(vector-set! dat s 0))
-(define-inline (zmqsock:set-pull! dat s)(vector-set! dat s 0))
-
-(define (zmq-transport:run hostn)
- (debug:print 2 "Attempting to start the server ...")
- (if (not *toppath*)
- (if (not (setup-for-run))
- (begin
- (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
- (exit))))
- (let* ((db (open-db)) ;; here we *do not* want to be opening and closing the db
- (zmq-sdat1 #f)
- (zmq-sdat2 #f)
- (pull-socket #f)
- (pub-socket #f)
- (p1 #f)
- (p2 #f)
- (zmq-sockets-dat #f)
- (iface (if (string=? "-" hostn)
- "*" ;; (get-host-name)
- hostn))
- (hostname (get-host-name))
- (ipaddrstr (let ((ipstr (if (string=? "-" hostn)
- (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
- #f)))
- (if ipstr ipstr hostname)))
- (last-run 0))
- (set! zmq-sockets-dat (zmq-transport:setup-ports ipaddrstr (if (args:get-arg "-port")
- (string->number (args:get-arg "-port"))
- (+ 5000 (random 1001)))))
-
- (set! zmq-sdat1 (car zmq-sockets-dat))
- (set! pull-socket (cadr zmq-sdat1)) ;; (iface s port)
- (set! p1 (caddr zmq-sdat1))
-
- (set! zmq-sdat2 (cadr zmq-sockets-dat))
- (set! pub-socket (cadr zmq-sdat2))
- (set! p2 (caddr zmq-sdat2))
-
- (set! *cache-on* #t)
-
- (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!?
-
- ;; what to do when we quit
- ;;
-;; (on-exit (lambda ()
-;; (if (and *toppath* *server-info*)
-;; (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*))
-;; (let loop ()
-;; (let ((queue-len 0))
-;; (thread-sleep! (random 5))
-;; (mutex-lock! *incoming-mutex*)
-;; (set! queue-len (length *incoming-data*))
-;; (mutex-unlock! *incoming-mutex*)
-;; (if (> queue-len 0)
-;; (begin
-;; (debug:print-info 0 "Queue not flushed, waiting ...")
-;; (loop))))))))
-
- ;; The heavy lifting
- ;;
- ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime
- ;;
- (debug:print-info 11 "Server setup complete, start listening for messages")
- (let loop ((queue-lst '()))
- (let* ((rawmsg (receive-message* pull-socket))
- (packet (db:string->obj rawmsg))
- (qtype (cdb:packet-get-qtype packet)))
- (debug:print-info 12 "server=> received packet=" packet)
- (if (not (member qtype '(sync ping)))
- (begin
- (mutex-lock! *heartbeat-mutex*)
- (set! *last-db-access* (current-seconds))
- (mutex-unlock! *heartbeat-mutex*)))
- (if #t ;; (cdb:packet-get-immediate packet) ;; process immediately or put in queue
- (begin
- (db:process-queue-item db packet)
- ;; (open-run-close db:process-queue #f pub-socket (cons packet queue-lst))
-
- (loop '()))
- (loop (cons packet queue-lst)))))))
-
-;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being
-;; used and to shutdown after sometime if it is not.
-;;
-(define (zmq-transport:keep-running)
- ;; if none running or if > 20 seconds since
- ;; server last used then start shutdown
- ;; This thread waits for the server to come alive
- (let* ((server-info (let loop ()
- (let ((sdat #f))
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
- (if sdat sdat
- (begin
- (debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again")
- (sleep 4)
- (loop))))))
- (iface (cadr server-info))
- (pullport (caddr server-info))
- (pubport (cadddr server-info)) ;; id interface pullport pubport)
- ;; (zmq-sockets (zmq-transport:client-connect iface pullport pubport))
- (last-access 0))
- (debug:print-info 11 "heartbeat started for zmq server on " iface " " pullport " " pubport)
- (let loop ((count 0))
- (thread-sleep! 4) ;; no need to do this very often
- ;; NB// sync currently does NOT return queue-length
- ;; GET REAL QUEUE LENGTH FROM THE VARIABLE
- (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1)))
- ;; (print "Server running, count is " count)
- (if (< count 1) ;; 3x3 = 9 secs aprox
- (loop (+ count 1)))
-
- ;; NOTE: Get rid of this mechanism! It really is not needed...
- (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info))
-
- ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
- (mutex-lock! *heartbeat-mutex*)
- (set! last-access *last-db-access*)
- (mutex-unlock! *heartbeat-mutex*)
- (if (> (+ last-access
- ;; (* 50 60 60) ;; 48 hrs
- ;; 60 ;; one minute
- ;; (* 60 60) ;; one hour
- (* 45 60) ;; 45 minutes, until the db deletion bug is fixed.
- )
- (current-seconds))
- (begin
- (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
- (loop 0))
- (begin
- (debug:print-info 0 "Starting to shutdown the server.")
- ;; need to delete only *my* server entry (future use)
- (set! *time-to-exit* #t)
- (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
- (thread-sleep! 1)
- (debug:print-info 0 "Max cached queries was " *max-cache-size*)
- (debug:print-info 0 "Server shutdown complete. Exiting")
- (exit)))))))
-
-(define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50))
- (let ((s (if s s (make-socket stype)))
- (p (if (number? port) port 5555))
- (old-handler (current-exception-handler)))
- (handle-exceptions
- exn
- (begin
- (debug:print 0 "Failed to bind to port " p ", trying next port")
- (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
- ;; (old-handler)
- ;; (print-call-chain)
- (if (> trynum 0)
- (zmq-transport:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1))
- (debug:print-info 0 "Tried ports up to " p
- " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))
- (exit)) ;; To exit or not? That is the question.
- (let ((zmq-url (conc "tcp://" iface ":" p)))
- (debug:print 2 "Trying to start server on " zmq-url)
- (bind-socket s zmq-url)
- (list iface s port)))))
-
-(define (zmq-transport:setup-ports ipaddrstr startport)
- (let* ((s1 (zmq-transport:find-free-port-and-open ipaddrstr #f startport 'pull))
- (p1 (caddr s1))
- (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub))
- (p2 (caddr s2)))
- (set! *runremote* #f)
- (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2)
- (mutex-lock! *heartbeat-mutex*)
- (set! *server-info* (open-run-close tasks:server-register
- tasks:open-db
- (current-process-id)
- ipaddrstr p1
- 0
- 'live
- 'zmq
- pubport: p2))
- (debug:print-info 11 "*server-info* set to " *server-info*)
- (mutex-unlock! *heartbeat-mutex*)
- (list s1 s2)))
-
-(define (zmq-transport:mk-signature)
- (message-digest-string (md5-primitive)
- (with-output-to-string
- (lambda ()
- (write (list (current-directory)
- (argv)))))))
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;;======================================================================
-;; C L I E N T S
-;;======================================================================
-
-;;
-(define (zmq-transport:client-socket-connect iface port #!key (context #f)(type 'req)(subscriptions '()))
- (debug:print-info 3 "client-connect " iface ":" port ", type=" type ", subscriptions=" subscriptions)
- (let ((connect-ok #f)
- (zmq-socket (if context
- (make-socket type context)
- (make-socket type)))
- (conurl (zmq-transport:make-server-url (list iface port))))
- (if (socket? zmq-socket)
- (begin
- ;; first apply subscriptions
- (for-each (lambda (subscription)
- (debug:print 2 "Subscribing to " subscription)
- (socket-option-set! zmq-socket 'subscribe subscription))
- subscriptions)
- (connect-socket zmq-socket conurl)
- zmq-socket)
- (begin
- (debug:print 0 "ERROR: Failed to open socket to " conurl)
- #f))))
-
-(define (zmq-transport:client-connect iface pullport pubport)
- (let* ((push-socket (zmq-transport:client-socket-connect iface pullport type: 'push))
- (sub-socket (zmq-transport:client-socket-connect iface pubport
- type: 'sub
- subscriptions: (list (client:get-signature) "all")))
- (zmq-sockets (vector push-socket sub-socket))
- (login-res #f))
- (debug:print-info 11 "zmq-transport:client-connect started. Next is login")
- (set! login-res (client:login serverdat zmq-sockets))
- (if (and (not (null? login-res))
- (car login-res))
- (begin
- (debug:print-info 2 "Logged in and connected to " iface ":" pullport "/" pubport ".")
- (set! *runremote* zmq-sockets)
- zmq-sockets)
- (begin
- (debug:print-info 2 "Failed to login or connect to " conurl)
- (set! *runremote* #f)
- #f))))
-
-;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being
-;; used and to shutdown after sometime if it is not.
-;;
-(define (zmq-transport:keep-running)
- ;; if none running or if > 20 seconds since
- ;; server last used then start shutdown
- ;; This thread waits for the server to come alive
- (let* ((server-info (let loop ()
- (let ((sdat #f))
- (mutex-lock! *heartbeat-mutex*)
- (set! sdat *runremote*)
- (mutex-unlock! *heartbeat-mutex*)
- (if sdat sdat
- (begin
- (sleep 4)
- (loop))))))
- (iface (car server-info))
- (port (cadr server-info))
- (last-access 0)
- (tdb (tasks:open-db))
- (spid (tasks:server-get-server-id tdb #f iface port #f)))
- (print "Keep-running got server pid " spid ", using iface " iface " and port " port)
- (let loop ((count 0))
- (thread-sleep! 4) ;; no need to do this very often
- ;; NB// sync currently does NOT return queue-length
- (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
- ;; (print "Server running, count is " count)
- (if (< count 1) ;; 3x3 = 9 secs aprox
- (loop (+ count 1)))
-
- ;; NOTE: Get rid of this mechanism! It really is not needed...
- (tasks:server-update-heartbeat tdb spid)
-
- ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
- (mutex-lock! *heartbeat-mutex*)
- (set! last-access *last-db-access*)
- (mutex-unlock! *heartbeat-mutex*)
- (if (> (+ last-access
- ;; (* 50 60 60) ;; 48 hrs
- ;; 60 ;; one minute
- ;; (* 60 60) ;; one hour
- (* 45 60) ;; 45 minutes, until the db deletion bug is fixed.
- )
- (current-seconds))
- (begin
- (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
- (loop 0))
- (begin
- (debug:print-info 0 "Starting to shutdown the server.")
- ;; need to delete only *my* server entry (future use)
- (set! *time-to-exit* #t)
- (tasks:server-deregister-self tdb (get-host-name))
- (thread-sleep! 1)
- (debug:print-info 0 "Max cached queries was " *max-cache-size*)
- (debug:print-info 0 "Server shutdown complete. Exiting")
- (exit)))))))
-
-;; all routes though here end in exit ...
-(define (zmq-transport:launch)
- (if (not *toppath*)
- (if (not (setup-for-run))
- (begin
- (debug:print 0 "ERROR: cannot find megatest.config, exiting")
- (exit))))
- (debug:print-info 2 "Starting zmq server")
- (if *toppath*
- (let* (;; (th1 (make-thread (lambda ()
- ;; (let ((server-info #f))
- ;; ;; wait for the server to be online and available
- ;; (let loop ()
- ;; (debug:print-info 2 "Waiting for the server to come online before starting heartbeat")
- ;; (thread-sleep! 2)
- ;; (mutex-lock! *heartbeat-mutex*)
- ;; (set! server-info *server-info* )
- ;; (mutex-unlock! *heartbeat-mutex*)
- ;; (if (not server-info)(loop)))
- ;; (debug:print 2 "Server alive, starting self-ping")
- ;; (zmq-transport:self-ping server-info)
- ;; ))
- ;; "Self ping"))
- (th2 (make-thread (lambda ()
- (zmq-transport:run
- (if (args:get-arg "-server")
- (args:get-arg "-server")
- "-"))) "Server run"))
- ;; (th3 (make-thread (lambda ()(zmq-transport:keep-running)) "Keep running"))
- )
- (set! *client-non-blocking-mode* #t)
- ;; (thread-start! th1)
- (thread-start! th2)
- ;; (thread-start! th3)
- (set! *didsomething* #t)
- ;; (thread-join! th3)
- (thread-join! th2)
- )
- (debug:print 0 "ERROR: Failed to setup for megatest")))
-
-(define (zmq-transport:client-signal-handler signum)
- (handle-exceptions
- exn
- (debug:print " ... exiting ...")
- (let ((th1 (make-thread (lambda ()
- (if (not *received-response*)
- (receive-message* *runremote*))) ;; flush out last call if applicable
- "eat response"))
- (th2 (make-thread (lambda ()
- (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
- (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
- (debug:print 0 " Done.")
- (exit 4))
- "exit on ^C timer")))
- (thread-start! th2)
- (thread-start! th1)
- (thread-join! th2))))
-
-(define (zmq-transport:client-launch)
- (set-signal-handler! signal/int zmq-transport:client-signal-handler)
- (if (zmq-transport:client-setup)
- (debug:print-info 2 "connected as client")
- (begin
- (debug:print 0 "ERROR: Failed to connect as client")
- (exit))))
-
-;;======================================================================
-;; Defunct functions
-;;======================================================================
-
-;; ping a server and return number of clients or #f (if no response)
-;; NOT IN USE!
-(define (zmq-transport:ping host port #!key (secs 10)(return-socket #f))
- (cdb:use-non-blocking-mode
- (lambda ()
- (let* ((res #f)
- (th1 (make-thread
- (lambda ()
- (let* ((zmq-context (make-context 1))
- (zmq-socket (zmq-transport:client-connect host port context: zmq-context)))
- (if zmq-socket
- (if (zmq-transport:client-login zmq-socket)
- (let ((numclients (cdb:num-clients zmq-socket)))
- (if (not return-socket)
- (begin
- (zmq-transport:client-logout zmq-socket)
- (close-socket zmq-socket)))
- (set! res (list #t numclients (if return-socket zmq-socket #f))))
- (begin
- ;; (close-socket zmq-socket)
- (set! res (list #f "CAN'T LOGIN" #f))))
- (set! res (list #f "CAN'T CONNECT" #f)))))
- "Ping: th1"))
- (th2 (make-thread
- (lambda ()
- (let loop ((count 1))
- (debug:print-info 1 "Ping " count " server on " host " at port " port)
- (thread-sleep! 2)
- (if (< count (/ secs 2))
- (loop (+ count 1))))
- ;; (thread-terminate! th1)
- (set! res (list #f "TIMED OUT" #f)))
- "Ping: th2")))
- (thread-start! th2)
- (thread-start! th1)
- (handle-exceptions
- exn
- (set! res (list #f "TIMED OUT" #f))
- (thread-join! th1 secs))
- res))))
-
-;; (define (zmq-transport:self-ping server-info)
-;; ;; server-info: server-id interface pullport pubport
-;; (let ((iface (list-ref server-info 1))
-;; (pullport (list-ref server-info 2))
-;; (pubport (list-ref server-info 3)))
-;; (zmq-transport:client-connect iface pullport pubport)
-;; (let loop ()
-;; (thread-sleep! 2)
-;; (cdb:client-call *runremote* 'ping #t)
-;; (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!")
-;; (mutex-lock! *heartbeat-mutex*)
-;; (set! *server-loop-heart-beat* (current-seconds))
-;; (mutex-unlock! *heartbeat-mutex*)
-;; (loop))))
-
-(define (zmq-transport:reply pubsock target query-sig success/fail result)
- (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result)
- (send-message pubsock target send-more: #t)
- (send-message pubsock (db:obj->string (vector success/fail query-sig result))))
-