Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,11 +28,11 @@
ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \
subrun.scm portlogger.scm archive.scm env.scm \
diff-report.scm cgisetup/models/pgdb.scm
# module source files
-MSRCFILES = mutils.scm
+MSRCFILES = adjutant.scm mutils.scm mttop.scm
# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \
# mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \
# rmtmod.scm apimod.scm
@@ -315,19 +315,40 @@
# install dashboard as dboard so wrapper script can be called dashboard
$(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper
utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard
chmod a+x $(PREFIX)/bin/dashboard
$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so
+ if [[ $(ARCHSTR) == 12.5 ]]; then \
+ mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+ $(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \
+ fi
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 : lib/libpangox-1.0.so.0
+ if [[ $(ARCHSTR) == 12.5 ]]; then \
+ mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+ $(INSTALL) lib/libpangox-1.0.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0; \
+ fi
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 : lib/libxcb-xlib.so.0
+ if [[ $(ARCHSTR) == 12.5 ]]; then \
+ mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+ $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \
+ fi
install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
$(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
$(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
$(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
$(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
$(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
- $(PREFIX)/share/js/jquery-3.1.0.slim.min.js
+ $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
+ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
+ $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
+ $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
# $(PREFIX)/bin/.$(ARCHSTR)/ndboard
# $(PREFIX)/bin/newdashboard
$(PREFIX)/bin/.$(ARCHSTR) :
Index: adjutant.scm
==================================================================
--- adjutant.scm
+++ adjutant.scm
@@ -22,12 +22,23 @@
(module adjutant *
(import scheme chicken data-structures extras files)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69
- md5 message-digest
+ md5 message-digest matchable
regex srfi-1)
-(define (adjutant-run)
- (print "Running the adjutant!"))
+(define (adjutant-run host-type rmt:no-sync-take-job)
+ (print "Running the adjutant!")
+ (let loop ((wait-count 0))
+ (if (< wait-count 10) ;; 6 x 10 seconds = one minute
+ (let* ((dat (rmt:no-sync-take-job host-type)))
+ (match dat
+ ((id ht vars exekey cmdline state event-time last-update)
+ (system cmdline)
+ (loop 0))
+ (else
+ (thread-sleep! 10)
+ (loop (+ wait-count 1)))))
+ (print "I'm bored. Exiting."))))
)
Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -253,17 +253,20 @@
;; NO SYNC DB
((no-sync-set) (apply db:no-sync-set *no-sync-db* params))
((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params))
((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params))
((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params))
+ ((no-sync-add-job) (apply db:no-sync-add-job *no-sync-db* params))
+ ((no-sync-take-job) (apply db:no-sync-take-job *no-sync-db* params))
+ ((no-sync-job-records-clean) (apply db:no-sync-job-records-clean *no-sync-db* params))
;; ARCHIVES
;; ((archive-get-allocations)
((archive-register-disk) (apply db:archive-register-disk dbstruct params))
((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-
+
;;======================================================================
;; READ ONLY QUERIES
;;======================================================================
;; KEYS
Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -3210,31 +3210,39 @@
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;;
;; [host-types]
+;; C/M/A lets megatest know this launcher provides C cores, M bytes memory for architecture A
+;; 2/2G/arm smart -cores 2 -memory 2G -arch arm
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
+;;
+;; NOTE: host-rules is ONLY used for MTLOWESTLOAD
;;
;; [host-rules]
;; # maxnload => max normalized load
;; # maxnjobs => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1
;;
;; [launchers]
;; envsetup general
-;; xor/%/n 4C16G
+;; xor/%/n 2/2G/arm
;; % nbgeneral
;;
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes
;; launcher nbfake
+;; mode adjutant|normal (default is normal)
+;;
+;;
+;; mode is 'normal (i.e. directly use launcher) or 'adjutant (i.e. use adjutant)
;;
-(define (common:get-launcher configdat testname itempath)
+(define (common:get-launcher configdat testname itempath mode)
(let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
(if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
(not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
(let* ((launchers (hash-table-ref/default configdat "launchers" '())))
(if (null? launchers)
@@ -3241,32 +3249,36 @@
fallback-launcher
(let loop ((hed (car launchers))
(tal (cdr launchers)))
(let ((patt (car hed))
(host-type (cadr hed)))
- (if (tests:match patt testname itempath)
+ (if (tests:match patt testname itempath) ;; have a launcher match for this test
(begin
(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
- (let ((launcher (configf:lookup configdat "host-types" host-type)))
+ (let ((launcher (configf:lookup configdat "host-types" host-type))) ;; find the actual launcher from the host-types table
+ ;; if we are in adjutant mode then we want to return both host-type and launcher
(if launcher
(let* ((launcher-parts (string-split launcher))
(launcher-exe (car launcher-parts)))
(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
(let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
(count 100))
(if targ-host
(conc "remrun " targ-host)
(if (> count 0)
+
(begin
(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
(thread-sleep! (- 101 count))
(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
(- count 1)))
(begin
(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
(exit)))))
- launcher))
+ (case mode
+ ((adjutant) (list host-type launcher))
+ (else launcher))))
(begin
(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
(if (null? tal)
fallback-launcher
(loop (car tal)(cdr tal)))))))
Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -530,12 +530,12 @@
(define configf:read-file read-config)
;; safely look up a value that is expected to be a number, return
;; a default (#f unless provided)
;;
-(define (configf:lookup-number cfdat section varname #!key (default #f))
- (let* ((val (configf:lookup *configdat* section varname))
+(define (configf:lookup-number cfgdat section varname #!key (default #f))
+ (let* ((val (configf:lookup cfgdat section varname))
(res (if val
(string->number (string-substitute "\\s+" "" val #t))
#f)))
(cond
(res res)
Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -324,12 +324,12 @@
(lambda (state color)
(for-each
(lambda (btn)
(let* ((name (iup:attribute btn "TITLE"))
(newcolor (if (equal? name state) color "192 192 192")))
- (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
- (iup:attribute-set! btn "BGCOLOR" newcolor))))
+ (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR")))
+ (iup:attribute-set! btn "FGCOLOR" newcolor))))
btns)))
btns))
(apply iup:hbox
(iup:label "STATUS:" #:size "30x")
(let* ((btns (map (lambda (status)
@@ -358,12 +358,12 @@
(lambda (status color)
(for-each
(lambda (btn)
(let* ((name (iup:attribute btn "TITLE"))
(newcolor (if (equal? name status) color "192 192 192")))
- (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
- (iup:attribute-set! btn "BGCOLOR" newcolor))))
+ (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR")))
+ (iup:attribute-set! btn "FGCOLOR" newcolor))))
btns)))
btns))))))
(define (dashboard-tests:run-a-step info)
#t)
Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -1169,13 +1169,14 @@
(button (vector-ref columndat rown))
(color (car (gutils:get-color-for-state-status teststate teststatus)))
(curr-color (vector-ref buttondat 1)) ;; (iup:attribute button "BGCOLOR"))
(curr-title (vector-ref buttondat 2))) ;; (iup:attribute button "TITLE")))
(if (not (equal? curr-color color))
- (iup:attribute-set! button "BGCOLOR" color))
+ #;(iup:attribute-set! button "BGCOLOR" color)
+ (iup:attribute-set! button "FGCOLOR" color))
(if (not (equal? curr-title buttontxt))
- (iup:attribute-set! button "TITLE" buttontxt))
+ (iup:attribute-set! button "TITLE" (conc "" buttontxt "")))
(vector-set! buttondat 0 run-id)
(vector-set! buttondat 1 color)
(vector-set! buttondat 2 buttontxt)
(vector-set! buttondat 3 testdat)
(vector-set! buttondat 4 run-key)))
@@ -2500,22 +2501,22 @@
(set! hide (iup:button "Hide"
#:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL"
#:action (lambda (obj)
(dboard:tabdat-hide-not-hide-set! tabdat #t) ;; (not (dboard:tabdat-hide-not-hide tabdat)))
;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-not-hide tabdat) "HideTests" "NotHide"))
- (iup:attribute-set! hide "BGCOLOR" sel-color)
- (iup:attribute-set! show "BGCOLOR" nonsel-color)
+ (iup:attribute-set! hide "FGCOLOR" sel-color)
+ (iup:attribute-set! show "FGCOLOR" nonsel-color)
(mark-for-update tabdat))))
(set! show (iup:button "Show"
#:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL"
#:action (lambda (obj)
(dboard:tabdat-hide-not-hide-set! tabdat #f) ;; (not (dboard:tabdat-hide-not-hide tabdat)))
- (iup:attribute-set! show "BGCOLOR" sel-color)
- (iup:attribute-set! hide "BGCOLOR" nonsel-color)
+ (iup:attribute-set! show "FGCOLOR" sel-color)
+ (iup:attribute-set! hide "FGCOLOR" nonsel-color)
(mark-for-update tabdat))))
- (iup:attribute-set! hide "BGCOLOR" sel-color)
- (iup:attribute-set! show "BGCOLOR" nonsel-color)
+ (iup:attribute-set! hide "FGCOLOR" sel-color)
+ (iup:attribute-set! show "FGCOLOR" nonsel-color)
;; (dboard:tabdat-hide-not-hide-button-set! tabdat hideit) ;; never used, can eliminate ...
(iup:vbox
(iup:hbox hide show)
sort-lb)))
)
@@ -2884,10 +2885,11 @@
(let* ((button-key (mkstr runnum testnum))
(butn (iup:button
"" ;; button-key
#:size (conc cell-width btn-height )
#:expand "HORIZONTAL"
+ #:MARKUP "YES"
#:fontsize btn-fontsz
#:button-cb
(lambda (obj a pressed x y btn . rem)
;; (print "pressed= " pressed " x= " x " y= " y " rem=" rem " btn=" btn " string? " (string? btn))
(if (substring-index "3" btn)
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -2166,14 +2166,66 @@
(db-exists (common:file-exists? dbname))
(db (sqlite3:open-database dbname)))
(sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
(if (not db-exists)
(begin
- (sqlite3:execute db "PRAGMA synchronous = 0;")
(sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
(sqlite3:execute db "PRAGMA journal_mode=WAL;")))
+ ;; MOVE THIS TABLE CREATION TO THE (begin above in about six months (it is Sep 2020 right now).
+ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS jobs_queue (id INTEGER PRIMARY KEY, host_type TEXT, cores INTEGER, memory TEXT, vars TEXT, exekey TEXT, cmdline TEXT, state TEXT, event_time INTEGER, last_update INTEGER);")
+ ;; not sure I'll use this next one. I prefer if tests simply append to a file:
+ ;; last-update-seconds cpuload tmpspace rundirspace
+ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_extra_data (id INTEGER PRIMARY KEY, run_id INTEGER, test_id INTEGER, last_seen_running INTEGER);")
+ (sqlite3:execute db "PRAGMA synchronous = 0;")
db))
+
+(define (db:no-sync-add-job db-in host-type vars-list exekey cmdline)
+ (sqlite3:execute (db:no-sync-db db-in) "INSERT INTO jobs_queue (host_type,vars,exekey,cmdline,state,event_time,last_update) VALUES (?,?,?,?,?,?,?);"
+ host-type
+ (with-output-to-string
+ (lambda ()
+ (write vars-list)))
+ exekey cmdline "waiting" (current-seconds)(current-seconds)))
+
+;; find next job (waiting longest) that matches host-type - future, we'll find jobs that fit if no exact match
+(define (db:no-sync-take-job db-in host-type)
+ (let* ((db (db:no-sync-db db-in))
+ (stmt1 "SELECT id,host_type,vars,exekey,cmdline,state,event_time,last_update FROM jobs_queue WHERE host_type=? AND state != 'taken' ORDER BY event_time ASC;")
+ (stmt1h (sqlite3:prepare db stmt1))
+ (stmt2 "UPDATE jobs_queue SET state='taken',last_update=? WHERE id=?;")
+ (stmt2h (sqlite3:prepare db stmt2))
+ (res (sqlite3:with-transaction
+ db
+ (lambda ()
+ (let* ((matching-jobs (sqlite3:fold-row
+ (lambda (res . row) ;; id host-type vars exekey state event-time last-update)
+ (cons row res))
+ '()
+ stmt1h
+ host-type)))
+ (if (null? matching-jobs)
+ #f
+ (let ((choosen-one (let loop ((tal matching-jobs)
+ (res #f)) ;; put bestest one in here
+ (if (null? tal)
+ res
+ (let ((curr (car tal))
+ (rem (cdr tal)))
+ curr) ;; here we will compare with res, if better candidate the loop with curr else loop with res
+ ))))
+ (if choosen-one ;; we need to mark it as taken
+ (sqlite3:execute stmt2h (current-seconds) (car choosen-one)))
+ choosen-one)))))))
+ (sqlite3:finalize! stmt1h) ;; it'd be nice to cache these and finalize on exit.
+ (sqlite3:finalize! stmt2h)
+ res))
+
+;; clean out old jobs in queue, i.e. taken and event_time > 24 hrs ago
+;;
+(define (db:no-sync-job-records-clean db)
+ (sqlite3:execute (db:no-sync-db db) "DELETE FROM jobs_queue WHERE state='taken' AND event_time < ?;" (- (current-seconds)(* 24 3600))))
+
;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
@@ -2191,12 +2243,13 @@
(sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
(define (db:no-sync-del! db var)
(sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
-(define (db:no-sync-get/default db var default)
- (let ((res default))
+(define (db:no-sync-get/default db-in var default)
+ (let ((db (db:no-sync-db db-in))
+ (res default))
(sqlite3:for-each-row
(lambda (val)
(set! res val))
(db:no-sync-db db)
"SELECT val FROM no_sync_metadat WHERE var=?;"
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -557,26 +557,31 @@
(server-started (conc tmp-area "/.server-started"))
(start-time (common:lazy-modification-time server-start))
(started-time (common:lazy-modification-time server-started))
(server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
(start-time-old (> (- (current-seconds) start-time) 5))
- (cleanup-proc (lambda (msg)
+ (cleanup-proc (lambda (msg) ;; would like to use (modulo (current-seconds) 60) instead of process-id to wrap filenames
(let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
+ (new-fname (conc "server-" (modulo (current-seconds) 60) "-" (get-host-name) ".log"))
(full-serv-fname (conc *toppath* "/logs/" serv-fname))
- (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname)))
+ ;; (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))
+ (new-serv-fname (conc *toppath* "/logs/" new-fname))
+ )
(debug:print 0 *default-log-port* msg)
(if (common:file-exists? full-serv-fname)
- (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
+ (with-output-to-pipe "at now + 10 minutes" (lambda ()
+ (print "mv -f " full-serv-fname " " new-serv-fname)))
+ ;; (system (conc "sleep 10;mv -f " full-serv-fname " " new-serv-fname))
(debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
(exit)))))
- #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago
+ (if (and (not start-time-old) ;; last server start try was less than five seconds ago
(not server-starting))
(begin
(cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
(exit)))
;; lets not even bother to start if there are already three or more server files ready to go
- #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*))))
+ (let* ((num-alive (server:get-num-alive (server:get-list *toppath*))))
(if (> num-alive 3)
(begin
(cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
(exit))))
(common:save-pkt `((action . start)
Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -812,45 +812,11 @@
(loop (car tal) (cdr tal) (+ kill-cnt flag))
(+ kill-cnt flag))))))
;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
-(define (launch:cache-config)
- ;; if we have a linktree and -runtests and -target and the directory exists dump the config
- ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
- (if (and *configdat*
- (or (args:get-arg "-run")
- (args:get-arg "-runtests")
- (args:get-arg "-execute")))
- (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
- (target (common:args-get-target exit-if-bad: #t))
- (runname (or (args:get-arg "-runname")
- (args:get-arg ":runname")
- (getenv "MT_RUNNAME")))
- (fulldir (conc linktree "/"
- target "/"
- runname)))
- (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
- (begin
- (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
- (if (not (common:file-exists? fulldir))
- (create-directory fulldir #t)) ;; need to protect with exception handler
- (if (and target
- runname
- (common:file-exists? fulldir))
- (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds)))
- (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
- (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
- (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
- (begin
- (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
- (if (not (common:in-running-test?))
- (configf:write-alist *configdat* tmpfile))
- (system (conc "ln -sf " tmpfile " " targfile))))
- )))
- (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
-
+;; (launch:cache-config) moved to attic below
;; gather available information, if legit read configs in this order:
;;
;; if have cache;
;; read it a return it
@@ -874,14 +840,30 @@
(mutex-unlock! *launch-setup-mutex*)
*toppath*)
(let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
(mutex-unlock! *launch-setup-mutex*)
res)))
+
+(define (launch:cache-files-changed? cache-files ref-seconds)
+ (let* ((changed #f))
+ (if (or (not cache-files)
+ (null? cache-files))
+ (set! changed #t) ;; yep, they've changed
+ (for-each
+ (lambda (fname)
+ (if (not fname)
+ (set! changed #t)
+ (if (not (file-exists? fname))
+ (set! changed #t)
+ (if (> (file-modification-time fname) ref-seconds)
+ (set! changed #t)))))
+ cache-files))
+ changed))
;; return paths depending on what info is available.
;;
-(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+(define (launch:get-cache-file-paths areapath toppath target)
(let* ((use-cache (common:use-cache?))
(runname (common:args-get-runname))
(linktree (common:get-linktree))
(testname (common:get-full-test-name))
(rundir (if (and runname target linktree)
@@ -912,11 +894,11 @@
(let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here.
(toppath (common:get-toppath areapath))
(target (common:args-get-target))
(sections (if target (list "default" target) #f)) ;; for runconfigs
(mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
- (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+ (cachefiles (launch:get-cache-file-paths areapath toppath target))
;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
(mtcachef (if (null? cachefiles)
#f
(car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
(rccachef (if (null? cachefiles)
@@ -993,11 +975,11 @@
(for-each (lambda (kt)
(setenv (car kt) (cadr kt)))
key-vals)
(read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
sections: sections)))
- (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+ (cachefiles (launch:get-cache-file-paths areapath toppath target))
(mtcachef (car cachefiles))
(rccachef (cdr cachefiles)))
;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
;; TODO - consider 1) using simple-lock to bracket cache write
;; 2) cache in hash on server, since need to do rmt: anyway to lock.
@@ -1076,11 +1058,11 @@
(debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
(set! *toppath* #f) ;; force it to be false so we return #f
#f))
;; one more attempt to cache the configs for future reading
- (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+ (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target))
(mtcachef (car cachefiles))
(rccachef (cdr cachefiles)))
;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
;; TODO - consider 1) using simple-lock to bracket cache write
@@ -1363,12 +1345,20 @@
(debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
(rmt:set-var key (current-seconds))
(rmt:find-and-mark-incomplete run-id #f))))
-
+(defstruct launch:ajt
+ (vars '())
+ (exekey #f)
+ (host-type #f)
+ (test-sig #f)
+ (cmdline #f))
+;; append vars
+(define (launch:ajt-add-vars dat vars)
+ (launch:ajt-vars-set! dat (append (launch:ajt-vars dat) vars)))
;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area
;; 4. remotely run the test on allocated host
@@ -1375,44 +1365,41 @@
;; - could be ssh to host from hosts table (update regularly with load)
;; - could be netbatch
;; (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
(mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
- (let* ( ;; (lock-key (conc "test-" test-id))
- ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key))
- ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
- ;; (if (car lock)
- ;; #t
- ;; (if (> (current-seconds) expire-time)
- ;; (begin
- ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
- ;; (rmt:no-sync-del! lock-key) ;; destroy the lock
- ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;;
- ;; (begin
- ;; (thread-sleep! 1)
- ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+ (let* (;; locking code removed from here commented out and pasted at end of file
(item-path (item-list->path itemdat))
- (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
+ (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour")))
+ ;; launcher-mode will be 'adjutant or 'normal
+ (launcher-mode (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal")))
+ (ajtdat (make-launch:ajt)))
(let loop ((delta (- (current-seconds) *last-launch*))
(launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
(if (> launch-delay delta)
(begin
(if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
(thread-sleep! (- launch-delay delta))
(loop (- (current-seconds) *last-launch*) launch-delay))))
(change-directory *toppath*)
- (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
- (append
- (list
- (list "MT_RUN_AREA_HOME" *toppath*)
- (list "MT_TEST_NAME" test-name)
- (list "MT_RUNNAME" runname)
- (list "MT_ITEMPATH" item-path)
- (list "MT_CONTOUR" contour)
- )
- itemdat))
+ (let ((var-list (append
+ (list
+ (list "MT_RUN_AREA_HOME" *toppath*)
+ (list "MT_TEST_NAME" test-name)
+ (list "MT_RUNNAME" runname)
+ (list "MT_ITEMPATH" item-path)
+ (list "MT_CONTOUR" contour)
+ )
+ itemdat)))
+ ;; consolidate this code with the code in megatest.scm for
+ ;; "-execute", *maybe* - the longer they are set the longer
+ ;; each launch takes (must be non-overlapping with the vars)
+ (alist->env-vars var-list)
+ ;; the var-list into the ajtdat adjutant record whether it is needed or not.
+ (launch:ajt-add-vars ajtdat var-list))
+
(let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed
;; for tconfig, why do we allow fallback to test-conf?
(tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
(begin
(debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
@@ -1430,26 +1417,17 @@
;; (memory (configf:lookup tconfig "requirements" "memory"))
;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed
(remote-megatest (configf:lookup *configdat* "setup" "executable"))
(run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim")
(configf:lookup *configdat* "setup" "runtimelim")))
- ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to
- ;; allow running from dashboard. Extract the path
- ;; from the called megatest and convert dashboard
- ;; or dboard to megatest
(local-megatest (common:find-local-megatest))
- #;(local-megatest (let* ((lm (car (argv)))
- (dir (pathname-directory lm))
- (exe (pathname-strip-directory lm)))
- (conc (if dir (conc dir "/") "")
- (case (string->symbol exe)
- ((dboard) "../megatest")
- ((mtest) "../megatest")
- ((dashboard) "megatest")
- (else exe)))))
- (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher"))
- (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+ (launcher (let ((l (common:get-launcher *configdat* test-name item-path launcher-mode)))
+ (if (string? l)
+ (string-split l)
+ l))) ;; some nonhomogenuity here. '(cmd param1 param2 ...) OR '(host-type launcher)
+ ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+ (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path))
(work-area #f)
(toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
(diskpath #f)
(cmdparms #f)
(fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x))))
@@ -1459,15 +1437,24 @@
(debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '())
(if (args:get-arg "-logging")(list "-logging") '())
(if (configf:lookup *configdat* "misc" "profilesw")
(list (configf:lookup *configdat* "misc" "profilesw"))
'()))))
+ ;; save the test-sig in the ajtdat record
+ (launch:ajt-test-sig-set! ajtdat test-sig)
+ ;; go ahead and figure out if we have a host-type from the
+ ;; launcher call above and save it in the ajtdat record
+ (if (and (eq? launcher-mode 'adjutant)
+ (list? launcher)
+ (> (length launcher) 1))
+ (launch:ajt-host-type-set! ajtdat (car launcher)))
+
;; (if hosts (set! hosts (string-split hosts)))
;; set the megatest to be called on the remote host
(if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
(set! mt-bindir-path (pathname-directory remote-megatest))
- (if launcher (set! launcher (string-split launcher)))
+ ;; (if launcher (set! launcher (string-split launcher))) ;; yuk!
;; set up the run work area for this test
(if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
(not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
(begin
(debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
@@ -1520,63 +1507,92 @@
(list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
(list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '()))
(list 'set-vars (if params (hash-table-ref/default params "-setvars" #f)))
(list 'runname runname)
(list 'mt-bindir-path mt-bindir-path))))))))
+ ;; save the cmdparms in the ajtdat
+ (launch:ajt-exekey-set! ajtdat cmdparms)
;; clean out step records from previous run if they exist
;; (rmt:delete-test-step-records run-id test-id)
;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
(if (common:file-exists? work-area)
(change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
- (cond
- ;; ((and launcher hosts) ;; must be using ssh hostname
- ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
- ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
+
+ ;; save the command line for adjutant mode (might never be needed but best to assemble it here)
+ (launch:ajt-cmdline-set! ajtdat (string-intersperse
+ (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+ (cond
(launcher
(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
- ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
(else
(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
- ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
+
(if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
(debug:print 1 *default-log-port* "Launching " work-area)
;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
(debug:print 4 *default-log-port* "fullcmd: " fullcmd)
(set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
- (let* ((commonprevvals (alist->env-vars
- (hash-table-ref/default *configdat* "env-override" '())))
- (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
- (append (list (list "MT_TEST_RUN_DIR" work-area)
- (list "MT_TEST_NAME" test-name)
- (list "MT_ITEM_INFO" (conc itemdat))
- (list "MT_RUNNAME" runname)
- (list "MT_TARGET" mt_target)
- (list "MT_ITEMPATH" item-path))
- itemdat)))
- (testprevvals (alist->env-vars
- (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
+ (let* ((env-override-vars (hash-table-ref/default *configdat* "env-override" '()))
+ (commonprevvals (alist->env-vars env-override-vars))
+ (misc-vars (append (list (list "MT_TEST_RUN_DIR" work-area)
+ (list "MT_TEST_NAME" test-name)
+ (list "MT_ITEM_INFO" (conc itemdat))
+ (list "MT_RUNNAME" runname)
+ (list "MT_TARGET" mt_target)
+ (list "MT_ITEMPATH" item-path))
+ itemdat))
+ (miscprevvals (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute"
+ (test-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))
+ (testprevvals (alist->env-vars test-vars))
+
;; Launchwait defaults to true, must override it to turn off wait
(launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
- (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
- process:cmd-run-with-stderr-and-exitcode->list
- process-run)
- (if useshell
- (let ((cmdstr (string-intersperse fullcmd " ")))
- (if launchwait
- cmdstr
- (conc cmdstr " >> mt_launch.log 2>&1 &")))
- (car fullcmd))
- (if useshell
- '()
- (cdr fullcmd))))
+ ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
+ (launch-results-prev (if (eq? launcher-mode 'adjutant)
+ '(#t 0) ;; just some fake data to fool downstream but non-applicable code
+ (apply (if launchwait
+ process:cmd-run-with-stderr-and-exitcode->list
+ process-run)
+ (if useshell
+ (let ((cmdstr (string-intersperse fullcmd " ")))
+ (if launchwait
+ cmdstr
+ (conc cmdstr " >> mt_launch.log 2>&1 &")))
+ (car fullcmd))
+ (if useshell
+ '()
+ (cdr fullcmd)))))
(success (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
(launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
- (if (not success)
+
+ (launch:ajt-add-vars ajtdat env-override-vars)
+ (launch:ajt-add-vars ajtdat misc-vars)
+ (launch:ajt-add-vars ajtdat test-vars)
+
+ ;; if in adjutant mode we register the job in the jobs_queue
+ ;; then fire off an adjutant runner
+ ;;
+ (if (eq? launcher-mode 'adjutant)
+ (let* ((adjutant-runner-cmd (append (cdr launcher)
+ (list remote-megatest "-adjutant"
+ (launch:ajt-host-type ajtdat)
+ "-start-dir" *toppath*)))
+ (adj-cmd (conc (string-intersperse (map conc adjutant-runner-cmd) " ")
+ "&")))
+ (rmt:no-sync-add-job
+ (launch:ajt-host-type ajtdat)
+ (launch:ajt-vars ajtdat)
+ (launch:ajt-exekey ajtdat)
+ (launch:ajt-cmdline ajtdat))
+ (print "adj-cmd: " adj-cmd)
+ (system adj-cmd)
+ ))
+
+ (if (not success)
(tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
- (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test
(if (not launchwait) ;; give the OS a little time to allow the process to start
(thread-sleep! 0.01))
(with-output-to-file "mt_launch.log"
(lambda ()
@@ -1598,10 +1614,14 @@
(process-signal (current-process-id) signal/kill)
))
(alist->env-vars miscprevvals)
(alist->env-vars testprevvals)
(alist->env-vars commonprevvals)
+ ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
+ ;; the unlock previously was further up. This seemed wrong as we should not proceed until the
+ ;; vars have been reset.
+ (mutex-unlock! *launch-setup-mutex*)
launch-results))
(change-directory *toppath*)
(thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))))
;; recover a test where the top controlling mtest may have died
@@ -1627,5 +1647,60 @@
;; now wait on that process if all is correct
;; periodically update the db with runtime
;; when the process exits look at the db, if still RUNNING after 10 seconds set
;; state/status appropriately
(process-wait pid)))
+
+;;======================================================================
+;; Attic
+;;======================================================================
+
+ ;; (lock-key (conc "test-" test-id))
+ ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key))
+ ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
+ ;; (if (car lock)
+ ;; #t
+ ;; (if (> (current-seconds) expire-time)
+ ;; (begin
+ ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
+ ;; (rmt:no-sync-del! lock-key) ;; destroy the lock
+ ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;;
+ ;; (begin
+ ;; (thread-sleep! 1)
+ ;; (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+
+
+
+#;(define (launch:cache-config)
+ ;; if we have a linktree and -runtests and -target and the directory exists dump the config
+ ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
+ (if (and *configdat*
+ (or (args:get-arg "-run")
+ (args:get-arg "-runtests")
+ (args:get-arg "-execute")))
+ (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
+ (target (common:args-get-target exit-if-bad: #t))
+ (runname (or (args:get-arg "-runname")
+ (args:get-arg ":runname")
+ (getenv "MT_RUNNAME")))
+ (fulldir (conc linktree "/"
+ target "/"
+ runname)))
+ (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
+ (begin
+ (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+ (if (not (common:file-exists? fulldir))
+ (create-directory fulldir #t)) ;; need to protect with exception handler
+ (if (and target
+ runname
+ (common:file-exists? fulldir))
+ (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds)))
+ (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash))
+ (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
+ (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
+ (begin
+ (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+ (if (not (common:in-running-test?))
+ (configf:write-alist *configdat* tmpfile))
+ (system (conc "ln -sf " tmpfile " " targfile))))
+ )))
+ (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
ADDED lib/libpangox-1.0.so
Index: lib/libpangox-1.0.so
==================================================================
--- /dev/null
+++ lib/libpangox-1.0.so
cannot compute difference between binary files
ADDED lib/libpangox-1.0.so.0
Index: lib/libpangox-1.0.so.0
==================================================================
--- /dev/null
+++ lib/libpangox-1.0.so.0
cannot compute difference between binary files
ADDED lib/libxcb-xlib.so.0
Index: lib/libxcb-xlib.so.0
==================================================================
--- /dev/null
+++ lib/libxcb-xlib.so.0
cannot compute difference between binary files
Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
;; (declare (unit megatest-version))
-(define megatest-version 1.6577)
+(define megatest-version 1.6579)
Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -40,10 +40,16 @@
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses mutils))
+(declare (uses adjutant))
+(import adjutant)
+
+(declare (uses mttop))
+(import mttop)
+
;; (declare (uses ftail))
;; (import ftail)
(define *db* #f) ;; this is only for the repl, do not use in general!!!!
@@ -52,12 +58,12 @@
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")
(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
- readline apropos json http-client directory-utils typed-records
- http-client srfi-18 extras format)
+ readline apropos json http-client directory-utils typed-records matchable
+ http-client srfi-18 extras format call-with-environment-variables)
;; Added for csv stuff - will be removed
;;
(use sparse-vectors)
@@ -105,10 +111,11 @@
Usage: megatest [options]
-h : this help
-manual : show the Megatest user manual
-version : print megatest version (currently " megatest-version ")
+ help : help for the new Megatest interface
Launching and managing runs
-run : run all tests or as specified by -testpatt
-remove-runs : remove the data for a run, requires -runname and -testpatt
Optionally use :state and :status, use -keep-records to remove only
@@ -202,11 +209,11 @@
-update-meta : update the tests metadata for all tests
-setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
overwritten by values set in config files.
-server -|hostname : start the server (reduces contention on megatest.db), use
- to automatically figure out hostname
- -adjutant C,M : start the server/adjutant with allocated cores C and Mem M (Gig),
+ -adjutant host-type : start the server/adjutant with given host-type
use 0,0 to auto use full machine
-transport http|rpc : use http or rpc for transport (default is http)
-log logfile : send stdout and stderr to logfile
-list-servers : list the servers
-kill-servers : kill all servers
@@ -271,10 +278,14 @@
Version " megatest-version ", built from " megatest-fossil-hash ))
;; -gui : start a gui interface
;; -config fname : override the runconfigs file with fname
+
+(mttop-run (command-line-arguments)
+ '("help"))
+
;; process args
(define remargs (args:get-args
(argv)
(list "-runtests" ;; run a specific test
"-config" ;; override the config file name
@@ -915,16 +926,47 @@
(let ((tl (launch:setup))
(transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
(server:launch 0 transport-type)
(set! *didsomething* #t)))
+
+(define (naylist->alist inlst)
+ (map (lambda (dat)
+ (cons (car dat)
+ (or (if (list? (cdr dat))
+ (if (null? (cdr dat)) ""
+ (cadr dat))
+ (cdr dat))
+ ""))) ;; we need a string for call-with-environment-variables
+ inlst))
+
;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
- (begin
- (adjutant-run)
+ (let* ((host-type (args:get-arg "-adjutant")))
+ (launch:setup) ;; dang it, wish this wasn't needed
+ (print "Running the adjutant!")
+ (let loop ((wait-count 0))
+ (if (< wait-count 10) ;; 6 x 10 seconds = one minute
+ (let* ((dat (rmt:no-sync-take-job host-type)))
+ (match dat
+ ((id ht vars exekey cmdline state event-time last-update)
+ (let ((vars-alist (with-input-from-string vars read)
+ ))
+ (print "Vars:")
+ (pp vars-alist)
+ (call-with-environment-variables
+ (naylist->alist vars-alist)
+ (lambda ()
+ (system cmdline))))
+ (loop 0))
+ (else
+ (thread-sleep! 10)
+ (loop (+ wait-count 1)))))
+ (print "I'm bored. Exiting.")))
+ ;; (adjutant-run (args:get-arg "-ajutant") rmt:no-sync-take-job)
(set! *didsomething* #t)))
(if (or (args:get-arg "-list-servers")
(args:get-arg "-kill-servers"))
(let ((tl (launch:setup)))
ADDED mttop.scm
Index: mttop.scm
==================================================================
--- /dev/null
+++ mttop.scm
@@ -0,0 +1,55 @@
+;; Copyright 2006-2011, Matthew Welland.
+;;
+;; This program is made available under the GNU GPL version 2.0 or
+;; greater. See the accompanying file COPYING for details.
+;;
+;; This program is distributed WITHOUT ANY WARRANTY; without even the
+;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;; PURPOSE.
+
+;; This is from the perl world, a hash of hashes is a super easy way to keep a handle on
+;; lots of disparate data
+;;
+
+(declare (unit mttop))
+
+(module mttop
+ *
+
+(import chicken scheme
+ ;; data-structures posix
+ srfi-1
+ ;; srfi-13
+ srfi-69
+ ports
+ extras
+ regex
+ posix
+ data-structures
+ matchable
+ )
+
+(define (str-is-cmd cmd all-cmds)
+ (let* ((rx (regexp (conc "^" cmd ".*")))
+ (mx (filter string? (map (lambda (x)
+ (let ((res (string-match rx x)))
+ (if res (car res) #f)))
+ all-cmds))))
+ (if (eq? (length mx) 1) ;; have a command
+ (car mx)
+ #f)))
+
+(define (mttop-run args all-cmds)
+ ;; any path through this call must end in exit if it is NOT an old Megatest call
+ (if (null? args)
+ #f ;; continue on and do the old Megatest stuff
+ (let ((cmd (str-is-cmd (car args) all-cmds)))
+ (if cmd
+ (begin
+ (case (string->symbol cmd)
+ ((help)(print "New help"))
+ (else (print "Command " cmd " is not implemented yet.")))
+ (exit)) ;; always exit here
+ #f)))) ;; or continue on to Megatest old stuff here
+
+)
Index: mtut.scm
==================================================================
--- mtut.scm
+++ mtut.scm
@@ -154,10 +154,11 @@
show [areas|contours... ] : show areas, contours or other section from megatest.config
gendot : generate a graphviz dot file from pkts.
Contour actions:
process : runs import, rungen and dispatch
+ go : runs import, rungen and dispatch every five minutes forever
Trigger propagation actions:
tsend a=b,c=d... : send trigger info to all recpients in the [listeners] section
tlisten -port N : listen for trigger info on port N
@@ -773,11 +774,11 @@
(begin
(print-call-chain)
(print "FAILED TO RUN RUNNAME MAPPER " callname " FOR AREA " area)
(print " message: " ((condition-property-accessor 'exn 'message) exn))
runname)
- (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")")
+ (print "(mapper " (string-intersperse (map conc (list runkey runname area area-path reason contour mode-patt)) ", ") ")")
(mapper runkey runname area area-path reason contour mode-patt))
(case callname
((auto #f) runname)
(else runtrans)))))
(new-target target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour))
@@ -1323,11 +1324,13 @@
(begin
(print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"")
res)))))
(conc "megatest " (if (not (member action '("sync")))
(conc action " " action-param)
- ""))
+ "") (if (member action '("-run" "-rerun-clean" "-rerun-all" "-kill-rerun"))
+ "-rerun DEAD,ABORT,KILLED"
+ ""))
pkta)))
;; (use trace)(trace pkt->cmdline)
(define (write-pkt pktsdir uuid pkt)
@@ -1610,23 +1613,70 @@
;; (hash-table-keys adjargs))
(let-values (((uuid pkt)
(command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss)))
(print "run log @ " (conc (current-directory) "/" uuid "-" *action* ".log"))
(write-pkt pktsdir uuid pkt))))
- ((dispatch import rungen process)
+ ((dispatch import rungen process go)
(let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
(mtconf (car mtconfdat))
- (toppath (configf:lookup mtconf "scratchdat" "toppath")))
+ (toppath (configf:lookup mtconf "scratchdat" "toppath"))
+ (period (configf:lookup-number mtconf "mtutil" "autorun-period" default: 300))
+ (rest-time (configf:lookup-number mtconf "mtutil" "autorun-rest" default: 30)))
+ (print "Using period="period" and rest time="rest-time)
(case (string->symbol *action*)
((process) (begin
(common:load-pkts-to-db mtconf)
(generate-run-pkts mtconf toppath)
(common:load-pkts-to-db mtconf)
(dispatch-commands mtconf toppath)))
((import) (common:load-pkts-to-db mtconf)) ;; import pkts
((rungen) (generate-run-pkts mtconf toppath))
- ((dispatch) (dispatch-commands mtconf toppath)))))
+ ((dispatch) (dispatch-commands mtconf toppath))
+ ;; [mtutil]
+ ;; # approximate interval between run processing in mtutil (seconds)
+ ;; autorun-period 300
+ ;; # minimal rest period between processing
+ ;; autorun-rest 30
+ ((go)
+ ;; determine if I'm the boss
+ (if (file-exists? "mtutil-go.pid")
+ (begin
+ (print "ERROR: mtutil go is already running under host and pid " (with-input-from-file "mtutil-go.pid" read-line)
+ ". Please kill that process and remove the file \"mutil-go.pid\" and try again.")
+ (exit)))
+ (with-output-to-file "mtutil-go.pid" (lambda ()(print (get-host-name) " " (current-process-id))))
+ (print "Starting long running import, rungen, and process loop")
+ (if (file-exists? "do-not-run-mtutil-go")
+ (begin
+ (print "NOTE: Removing flag file "(current-directory)"/do-not-run-mtutil-go")
+ (delete-file* "do-not-run-mtutil-go")))
+ (let loop ((last-run (- (current-seconds) (+ period 10))) ;; fake out first time in
+ (this-run (current-seconds)))
+ (if (file-exists? "do-not-run-mtutil-go")
+ (begin
+ (print "File do-not-run-mtutil-go exists, exiting.")
+ (delete-file* "mtutil-go.pid")
+ (exit)))
+ (let ((delta (- this-run last-run)))
+ (if (>= delta period)
+ (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
+ (mtconf (car mtconfdat)))
+ (print "Running import at " (current-seconds))
+ (common:load-pkts-to-db mtconf)
+ (print "Running generate run pkts at " (current-seconds))
+ (generate-run-pkts mtconf toppath)
+ (print "Running run dispatch at " (current-seconds))
+ (common:load-pkts-to-db mtconf)
+ (dispatch-commands mtconf toppath)
+ (print "Done running import, generate, and dispatch done in " (- (current-seconds) this-run))
+ (print "NOTE: touch " (current-directory) "/do-not-run-mtutil-go to kill this runner.")
+ (loop this-run (current-seconds)))
+ (let ((now (current-seconds)))
+ (print "Sleeping " rest-time " seconds, next run in aproximately " (- period (- now last-run)) " seconds")
+ (thread-sleep! rest-time)
+ (loop last-run (current-seconds))))))
+ (delete-file* "mtutil-go.pid")))))
;; misc
((show)
(if (> (length remargs) 0)
(let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
(mtconf (car mtconfdat))
@@ -1805,46 +1855,52 @@
)
)))
(loop (nn-recv rep))))
(print "ERROR: Port " portnum " already in use. Try another port")))))))
-
-
-
-
- ((tlisten)
- (if (null? remargs)
- (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"")
- (let ((portnum (string->number (car remargs))))
-
- (if (not portnum)
- (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs))
- (begin
- (if (not (is-port-in-use portnum))
- (let* ((rep (start-nn-server portnum))
- (mtconfdat (simple-setup (args:get-arg "-start-dir")))
- (mtconf (car mtconfdat))
- (contact (configf:lookup mtconf "listener" "owner"))
- (script (configf:lookup mtconf "listener" "script")))
- (print "Listening on port " portnum " for messages.")
- (set-signal-handler! signal/int (lambda (signum)
- (set! *time-to-exit* #t)
- (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!")
- (let ((email-body (mtut:stml->string (s:body
- (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". "))))))
- (sendmail contact "Listner has been terminated." email-body use_html: #t))
- (exit)))
- (set-signal-handler! signal/term (lambda (signum)
- (set! *time-to-exit* #t)
- (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!")
- (let ((email-body (mtut:stml->string (s:body
- (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". "))))))
- (sendmail contact "Listner has been terminated." email-body use_html: #t))
- (exit)))
-
- ;(set-signal-handler! signal/term special-signal-handler)
-
+
+ ((tlisten)
+ (if (null? remargs)
+ (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"")
+ (let ((portnum (string->number (car remargs))))
+
+ (if (not portnum)
+ (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs))
+ (begin
+ (if (not (is-port-in-use portnum))
+ (let* ((rep (start-nn-server portnum))
+ (mtconfdat (simple-setup (args:get-arg "-start-dir")))
+ (mtconf (car mtconfdat))
+ (contact (configf:lookup mtconf "listener" "owner"))
+ (script (configf:lookup mtconf "listener" "script")))
+ (print "Listening on port " portnum " for messages.")
+ (set-signal-handler! signal/int
+ (lambda (signum)
+ (set! *time-to-exit* #t)
+ (debug:print-error 0 *default-log-port* "Received signal " signum
+ " sending email befor exiting !!")
+ (let ((email-body (mtut:stml->string
+ (s:body
+ (s:p (conc "Received signal " signum
+ ". Lister has been terminated on host "
+ (get-environment-variable "HOST") ". "))))))
+ (sendmail contact "Listner has been terminated." email-body use_html: #t))
+ (exit)))
+ (set-signal-handler! signal/term (lambda (signum)
+ (set! *time-to-exit* #t)
+ (debug:print-error 0 *default-log-port* "Received signal "
+ signum " sending email befor exiting !!")
+ (let ((email-body (mtut:stml->string
+ (s:body
+ (s:p (conc "Received signal " signum
+ ". Lister has been terminated on host "
+ (get-environment-variable "HOST") ". "))))))
+ (sendmail contact "Listner has been terminated." email-body use_html: #t))
+ (exit)))
+
+ ;; (set-signal-handler! signal/term special-signal-handler)
+
(let loop ((instr (nn-recv rep)))
(nn-send rep "ok")
(let ((ctime (date->string (current-date))))
(if (equal? instr "time-to-die")
(begin
Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -969,10 +969,19 @@
(define (rmt:no-sync-del! var)
(rmt:send-receive 'no-sync-del! #f `(,var)))
(define (rmt:no-sync-get-lock keyname)
(rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+
+(define (rmt:no-sync-add-job host-type vars-list exekey cmdline)
+ (rmt:send-receive 'no-sync-add-job #f `(,host-type ,vars-list ,exekey ,cmdline)))
+
+(define (rmt:no-sync-take-job host-type)
+ (rmt:send-receive 'no-sync-take-job #f `(,host-type)))
+
+(define (rmt:no-sync-job-records-clean)
+ (rmt:set-receive 'no-sync-job-records-clean #f '()))
;;======================================================================
;; A R C H I V E S
;;======================================================================
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -350,15 +350,14 @@
(if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
(let* ((num-running (rmt:get-count-tests-running run-id))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
- (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
- (if (string? jobg-count)
- (string->number jobg-count)
- jobg-count))))
- (if (> (+ num-running num-running-in-jobgroup) 0)
+ (job-group-limit (configf:lookup-number *configdat* "jobgroups" jobgroup)))
+ (if (and (number? num-running) ;; checking for number - had a crash where a non-number was returned. Not sure why.
+ (number? num-running-in-jobgroup) ;; probably can remove this when rmt switches away from http.
+ (> (+ num-running num-running-in-jobgroup) 0))
(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
(if (not (eq? *last-num-running-tests* num-running))
(begin
(debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
(set! *last-num-running-tests* num-running)))
@@ -419,11 +418,13 @@
(debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
(debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
(system (conc run-pre-hook " >> " actual-logf " 2>&1"))
(debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
(debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
-
+
+
+
(define (runs:run-post-hook run-id)
(let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook"))
(existing-tests (if run-post-hook
(rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
#f #f ;; offset limit
@@ -458,10 +459,65 @@
(debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
(debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file))
(debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
(system (conc run-post-hook " >> " actual-logf " 2>&1"))
(debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
+
+
+(define (runs:rerun-hook test-id new-test-path testdat rerunlst)
+ (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook"))
+ (log-dir (conc *toppath* "/reruns/logs"))
+ (target (getenv "MT_TARGET"))
+ (runname (common:args-get-runname))
+ (rundir (db:test-get-rundir testdat))
+ (tarfiledir (conc *toppath* "/reruns"))
+ (status (db:test-get-status testdat))
+ (comment (conc "\"" (db:test-get-comment testdat) "\"" ))
+ (testname (db:test-get-testname testdat))
+ (itempath (db:test-get-item-path testdat))
+ (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "")))
+ (log-file (conc file-body ".log"))
+ ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log"))
+ (full-log-fname (conc log-dir "/" log-file))
+ (tarfilename (conc file-body ".tar"))
+ ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar"))
+ )
+ (if rerun-hook
+ (let* ((use-log-dir (if (not (directory-exists? log-dir))
+ (handle-exceptions
+ exn
+ (begin
+ (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn)
+ #f)
+ (create-directory log-dir #t)
+ #t)
+ #t))
+ (start-time (current-seconds))
+ (actual-logf (if use-log-dir full-log-fname log-file))
+ (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1"))
+ )
+ (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook)
+ (handle-exceptions
+ exn
+ (begin
+ (print-call-chain *default-log-port*)
+ (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+ (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file))
+ (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf)
+ ;; call the hook
+ (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst)
+ (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook)
+ (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename)
+ (debug:print-info 0 *default-log-port* "rundir: " rundir)
+ (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf)
+ (debug:print-info 0 *default-log-port* "runname: " runname)
+ (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text)
+ (system sys-call-text)
+ (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
+
+
+
;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
(null? (tests:filter-test-names-not-matched waitors-upon test-patt)))
@@ -493,11 +549,13 @@
(task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
;; (tdbdat (tasks:open-db))
(config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns")))
(if x (string->number x) #f)))
(allowed-tests #f)
- (runconf #f))
+ (runconf #f)
+ (cache-files (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target))
+ (runstart-time (current-seconds)))
;; check if readonly
(when readonly-mode
(debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.")
(exit 1))
@@ -631,11 +689,11 @@
(runs:update-all-test_meta #f)
;; run the run prehook if there are no tests yet run for this run:
;;
(runs:run-pre-hook run-id)
- ;; mark all test launced flag as false in the meta table
+ ;; mark all test launched flag as false in the meta table
(rmt:set-var (conc "lunch-complete-" run-id) "no")
(debug:print-info 1 *default-log-port* "Setting end-of-run to no")
(let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns")))
(if x (string->number x) #f)))
(config-rerun-cnt (if config-reruns
@@ -780,20 +838,25 @@
(thread-start! th2)
(runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
(any->number reglen) all-tests-registry)
(set! keep-going #f)
(thread-join! th2)
+ (if (launch:cache-files-changed? cache-files runstart-time)
+ (begin ;; force a start-over
+ (launch:setup force-reread: #t)
+ (runs:run-tests target runname test-patts user flags run-count: 0)))
+
;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
(if (> run-count 0) ;; handle reruns
(begin
(if (not (hash-table-ref/default flags "-preclean" #f))
(hash-table-set! flags "-preclean" #t))
(if (not (hash-table-ref/default flags "-rerun" #f))
- (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
+ (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
;; recursive call to self
(runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
- (launch:end-of-run-check run-id)))
+ (launch:end-of-run-check run-id)))
(debug:print-info 0 *default-log-port* "No tests to run")))
(debug:print-info 4 *default-log-port* "All done by here")
;; TODO: try putting post hook call here
; (debug:print-info 2 *default-log-port* " run-count " run-count)
@@ -2056,25 +2119,34 @@
(or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
(member (test:get-state testdat) '("COMPLETED"))))
(debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
(hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
(set! runflag #f))
+
;; -rerun and status is one of the specifed, run it
((and rerun
(let* ((rerunlst (string-split rerun ","))
(must-rerun (member (test:get-status testdat) rerunlst)))
(debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
must-rerun))
(debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
- (set! runflag #t))
- ;; -keepgoing, do not rerun FAIL
+ (set! runflag #t)
+ (debug:print-info 2 *default-log-port* "Calling rerun hook")
+ (runs:rerun-hook test-id new-test-path testdat rerun)
+ )
+
+
+
+ ;; -keepgoing, do not rerun FAIL
((and keepgoing
(member (test:get-status testdat) '("FAIL")))
(set! runflag #f))
- ((and (not rerun)
+
+ ((and (not rerun)
(member (test:get-status testdat) '("FAIL" "n/a")))
(set! runflag #t))
+
(else (set! runflag #f)))
(debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
(if (not runflag)
(if (not parent-test)
(if (runs:lownoise (conc "not starting test" full-test-name) 60)
Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -322,29 +322,75 @@
*my-client-signature*)))
;; wait for server=start-last to be three seconds old
;;
(define (server:wait-for-server-start-last-flag areapath)
+ (let* ((flag-dir (conc areapath "/logs"))
+ (start-flag (conc flag-dir "/server-start-last"))
+ ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+ (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4))
+ (server-key (conc (get-host-name) "-" (current-process-id)))
+ (create-key-file (lambda ()
+ (with-output-to-file start-flag
+ (lambda ()
+ (print server-key)))))
+ (check-key-file (lambda ()
+ (let ((res (with-input-from-file start-flag
+ (lambda ()
+ (read-line)))))
+ (equal? server-key res))))
+ (get-file-age (lambda ()
+ (let* ((fmodtime (file-modification-time start-flag)))
+ (- (current-seconds) fmodtime)))))
+ (if (not (directory-exists? flag-dir))
+ (begin
+ (debug:print-info 0 *default-log-port* "Directory " flag-dir " does not exist! Cannot gate.")
+ #f)
+ (if (file-exists? start-flag)
+ (if (check-key-file) ;; is it me?
+ #t ;; yes, it is me, proceed
+ (let* ((file-age (get-file-age)))
+ (if (> file-age reftime) ;; let the previous guy have at least 4 seconds to do their thing
+ (begin ;; file is old enough, we can try to take it
+ (create-key-file) ;; take the file and try again
+ (server:wait-for-server-start-last-flag areapath))
+ (let* ((remtime (max 1 (min file-age reftime))))
+ (debug:print-info 0 *default-log-port* "Gating server start, waiting remtime="remtime)
+ (thread-sleep! remtime)
+ (server:wait-for-server-start-last-flag areapath)))))
+ (begin
+ (create-key-file)
+ (server:wait-for-server-start-last-flag areapath))))))
+
+
+
+;; wait for server=start-last to be three seconds old
+;;
+(define (server:wait-for-server-start-last-flag-old areapath)
(let* ((start-flag (conc areapath "/logs/server-start-last"))
;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
(reftime (configf:lookup-number *configdat* "server" "idletime" default: 4))
- (server-key (conc (get-host-name) "-" (current-process-id))))
+ (server-key (conc (get-host-name) "-" (current-process-id)))
+ (create-key-file (lambda ()
+ (with-output-to-file start-flag
+ (lambda ()
+ (print server-key)))))
+ (check-key-file (lambda ()
+ (let ((res (with-input-from-file start-flag
+ (lambda ()
+ (read-line)))))
+ (equal? server-key res)))))
;; (thread-sleep! (/ (random 500) 1000)) ;; I don't think this made a difference
(if (file-exists? start-flag)
(let* ((fmodtime (file-modification-time start-flag))
(delta (- (current-seconds) fmodtime))
(all-go (> delta reftime)))
(if (and all-go
(begin
- (with-output-to-file start-flag
- (lambda ()
- (print server-key)))
+ (create-key-file)
(thread-sleep! 0.25)
- (let ((res (with-input-from-file start-flag
- (lambda ()
- (read-line)))))
- (equal? server-key res))))
+ (check-key-file)))
#t ;; (system (conc "touch " start-flag)) ;; lazy but safe
(begin
(debug:print-info 0 *default-log-port* "Gating server start, last start: "
fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
(thread-sleep! reftime)
Index: tests/unittests/all-api.scm
==================================================================
--- tests/unittests/all-api.scm
+++ tests/unittests/all-api.scm
@@ -116,11 +116,11 @@
(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-run (list 2))) 0)) ;; delete a non-existant run
(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'update-run-stats (list 1 '()))) 0))
(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-main-run-stats (list 1 ))) 0))
(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-old-deleted-test-records '())) 0))
(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-runs (list "%" 10 0 keypatts))) 0))
-(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts))) 0))
+(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts 0))) 0))
(test #f #(#t (1))(api:execute-requests my-dbstruct (vector 'get-all-run-ids '())))
(test #f #(#t ()) (api:execute-requests my-dbstruct (vector 'get-prev-run-ids '(1))))
(test #f #(#t "JUSTFINE") (api:execute-requests my-dbstruct (vector 'get-run-status '(1))))
(test #f #(#t "NEW") (api:execute-requests my-dbstruct (vector 'get-run-state '(1))))
(test #f #(#t (("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1))) (api:execute-requests my-dbstruct (vector 'get-run-stats '())))
Index: tests/unittests/all-rmt.scm
==================================================================
--- tests/unittests/all-rmt.scm
+++ tests/unittests/all-rmt.scm
@@ -38,12 +38,12 @@
(test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait
(test #f #t (list? (server:get-list toppath)))
(test #f '() (server:get-best '()))
(test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15))
(test #f "test.lock" (common:simple-file-release-lock "test.lock"))
-(test #f #t (server:get-best-guess-address (get-host-name)))
-(test #f #t (string? (common:get-homehost)))
+(test #f #t (string? (server:get-best-guess-address (get-host-name))))
+(test #f #t (string? (car (common:get-homehost))))
;; clean out any old running servers
;;
(let ((servers (server:get-list toppath)))
(print "Known servers: " servers)
@@ -68,11 +68,11 @@
(thread-sleep! 2)
;; (test #f #t (string? (server:start-and-wait *toppath*)))
(test "setup for run" #t (begin (launch:setup)
(string? (getenv "MT_RUN_AREA_HOME"))))
-(test #f #t (client:setup-http toppath))
+(test #f #t (vector? (client:setup-http toppath)))
(test #f #t (vector? (client:setup toppath)))
(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
ADDED utils/Makefile.whodunit
Index: utils/Makefile.whodunit
==================================================================
--- /dev/null
+++ utils/Makefile.whodunit
@@ -0,0 +1,9 @@
+.DEFAULT : all
+
+all : whodunit
+
+clean :
+ rm whodunit
+
+whodunit : whodunit.scm
+ csc -static -L -static -L -lm -L -ldl -L -lpthread -L -lssl -L -lcrypto -L -lz whodunit.scm -o whodunit
Index: utils/mk_wrapper
==================================================================
--- utils/mk_wrapper
+++ utils/mk_wrapper
@@ -19,10 +19,11 @@
prefix=$1
cmd=$2
target=$3
cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh"
+libdir="$prefix/bin/.$(lsb_release -sr)/lib"
# we wish to create a var in cfg.sh for finding sqlite3 executable
chicken_bin_dir=$(dirname $(which csi))
if [[ -e $chicken_bin_dir/sqlite3 ]];then
sqlite3_exe=$chicken_bin_dir/sqlite3
@@ -30,19 +31,20 @@
sqlite3_exe=$(which sqlite3)
fi
if [ "$LD_LIBRARY_PATH" != "" ];then
echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2
+ echo "INFO: Writing $cfgfile" >&2
( cat << __EOF
if [ -z \$MT_ORIG_ENV ]; then
export MT_ORIG_ENV=\$( $prefix/bin/serialize-env )
fi
if [ "\$LD_LIBRARY_PATH" != "" ];then
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH:$libdir
else
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir
fi
export MT_SQLITE3_EXE=$sqlite3_exe
__EOF
) > $cfgfile
Index: utils/whodunit.scm
==================================================================
--- utils/whodunit.scm
+++ utils/whodunit.scm
@@ -13,11 +13,25 @@
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see .
;;
-(use posix srfi-69)
+(module whodunit
+
+ *
+
+
+(import
+ (chicken process-context)
+ (chicken process)
+ (chicken string)
+ (chicken base)
+ (chicken sort)
+ (chicken io)
+ srfi-69
+ scheme
+)
(define *numsamples* (or (and (> (length (argv)) 1)
(string->number (cadr (argv))))
3))
@@ -24,11 +38,11 @@
(define (topdata)
(with-input-from-pipe
(conc "top -b -n " *numsamples* " -d 0.1")
read-lines))
-(define (cleanup-data topdat)list
+(define (cleanup-data topdat)
(let loop ((hed (car topdat))
(tal (cdr topdat))
(res '()))
(let* ((line-list (string-split hed))
(nums (map (lambda (indat)(or (string->number indat) indat)) line-list))
@@ -37,29 +51,37 @@
(new-res (if not-data res (cons nums res))))
(if (null? tal)
new-res
(loop (car tal)(cdr tal) new-res)))))
+
+;; sum up
+(define (sum-up data ht)
+ (for-each
+ (lambda (indat)
+ (let ((pid (car indat))
+ (usr (cadr indat))
+ (cpu (list-ref indat 8)))
+ (hash-table-set! ht usr (+ cpu (hash-table-ref/default ht usr 0)))))
+ data))
+
+(define (print-results userhash)
+ (for-each
+ (lambda (usr)
+ (let* ((usage (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*)))))
+ (if (> usage 0)
+ (print usr (if (< (string-length usr) 8) "\t\t" "\t") usage))))
+ (sort (hash-table-keys userhash)
+ (lambda (a b)
+ (> (hash-table-ref userhash a)
+ (hash-table-ref userhash b))))))
+
+)
+(import whodunit srfi-69 (chicken sort))
+
(print "Getting " *numsamples* " samples of cpu usage data.")
(define data (cleanup-data (topdata)))
(define pidhash (make-hash-table))
(define userhash (make-hash-table))
-
-;; sum up and normalize the
-(for-each
- (lambda (indat)
- (let ((pid (car indat))
- (usr (cadr indat))
- (cpu (list-ref indat 8)))
- (hash-table-set! userhash usr (+ cpu (hash-table-ref/default userhash usr 0)))))
- data)
-
-(for-each
- (lambda (usr)
- (print usr
- (if (< (string-length usr) 8) "\t\t" "\t")
- (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*)))))
- (sort (hash-table-keys userhash)
- (lambda (a b)
- (> (hash-table-ref userhash a)
- (hash-table-ref userhash b)))))
-
+(sum-up data userhash)
+
+(print-results userhash)