Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -28,11 +28,11 @@ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files -MSRCFILES = mutils.scm +MSRCFILES = adjutant.scm mutils.scm mttop.scm # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm @@ -315,19 +315,40 @@ # install dashboard as dboard so wrapper script can be called dashboard $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard chmod a+x $(PREFIX)/bin/dashboard $(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 : lib/libpangox-1.0.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libpangox-1.0.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0; \ + fi + +$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 : lib/libxcb-xlib.so.0 + if [[ $(ARCHSTR) == 12.5 ]]; then \ + mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \ + $(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \ + fi install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \ - $(PREFIX)/share/js/jquery-3.1.0.slim.min.js + $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \ + $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 # $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : Index: adjutant.scm ================================================================== --- adjutant.scm +++ adjutant.scm @@ -22,12 +22,23 @@ (module adjutant * (import scheme chicken data-structures extras files) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 - md5 message-digest + md5 message-digest matchable regex srfi-1) -(define (adjutant-run) - (print "Running the adjutant!")) +(define (adjutant-run host-type rmt:no-sync-take-job) + (print "Running the adjutant!") + (let loop ((wait-count 0)) + (if (< wait-count 10) ;; 6 x 10 seconds = one minute + (let* ((dat (rmt:no-sync-take-job host-type))) + (match dat + ((id ht vars exekey cmdline state event-time last-update) + (system cmdline) + (loop 0)) + (else + (thread-sleep! 10) + (loop (+ wait-count 1))))) + (print "I'm bored. Exiting.")))) ) Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -253,17 +253,20 @@ ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) + ((no-sync-add-job) (apply db:no-sync-add-job *no-sync-db* params)) + ((no-sync-take-job) (apply db:no-sync-take-job *no-sync-db* params)) + ((no-sync-job-records-clean) (apply db:no-sync-job-records-clean *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - + ;;====================================================================== ;; READ ONLY QUERIES ;;====================================================================== ;; KEYS Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -3210,31 +3210,39 @@ ;; arm cubie01 cubie02 ;; x86_64 zeus xena myth01 ;; allhosts #{g hosts arm} #{g hosts x86_64} ;; ;; [host-types] +;; C/M/A lets megatest know this launcher provides C cores, M bytes memory for architecture A +;; 2/2G/arm smart -cores 2 -memory 2G -arch arm ;; general #MTLOWESTLOAD #{g hosts allhosts} ;; arm #MTLOWESTLOAD #{g hosts arm} ;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo +;; +;; NOTE: host-rules is ONLY used for MTLOWESTLOAD ;; ;; [host-rules] ;; # maxnload => max normalized load ;; # maxnjobs => max jobs per cpu ;; # maxjobrate => max jobs per second ;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 ;; ;; [launchers] ;; envsetup general -;; xor/%/n 4C16G +;; xor/%/n 2/2G/arm ;; % nbgeneral ;; ;; [jobtools] ;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. ;; flexi-launcher yes ;; launcher nbfake +;; mode adjutant|normal (default is normal) +;; +;; +;; mode is 'normal (i.e. directly use launcher) or 'adjutant (i.e. use adjutant) ;; -(define (common:get-launcher configdat testname itempath) +(define (common:get-launcher configdat testname itempath mode) (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) (if (null? launchers) @@ -3241,32 +3249,36 @@ fallback-launcher (let loop ((hed (car launchers)) (tal (cdr launchers))) (let ((patt (car hed)) (host-type (cadr hed))) - (if (tests:match patt testname itempath) + (if (tests:match patt testname itempath) ;; have a launcher match for this test (begin (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) - (let ((launcher (configf:lookup configdat "host-types" host-type))) + (let ((launcher (configf:lookup configdat "host-types" host-type))) ;; find the actual launcher from the host-types table + ;; if we are in adjutant mode then we want to return both host-type and launcher (if launcher (let* ((launcher-parts (string-split launcher)) (launcher-exe (car launcher-parts))) (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) (count 100)) (if targ-host (conc "remrun " targ-host) (if (> count 0) + (begin (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) (thread-sleep! (- 101 count)) (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) (- count 1))) (begin (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) (exit))))) - launcher)) + (case mode + ((adjutant) (list host-type launcher)) + (else launcher)))) (begin (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) (if (null? tal) fallback-launcher (loop (car tal)(cdr tal))))))) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -530,12 +530,12 @@ (define configf:read-file read-config) ;; safely look up a value that is expected to be a number, return ;; a default (#f unless provided) ;; -(define (configf:lookup-number cfdat section varname #!key (default #f)) - (let* ((val (configf:lookup *configdat* section varname)) +(define (configf:lookup-number cfgdat section varname #!key (default #f)) + (let* ((val (configf:lookup cfgdat section varname)) (res (if val (string->number (string-substitute "\\s+" "" val #t)) #f))) (cond (res res) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -324,12 +324,12 @@ (lambda (state color) (for-each (lambda (btn) (let* ((name (iup:attribute btn "TITLE")) (newcolor (if (equal? name state) color "192 192 192"))) - (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) - (iup:attribute-set! btn "BGCOLOR" newcolor)))) + (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR"))) + (iup:attribute-set! btn "FGCOLOR" newcolor)))) btns))) btns)) (apply iup:hbox (iup:label "STATUS:" #:size "30x") (let* ((btns (map (lambda (status) @@ -358,12 +358,12 @@ (lambda (status color) (for-each (lambda (btn) (let* ((name (iup:attribute btn "TITLE")) (newcolor (if (equal? name status) color "192 192 192"))) - (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR"))) - (iup:attribute-set! btn "BGCOLOR" newcolor)))) + (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR"))) + (iup:attribute-set! btn "FGCOLOR" newcolor)))) btns))) btns)))))) (define (dashboard-tests:run-a-step info) #t) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -1169,13 +1169,14 @@ (button (vector-ref columndat rown)) (color (car (gutils:get-color-for-state-status teststate teststatus))) (curr-color (vector-ref buttondat 1)) ;; (iup:attribute button "BGCOLOR")) (curr-title (vector-ref buttondat 2))) ;; (iup:attribute button "TITLE"))) (if (not (equal? curr-color color)) - (iup:attribute-set! button "BGCOLOR" color)) + #;(iup:attribute-set! button "BGCOLOR" color) + (iup:attribute-set! button "FGCOLOR" color)) (if (not (equal? curr-title buttontxt)) - (iup:attribute-set! button "TITLE" buttontxt)) + (iup:attribute-set! button "TITLE" (conc "" buttontxt ""))) (vector-set! buttondat 0 run-id) (vector-set! buttondat 1 color) (vector-set! buttondat 2 buttontxt) (vector-set! buttondat 3 testdat) (vector-set! buttondat 4 run-key))) @@ -2500,22 +2501,22 @@ (set! hide (iup:button "Hide" #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL" #:action (lambda (obj) (dboard:tabdat-hide-not-hide-set! tabdat #t) ;; (not (dboard:tabdat-hide-not-hide tabdat))) ;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-not-hide tabdat) "HideTests" "NotHide")) - (iup:attribute-set! hide "BGCOLOR" sel-color) - (iup:attribute-set! show "BGCOLOR" nonsel-color) + (iup:attribute-set! hide "FGCOLOR" sel-color) + (iup:attribute-set! show "FGCOLOR" nonsel-color) (mark-for-update tabdat)))) (set! show (iup:button "Show" #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL" #:action (lambda (obj) (dboard:tabdat-hide-not-hide-set! tabdat #f) ;; (not (dboard:tabdat-hide-not-hide tabdat))) - (iup:attribute-set! show "BGCOLOR" sel-color) - (iup:attribute-set! hide "BGCOLOR" nonsel-color) + (iup:attribute-set! show "FGCOLOR" sel-color) + (iup:attribute-set! hide "FGCOLOR" nonsel-color) (mark-for-update tabdat)))) - (iup:attribute-set! hide "BGCOLOR" sel-color) - (iup:attribute-set! show "BGCOLOR" nonsel-color) + (iup:attribute-set! hide "FGCOLOR" sel-color) + (iup:attribute-set! show "FGCOLOR" nonsel-color) ;; (dboard:tabdat-hide-not-hide-button-set! tabdat hideit) ;; never used, can eliminate ... (iup:vbox (iup:hbox hide show) sort-lb))) ) @@ -2884,10 +2885,11 @@ (let* ((button-key (mkstr runnum testnum)) (butn (iup:button "" ;; button-key #:size (conc cell-width btn-height ) #:expand "HORIZONTAL" + #:MARKUP "YES" #:fontsize btn-fontsz #:button-cb (lambda (obj a pressed x y btn . rem) ;; (print "pressed= " pressed " x= " x " y= " y " rem=" rem " btn=" btn " string? " (string? btn)) (if (substring-index "3" btn) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2166,14 +2166,66 @@ (db-exists (common:file-exists? dbname)) (db (sqlite3:open-database dbname))) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) (if (not db-exists) (begin - (sqlite3:execute db "PRAGMA synchronous = 0;") (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) + ;; MOVE THIS TABLE CREATION TO THE (begin above in about six months (it is Sep 2020 right now). + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS jobs_queue (id INTEGER PRIMARY KEY, host_type TEXT, cores INTEGER, memory TEXT, vars TEXT, exekey TEXT, cmdline TEXT, state TEXT, event_time INTEGER, last_update INTEGER);") + ;; not sure I'll use this next one. I prefer if tests simply append to a file: + ;; last-update-seconds cpuload tmpspace rundirspace + (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_extra_data (id INTEGER PRIMARY KEY, run_id INTEGER, test_id INTEGER, last_seen_running INTEGER);") + (sqlite3:execute db "PRAGMA synchronous = 0;") db)) + +(define (db:no-sync-add-job db-in host-type vars-list exekey cmdline) + (sqlite3:execute (db:no-sync-db db-in) "INSERT INTO jobs_queue (host_type,vars,exekey,cmdline,state,event_time,last_update) VALUES (?,?,?,?,?,?,?);" + host-type + (with-output-to-string + (lambda () + (write vars-list))) + exekey cmdline "waiting" (current-seconds)(current-seconds))) + +;; find next job (waiting longest) that matches host-type - future, we'll find jobs that fit if no exact match +(define (db:no-sync-take-job db-in host-type) + (let* ((db (db:no-sync-db db-in)) + (stmt1 "SELECT id,host_type,vars,exekey,cmdline,state,event_time,last_update FROM jobs_queue WHERE host_type=? AND state != 'taken' ORDER BY event_time ASC;") + (stmt1h (sqlite3:prepare db stmt1)) + (stmt2 "UPDATE jobs_queue SET state='taken',last_update=? WHERE id=?;") + (stmt2h (sqlite3:prepare db stmt2)) + (res (sqlite3:with-transaction + db + (lambda () + (let* ((matching-jobs (sqlite3:fold-row + (lambda (res . row) ;; id host-type vars exekey state event-time last-update) + (cons row res)) + '() + stmt1h + host-type))) + (if (null? matching-jobs) + #f + (let ((choosen-one (let loop ((tal matching-jobs) + (res #f)) ;; put bestest one in here + (if (null? tal) + res + (let ((curr (car tal)) + (rem (cdr tal))) + curr) ;; here we will compare with res, if better candidate the loop with curr else loop with res + )))) + (if choosen-one ;; we need to mark it as taken + (sqlite3:execute stmt2h (current-seconds) (car choosen-one))) + choosen-one))))))) + (sqlite3:finalize! stmt1h) ;; it'd be nice to cache these and finalize on exit. + (sqlite3:finalize! stmt2h) + res)) + +;; clean out old jobs in queue, i.e. taken and event_time > 24 hrs ago +;; +(define (db:no-sync-job-records-clean db) + (sqlite3:execute (db:no-sync-db db) "DELETE FROM jobs_queue WHERE state='taken' AND event_time < ?;" (- (current-seconds)(* 24 3600)))) + ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; @@ -2191,12 +2243,13 @@ (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) -(define (db:no-sync-get/default db var default) - (let ((res default)) +(define (db:no-sync-get/default db-in var default) + (let ((db (db:no-sync-db db-in)) + (res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) (db:no-sync-db db) "SELECT val FROM no_sync_metadat WHERE var=?;" Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -557,26 +557,31 @@ (server-started (conc tmp-area "/.server-started")) (start-time (common:lazy-modification-time server-start)) (started-time (common:lazy-modification-time server-started)) (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting (start-time-old (> (- (current-seconds) start-time) 5)) - (cleanup-proc (lambda (msg) + (cleanup-proc (lambda (msg) ;; would like to use (modulo (current-seconds) 60) instead of process-id to wrap filenames (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) + (new-fname (conc "server-" (modulo (current-seconds) 60) "-" (get-host-name) ".log")) (full-serv-fname (conc *toppath* "/logs/" serv-fname)) - (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) + ;; (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname)) + (new-serv-fname (conc *toppath* "/logs/" new-fname)) + ) (debug:print 0 *default-log-port* msg) (if (common:file-exists? full-serv-fname) - (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) + (with-output-to-pipe "at now + 10 minutes" (lambda () + (print "mv -f " full-serv-fname " " new-serv-fname))) + ;; (system (conc "sleep 10;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) - #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago + (if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") (exit))) ;; lets not even bother to start if there are already three or more server files ready to go - #;(let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) (exit)))) (common:save-pkt `((action . start) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -812,45 +812,11 @@ (loop (car tal) (cdr tal) (+ kill-cnt flag)) (+ kill-cnt flag)))))) ;; DO NOT USE - caching of configs is handled in launch:setup now. ;; -(define (launch:cache-config) - ;; if we have a linktree and -runtests and -target and the directory exists dump the config - ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg - (if (and *configdat* - (or (args:get-arg "-run") - (args:get-arg "-runtests") - (args:get-arg "-execute"))) - (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) - (target (common:args-get-target exit-if-bad: #t)) - (runname (or (args:get-arg "-runname") - (args:get-arg ":runname") - (getenv "MT_RUNNAME"))) - (fulldir (conc linktree "/" - target "/" - runname))) - (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree - (begin - (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) - (if (not (common:file-exists? fulldir)) - (create-directory fulldir #t)) ;; need to protect with exception handler - (if (and target - runname - (common:file-exists? fulldir)) - (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) - (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) - (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) - (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached - (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) - (if (not (common:in-running-test?)) - (configf:write-alist *configdat* tmpfile)) - (system (conc "ln -sf " tmpfile " " targfile)))) - ))) - (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) - +;; (launch:cache-config) moved to attic below ;; gather available information, if legit read configs in this order: ;; ;; if have cache; ;; read it a return it @@ -874,14 +840,30 @@ (mutex-unlock! *launch-setup-mutex*) *toppath*) (let ((res (launch:setup-body force-reread: force-reread areapath: areapath))) (mutex-unlock! *launch-setup-mutex*) res))) + +(define (launch:cache-files-changed? cache-files ref-seconds) + (let* ((changed #f)) + (if (or (not cache-files) + (null? cache-files)) + (set! changed #t) ;; yep, they've changed + (for-each + (lambda (fname) + (if (not fname) + (set! changed #t) + (if (not (file-exists? fname)) + (set! changed #t) + (if (> (file-modification-time fname) ref-seconds) + (set! changed #t))))) + cache-files)) + changed)) ;; return paths depending on what info is available. ;; -(define (launch:get-cache-file-paths areapath toppath target mtconfig) +(define (launch:get-cache-file-paths areapath toppath target) (let* ((use-cache (common:use-cache?)) (runname (common:args-get-runname)) (linktree (common:get-linktree)) (testname (common:get-full-test-name)) (rundir (if (and runname target linktree) @@ -912,11 +894,11 @@ (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting. We do not have *configdat*. Bootstrapping problem here. (toppath (common:get-toppath areapath)) (target (common:args-get-target)) (sections (if target (list "default" target) #f)) ;; for runconfigs (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (cachefiles (launch:get-cache-file-paths areapath toppath target)) ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... (mtcachef (if (null? cachefiles) #f (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) (rccachef (if (null? cachefiles) @@ -993,11 +975,11 @@ (for-each (lambda (kt) (setenv (car kt) (cadr kt))) key-vals) (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... sections: sections))) - (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (cachefiles (launch:get-cache-file-paths areapath toppath target)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 ;; TODO - consider 1) using simple-lock to bracket cache write ;; 2) cache in hash on server, since need to do rmt: anyway to lock. @@ -1076,11 +1058,11 @@ (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") (set! *toppath* #f) ;; force it to be false so we return #f #f)) ;; one more attempt to cache the configs for future reading - (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) + (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 ;; TODO - consider 1) using simple-lock to bracket cache write @@ -1363,12 +1345,20 @@ (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") (rmt:set-var key (current-seconds)) (rmt:find-and-mark-incomplete run-id #f)))) - +(defstruct launch:ajt + (vars '()) + (exekey #f) + (host-type #f) + (test-sig #f) + (cmdline #f)) +;; append vars +(define (launch:ajt-add-vars dat vars) + (launch:ajt-vars-set! dat (append (launch:ajt-vars dat) vars))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host @@ -1375,44 +1365,41 @@ ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (let* (;; locking code removed from here commented out and pasted at end of file (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + ;; launcher-mode will be 'adjutant or 'normal + (launcher-mode (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal"))) + (ajtdat (make-launch:ajt))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0))) (if (> launch-delay delta) (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) (thread-sleep! (- launch-delay delta)) (loop (- (current-seconds) *last-launch*) launch-delay)))) (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) + (let ((var-list (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat))) + ;; consolidate this code with the code in megatest.scm for + ;; "-execute", *maybe* - the longer they are set the longer + ;; each launch takes (must be non-overlapping with the vars) + (alist->env-vars var-list) + ;; the var-list into the ajtdat adjutant record whether it is needed or not. + (launch:ajt-add-vars ajtdat var-list)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed ;; for tconfig, why do we allow fallback to test-conf? (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) (begin (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") @@ -1430,26 +1417,17 @@ ;; (memory (configf:lookup tconfig "requirements" "memory")) ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed (remote-megatest (configf:lookup *configdat* "setup" "executable")) (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest (local-megatest (common:find-local-megatest)) - #;(local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (launcher (let ((l (common:get-launcher *configdat* test-name item-path launcher-mode))) + (if (string? l) + (string-split l) + l))) ;; some nonhomogenuity here. '(cmd param1 param2 ...) OR '(host-type launcher) + ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) @@ -1459,15 +1437,24 @@ (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '()) (if (configf:lookup *configdat* "misc" "profilesw") (list (configf:lookup *configdat* "misc" "profilesw")) '())))) + ;; save the test-sig in the ajtdat record + (launch:ajt-test-sig-set! ajtdat test-sig) + ;; go ahead and figure out if we have a host-type from the + ;; launcher call above and save it in the ajtdat record + (if (and (eq? launcher-mode 'adjutant) + (list? launcher) + (> (length launcher) 1)) + (launch:ajt-host-type-set! ajtdat (car launcher))) + ;; (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! launcher (string-split launcher))) + ;; (if launcher (set! launcher (string-split launcher))) ;; yuk! ;; set up the run work area for this test (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir (begin (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) @@ -1520,63 +1507,92 @@ (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) + ;; save the cmdparms in the ajtdat + (launch:ajt-exekey-set! ajtdat cmdparms) ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + + ;; save the command line for adjutant mode (might never be needed but best to assemble it here) + (launch:ajt-cmdline-set! ajtdat (string-intersperse + (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + (cond (launcher (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) (else (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 *default-log-port* "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 *default-log-port* "fullcmd: " fullcmd) (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path)) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + (let* ((env-override-vars (hash-table-ref/default *configdat* "env-override" '())) + (commonprevvals (alist->env-vars env-override-vars)) + (misc-vars (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path)) + itemdat)) + (miscprevvals (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute" + (test-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '())) + (testprevvals (alist->env-vars test-vars)) + ;; Launchwait defaults to true, must override it to turn off wait (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) + ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. + (launch-results-prev (if (eq? launcher-mode 'adjutant) + '(#t 0) ;; just some fake data to fool downstream but non-applicable code + (apply (if launchwait + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) + + (launch:ajt-add-vars ajtdat env-override-vars) + (launch:ajt-add-vars ajtdat misc-vars) + (launch:ajt-add-vars ajtdat test-vars) + + ;; if in adjutant mode we register the job in the jobs_queue + ;; then fire off an adjutant runner + ;; + (if (eq? launcher-mode 'adjutant) + (let* ((adjutant-runner-cmd (append (cdr launcher) + (list remote-megatest "-adjutant" + (launch:ajt-host-type ajtdat) + "-start-dir" *toppath*))) + (adj-cmd (conc (string-intersperse (map conc adjutant-runner-cmd) " ") + "&"))) + (rmt:no-sync-add-job + (launch:ajt-host-type ajtdat) + (launch:ajt-vars ajtdat) + (launch:ajt-exekey ajtdat) + (launch:ajt-cmdline ajtdat)) + (print "adj-cmd: " adj-cmd) + (system adj-cmd) + )) + + (if (not success) (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () @@ -1598,10 +1614,14 @@ (process-signal (current-process-id) signal/kill) )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) + ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; the unlock previously was further up. This seemed wrong as we should not proceed until the + ;; vars have been reset. + (mutex-unlock! *launch-setup-mutex*) launch-results)) (change-directory *toppath*) (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) ;; recover a test where the top controlling mtest may have died @@ -1627,5 +1647,60 @@ ;; now wait on that process if all is correct ;; periodically update the db with runtime ;; when the process exits look at the db, if still RUNNING after 10 seconds set ;; state/status appropriately (process-wait pid))) + +;;====================================================================== +;; Attic +;;====================================================================== + + ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + + + +#;(define (launch:cache-config) + ;; if we have a linktree and -runtests and -target and the directory exists dump the config + ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg + (if (and *configdat* + (or (args:get-arg "-run") + (args:get-arg "-runtests") + (args:get-arg "-execute"))) + (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE")) + (target (common:args-get-target exit-if-bad: #t)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname") + (getenv "MT_RUNNAME"))) + (fulldir (conc linktree "/" + target "/" + runname))) + (if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree + (begin + (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%")) + (if (not (common:file-exists? fulldir)) + (create-directory fulldir #t)) ;; need to protect with exception handler + (if (and target + runname + (common:file-exists? fulldir)) + (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) + (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) + (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) + (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached + (begin + (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) + (if (not (common:in-running-test?)) + (configf:write-alist *configdat* tmpfile)) + (system (conc "ln -sf " tmpfile " " targfile)))) + ))) + (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) ADDED lib/libpangox-1.0.so Index: lib/libpangox-1.0.so ================================================================== --- /dev/null +++ lib/libpangox-1.0.so cannot compute difference between binary files ADDED lib/libpangox-1.0.so.0 Index: lib/libpangox-1.0.so.0 ================================================================== --- /dev/null +++ lib/libpangox-1.0.so.0 cannot compute difference between binary files ADDED lib/libxcb-xlib.so.0 Index: lib/libxcb-xlib.so.0 ================================================================== --- /dev/null +++ lib/libxcb-xlib.so.0 cannot compute difference between binary files Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6577) +(define megatest-version 1.6579) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -40,10 +40,16 @@ (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses mutils)) +(declare (uses adjutant)) +(import adjutant) + +(declare (uses mttop)) +(import mttop) + ;; (declare (uses ftail)) ;; (import ftail) (define *db* #f) ;; this is only for the repl, do not use in general!!!! @@ -52,12 +58,12 @@ (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) - readline apropos json http-client directory-utils typed-records - http-client srfi-18 extras format) + readline apropos json http-client directory-utils typed-records matchable + http-client srfi-18 extras format call-with-environment-variables) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) @@ -105,10 +111,11 @@ Usage: megatest [options] -h : this help -manual : show the Megatest user manual -version : print megatest version (currently " megatest-version ") + help : help for the new Megatest interface Launching and managing runs -run : run all tests or as specified by -testpatt -remove-runs : remove the data for a run, requires -runname and -testpatt Optionally use :state and :status, use -keep-records to remove only @@ -202,11 +209,11 @@ -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname - -adjutant C,M : start the server/adjutant with allocated cores C and Mem M (Gig), + -adjutant host-type : start the server/adjutant with given host-type use 0,0 to auto use full machine -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile -list-servers : list the servers -kill-servers : kill all servers @@ -271,10 +278,14 @@ Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface ;; -config fname : override the runconfigs file with fname + +(mttop-run (command-line-arguments) + '("help")) + ;; process args (define remargs (args:get-args (argv) (list "-runtests" ;; run a specific test "-config" ;; override the config file name @@ -915,16 +926,47 @@ (let ((tl (launch:setup)) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (server:launch 0 transport-type) (set! *didsomething* #t))) + +(define (naylist->alist inlst) + (map (lambda (dat) + (cons (car dat) + (or (if (list? (cdr dat)) + (if (null? (cdr dat)) "" + (cadr dat)) + (cdr dat)) + ""))) ;; we need a string for call-with-environment-variables + inlst)) + ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; (if (args:get-arg "-adjutant") - (begin - (adjutant-run) + (let* ((host-type (args:get-arg "-adjutant"))) + (launch:setup) ;; dang it, wish this wasn't needed + (print "Running the adjutant!") + (let loop ((wait-count 0)) + (if (< wait-count 10) ;; 6 x 10 seconds = one minute + (let* ((dat (rmt:no-sync-take-job host-type))) + (match dat + ((id ht vars exekey cmdline state event-time last-update) + (let ((vars-alist (with-input-from-string vars read) + )) + (print "Vars:") + (pp vars-alist) + (call-with-environment-variables + (naylist->alist vars-alist) + (lambda () + (system cmdline)))) + (loop 0)) + (else + (thread-sleep! 10) + (loop (+ wait-count 1))))) + (print "I'm bored. Exiting."))) + ;; (adjutant-run (args:get-arg "-ajutant") rmt:no-sync-take-job) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) ADDED mttop.scm Index: mttop.scm ================================================================== --- /dev/null +++ mttop.scm @@ -0,0 +1,55 @@ +;; Copyright 2006-2011, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; This is from the perl world, a hash of hashes is a super easy way to keep a handle on +;; lots of disparate data +;; + +(declare (unit mttop)) + +(module mttop + * + +(import chicken scheme + ;; data-structures posix + srfi-1 + ;; srfi-13 + srfi-69 + ports + extras + regex + posix + data-structures + matchable + ) + +(define (str-is-cmd cmd all-cmds) + (let* ((rx (regexp (conc "^" cmd ".*"))) + (mx (filter string? (map (lambda (x) + (let ((res (string-match rx x))) + (if res (car res) #f))) + all-cmds)))) + (if (eq? (length mx) 1) ;; have a command + (car mx) + #f))) + +(define (mttop-run args all-cmds) + ;; any path through this call must end in exit if it is NOT an old Megatest call + (if (null? args) + #f ;; continue on and do the old Megatest stuff + (let ((cmd (str-is-cmd (car args) all-cmds))) + (if cmd + (begin + (case (string->symbol cmd) + ((help)(print "New help")) + (else (print "Command " cmd " is not implemented yet."))) + (exit)) ;; always exit here + #f)))) ;; or continue on to Megatest old stuff here + +) Index: mtut.scm ================================================================== --- mtut.scm +++ mtut.scm @@ -154,10 +154,11 @@ show [areas|contours... ] : show areas, contours or other section from megatest.config gendot : generate a graphviz dot file from pkts. Contour actions: process : runs import, rungen and dispatch + go : runs import, rungen and dispatch every five minutes forever Trigger propagation actions: tsend a=b,c=d... : send trigger info to all recpients in the [listeners] section tlisten -port N : listen for trigger info on port N @@ -773,11 +774,11 @@ (begin (print-call-chain) (print "FAILED TO RUN RUNNAME MAPPER " callname " FOR AREA " area) (print " message: " ((condition-property-accessor 'exn 'message) exn)) runname) - (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")") + (print "(mapper " (string-intersperse (map conc (list runkey runname area area-path reason contour mode-patt)) ", ") ")") (mapper runkey runname area area-path reason contour mode-patt)) (case callname ((auto #f) runname) (else runtrans))))) (new-target target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour)) @@ -1323,11 +1324,13 @@ (begin (print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"") res))))) (conc "megatest " (if (not (member action '("sync"))) (conc action " " action-param) - "")) + "") (if (member action '("-run" "-rerun-clean" "-rerun-all" "-kill-rerun")) + "-rerun DEAD,ABORT,KILLED" + "")) pkta))) ;; (use trace)(trace pkt->cmdline) (define (write-pkt pktsdir uuid pkt) @@ -1610,23 +1613,70 @@ ;; (hash-table-keys adjargs)) (let-values (((uuid pkt) (command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss))) (print "run log @ " (conc (current-directory) "/" uuid "-" *action* ".log")) (write-pkt pktsdir uuid pkt)))) - ((dispatch import rungen process) + ((dispatch import rungen process go) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) - (toppath (configf:lookup mtconf "scratchdat" "toppath"))) + (toppath (configf:lookup mtconf "scratchdat" "toppath")) + (period (configf:lookup-number mtconf "mtutil" "autorun-period" default: 300)) + (rest-time (configf:lookup-number mtconf "mtutil" "autorun-rest" default: 30))) + (print "Using period="period" and rest time="rest-time) (case (string->symbol *action*) ((process) (begin (common:load-pkts-to-db mtconf) (generate-run-pkts mtconf toppath) (common:load-pkts-to-db mtconf) (dispatch-commands mtconf toppath))) ((import) (common:load-pkts-to-db mtconf)) ;; import pkts ((rungen) (generate-run-pkts mtconf toppath)) - ((dispatch) (dispatch-commands mtconf toppath))))) + ((dispatch) (dispatch-commands mtconf toppath)) + ;; [mtutil] + ;; # approximate interval between run processing in mtutil (seconds) + ;; autorun-period 300 + ;; # minimal rest period between processing + ;; autorun-rest 30 + ((go) + ;; determine if I'm the boss + (if (file-exists? "mtutil-go.pid") + (begin + (print "ERROR: mtutil go is already running under host and pid " (with-input-from-file "mtutil-go.pid" read-line) + ". Please kill that process and remove the file \"mutil-go.pid\" and try again.") + (exit))) + (with-output-to-file "mtutil-go.pid" (lambda ()(print (get-host-name) " " (current-process-id)))) + (print "Starting long running import, rungen, and process loop") + (if (file-exists? "do-not-run-mtutil-go") + (begin + (print "NOTE: Removing flag file "(current-directory)"/do-not-run-mtutil-go") + (delete-file* "do-not-run-mtutil-go"))) + (let loop ((last-run (- (current-seconds) (+ period 10))) ;; fake out first time in + (this-run (current-seconds))) + (if (file-exists? "do-not-run-mtutil-go") + (begin + (print "File do-not-run-mtutil-go exists, exiting.") + (delete-file* "mtutil-go.pid") + (exit))) + (let ((delta (- this-run last-run))) + (if (>= delta period) + (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat))) + (print "Running import at " (current-seconds)) + (common:load-pkts-to-db mtconf) + (print "Running generate run pkts at " (current-seconds)) + (generate-run-pkts mtconf toppath) + (print "Running run dispatch at " (current-seconds)) + (common:load-pkts-to-db mtconf) + (dispatch-commands mtconf toppath) + (print "Done running import, generate, and dispatch done in " (- (current-seconds) this-run)) + (print "NOTE: touch " (current-directory) "/do-not-run-mtutil-go to kill this runner.") + (loop this-run (current-seconds))) + (let ((now (current-seconds))) + (print "Sleeping " rest-time " seconds, next run in aproximately " (- period (- now last-run)) " seconds") + (thread-sleep! rest-time) + (loop last-run (current-seconds)))))) + (delete-file* "mtutil-go.pid"))))) ;; misc ((show) (if (> (length remargs) 0) (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir"))) (mtconf (car mtconfdat)) @@ -1805,46 +1855,52 @@ ) ))) (loop (nn-recv rep)))) (print "ERROR: Port " portnum " already in use. Try another port"))))))) - - - - - ((tlisten) - (if (null? remargs) - (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"") - (let ((portnum (string->number (car remargs)))) - - (if (not portnum) - (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs)) - (begin - (if (not (is-port-in-use portnum)) - (let* ((rep (start-nn-server portnum)) - (mtconfdat (simple-setup (args:get-arg "-start-dir"))) - (mtconf (car mtconfdat)) - (contact (configf:lookup mtconf "listener" "owner")) - (script (configf:lookup mtconf "listener" "script"))) - (print "Listening on port " portnum " for messages.") - (set-signal-handler! signal/int (lambda (signum) - (set! *time-to-exit* #t) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!") - (let ((email-body (mtut:stml->string (s:body - (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". ")))))) - (sendmail contact "Listner has been terminated." email-body use_html: #t)) - (exit))) - (set-signal-handler! signal/term (lambda (signum) - (set! *time-to-exit* #t) - (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!") - (let ((email-body (mtut:stml->string (s:body - (s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". ")))))) - (sendmail contact "Listner has been terminated." email-body use_html: #t)) - (exit))) - - ;(set-signal-handler! signal/term special-signal-handler) - + + ((tlisten) + (if (null? remargs) + (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"") + (let ((portnum (string->number (car remargs)))) + + (if (not portnum) + (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs)) + (begin + (if (not (is-port-in-use portnum)) + (let* ((rep (start-nn-server portnum)) + (mtconfdat (simple-setup (args:get-arg "-start-dir"))) + (mtconf (car mtconfdat)) + (contact (configf:lookup mtconf "listener" "owner")) + (script (configf:lookup mtconf "listener" "script"))) + (print "Listening on port " portnum " for messages.") + (set-signal-handler! signal/int + (lambda (signum) + (set! *time-to-exit* #t) + (debug:print-error 0 *default-log-port* "Received signal " signum + " sending email befor exiting !!") + (let ((email-body (mtut:stml->string + (s:body + (s:p (conc "Received signal " signum + ". Lister has been terminated on host " + (get-environment-variable "HOST") ". ")))))) + (sendmail contact "Listner has been terminated." email-body use_html: #t)) + (exit))) + (set-signal-handler! signal/term (lambda (signum) + (set! *time-to-exit* #t) + (debug:print-error 0 *default-log-port* "Received signal " + signum " sending email befor exiting !!") + (let ((email-body (mtut:stml->string + (s:body + (s:p (conc "Received signal " signum + ". Lister has been terminated on host " + (get-environment-variable "HOST") ". ")))))) + (sendmail contact "Listner has been terminated." email-body use_html: #t)) + (exit))) + + ;; (set-signal-handler! signal/term special-signal-handler) + (let loop ((instr (nn-recv rep))) (nn-send rep "ok") (let ((ctime (date->string (current-date)))) (if (equal? instr "time-to-die") (begin Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -969,10 +969,19 @@ (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) (define (rmt:no-sync-get-lock keyname) (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) + +(define (rmt:no-sync-add-job host-type vars-list exekey cmdline) + (rmt:send-receive 'no-sync-add-job #f `(,host-type ,vars-list ,exekey ,cmdline))) + +(define (rmt:no-sync-take-job host-type) + (rmt:send-receive 'no-sync-take-job #f `(,host-type))) + +(define (rmt:no-sync-job-records-clean) + (rmt:set-receive 'no-sync-job-records-clean #f '())) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -350,15 +350,14 @@ (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat))) (let* ((num-running (rmt:get-count-tests-running run-id)) (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)) - (job-group-limit (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup))) - (if (string? jobg-count) - (string->number jobg-count) - jobg-count)))) - (if (> (+ num-running num-running-in-jobgroup) 0) + (job-group-limit (configf:lookup-number *configdat* "jobgroups" jobgroup))) + (if (and (number? num-running) ;; checking for number - had a crash where a non-number was returned. Not sure why. + (number? num-running-in-jobgroup) ;; probably can remove this when rmt switches away from http. + (> (+ num-running num-running-in-jobgroup) 0)) (runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) @@ -419,11 +418,13 @@ (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf) (system (conc run-pre-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run."))) (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run."))))) - + + + (define (runs:run-post-hook run-id) (let* ((run-post-hook (configf:lookup *configdat* "runs" "post-hook")) (existing-tests (if run-post-hook (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses #f #f ;; offset limit @@ -458,10 +459,65 @@ (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file)) (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf) (system (conc run-post-hook " >> " actual-logf " 2>&1")) (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + +(define (runs:rerun-hook test-id new-test-path testdat rerunlst) + (let* ((rerun-hook (configf:lookup *configdat* "runs" "rerun-hook")) + (log-dir (conc *toppath* "/reruns/logs")) + (target (getenv "MT_TARGET")) + (runname (common:args-get-runname)) + (rundir (db:test-get-rundir testdat)) + (tarfiledir (conc *toppath* "/reruns")) + (status (db:test-get-status testdat)) + (comment (conc "\"" (db:test-get-comment testdat) "\"" )) + (testname (db:test-get-testname testdat)) + (itempath (db:test-get-item-path testdat)) + (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) ""))) + (log-file (conc file-body ".log")) + ;; (log-file (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".log")) + (full-log-fname (conc log-dir "/" log-file)) + (tarfilename (conc file-body ".tar")) + ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc "." (string-translate itempath "/" "-")) "") ".tar")) + ) + (if rerun-hook + (let* ((use-log-dir (if (not (directory-exists? log-dir)) + (handle-exceptions + exn + (begin + (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn) + #f) + (create-directory log-dir #t) + #t) + #t)) + (start-time (current-seconds)) + (actual-logf (if use-log-dir full-log-fname log-file)) + (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1")) + ) + (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook) + (handle-exceptions + exn + (begin + (print-call-chain *default-log-port*) + (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) + (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file)) + (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf) + ;; call the hook + (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst) + (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook) + (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename) + (debug:print-info 0 *default-log-port* "rundir: " rundir) + (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf) + (debug:print-info 0 *default-log-port* "runname: " runname) + (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text) + (system sys-call-text) + (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))))) + + + ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise. (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon) (null? (tests:filter-test-names-not-matched waitors-upon test-patt))) @@ -493,11 +549,13 @@ (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) ;; (tdbdat (tasks:open-db)) (config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (allowed-tests #f) - (runconf #f)) + (runconf #f) + (cache-files (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target)) + (runstart-time (current-seconds))) ;; check if readonly (when readonly-mode (debug:print-error 0 *default-log-port* "megatest.db is readonly. Cannot proceed.") (exit 1)) @@ -631,11 +689,11 @@ (runs:update-all-test_meta #f) ;; run the run prehook if there are no tests yet run for this run: ;; (runs:run-pre-hook run-id) - ;; mark all test launced flag as false in the meta table + ;; mark all test launched flag as false in the meta table (rmt:set-var (conc "lunch-complete-" run-id) "no") (debug:print-info 1 *default-log-port* "Setting end-of-run to no") (let* ((config-reruns (let ((x (configf:lookup *configdat* "setup" "reruns"))) (if x (string->number x) #f))) (config-rerun-cnt (if config-reruns @@ -780,20 +838,25 @@ (thread-start! th2) (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry) (set! keep-going #f) (thread-join! th2) + (if (launch:cache-files-changed? cache-files runstart-time) + (begin ;; force a start-over + (launch:setup force-reread: #t) + (runs:run-tests target runname test-patts user flags run-count: 0))) + ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD (if (> run-count 0) ;; handle reruns (begin (if (not (hash-table-ref/default flags "-preclean" #f)) (hash-table-set! flags "-preclean" #t)) (if (not (hash-table-ref/default flags "-rerun" #f)) - (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) + (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS")) ;; recursive call to self (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))) - (launch:end-of-run-check run-id))) + (launch:end-of-run-check run-id))) (debug:print-info 0 *default-log-port* "No tests to run"))) (debug:print-info 4 *default-log-port* "All done by here") ;; TODO: try putting post hook call here ; (debug:print-info 2 *default-log-port* " run-count " run-count) @@ -2056,25 +2119,34 @@ (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED")) (member (test:get-state testdat) '("COMPLETED")))) (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat)) (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED) (set! runflag #f)) + ;; -rerun and status is one of the specifed, run it ((and rerun (let* ((rerunlst (string-split rerun ",")) (must-rerun (member (test:get-status testdat) rerunlst))) (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun) must-rerun)) (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path) - (set! runflag #t)) - ;; -keepgoing, do not rerun FAIL + (set! runflag #t) + (debug:print-info 2 *default-log-port* "Calling rerun hook") + (runs:rerun-hook test-id new-test-path testdat rerun) + ) + + + + ;; -keepgoing, do not rerun FAIL ((and keepgoing (member (test:get-status testdat) '("FAIL"))) (set! runflag #f)) - ((and (not rerun) + + ((and (not rerun) (member (test:get-status testdat) '("FAIL" "n/a"))) (set! runflag #t)) + (else (set! runflag #f))) (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat)) (if (not runflag) (if (not parent-test) (if (runs:lownoise (conc "not starting test" full-test-name) 60) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -322,29 +322,75 @@ *my-client-signature*))) ;; wait for server=start-last to be three seconds old ;; (define (server:wait-for-server-start-last-flag areapath) + (let* ((flag-dir (conc areapath "/logs")) + (start-flag (conc flag-dir "/server-start-last")) + ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) + (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) + (server-key (conc (get-host-name) "-" (current-process-id))) + (create-key-file (lambda () + (with-output-to-file start-flag + (lambda () + (print server-key))))) + (check-key-file (lambda () + (let ((res (with-input-from-file start-flag + (lambda () + (read-line))))) + (equal? server-key res)))) + (get-file-age (lambda () + (let* ((fmodtime (file-modification-time start-flag))) + (- (current-seconds) fmodtime))))) + (if (not (directory-exists? flag-dir)) + (begin + (debug:print-info 0 *default-log-port* "Directory " flag-dir " does not exist! Cannot gate.") + #f) + (if (file-exists? start-flag) + (if (check-key-file) ;; is it me? + #t ;; yes, it is me, proceed + (let* ((file-age (get-file-age))) + (if (> file-age reftime) ;; let the previous guy have at least 4 seconds to do their thing + (begin ;; file is old enough, we can try to take it + (create-key-file) ;; take the file and try again + (server:wait-for-server-start-last-flag areapath)) + (let* ((remtime (max 1 (min file-age reftime)))) + (debug:print-info 0 *default-log-port* "Gating server start, waiting remtime="remtime) + (thread-sleep! remtime) + (server:wait-for-server-start-last-flag areapath))))) + (begin + (create-key-file) + (server:wait-for-server-start-last-flag areapath)))))) + + + +;; wait for server=start-last to be three seconds old +;; +(define (server:wait-for-server-start-last-flag-old areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) - (server-key (conc (get-host-name) "-" (current-process-id)))) + (server-key (conc (get-host-name) "-" (current-process-id))) + (create-key-file (lambda () + (with-output-to-file start-flag + (lambda () + (print server-key))))) + (check-key-file (lambda () + (let ((res (with-input-from-file start-flag + (lambda () + (read-line))))) + (equal? server-key res))))) ;; (thread-sleep! (/ (random 500) 1000)) ;; I don't think this made a difference (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if (and all-go (begin - (with-output-to-file start-flag - (lambda () - (print server-key))) + (create-key-file) (thread-sleep! 0.25) - (let ((res (with-input-from-file start-flag - (lambda () - (read-line))))) - (equal? server-key res)))) + (check-key-file))) #t ;; (system (conc "touch " start-flag)) ;; lazy but safe (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! reftime) Index: tests/unittests/all-api.scm ================================================================== --- tests/unittests/all-api.scm +++ tests/unittests/all-api.scm @@ -116,11 +116,11 @@ (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-run (list 2))) 0)) ;; delete a non-existant run (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'update-run-stats (list 1 '()))) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-main-run-stats (list 1 ))) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-old-deleted-test-records '())) 0)) (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-runs (list "%" 10 0 keypatts))) 0)) -(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts))) 0)) +(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts 0))) 0)) (test #f #(#t (1))(api:execute-requests my-dbstruct (vector 'get-all-run-ids '()))) (test #f #(#t ()) (api:execute-requests my-dbstruct (vector 'get-prev-run-ids '(1)))) (test #f #(#t "JUSTFINE") (api:execute-requests my-dbstruct (vector 'get-run-status '(1)))) (test #f #(#t "NEW") (api:execute-requests my-dbstruct (vector 'get-run-state '(1)))) (test #f #(#t (("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1))) (api:execute-requests my-dbstruct (vector 'get-run-stats '()))) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -38,12 +38,12 @@ (test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait (test #f #t (list? (server:get-list toppath))) (test #f '() (server:get-best '())) (test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) (test #f "test.lock" (common:simple-file-release-lock "test.lock")) -(test #f #t (server:get-best-guess-address (get-host-name))) -(test #f #t (string? (common:get-homehost))) +(test #f #t (string? (server:get-best-guess-address (get-host-name)))) +(test #f #t (string? (car (common:get-homehost)))) ;; clean out any old running servers ;; (let ((servers (server:get-list toppath))) (print "Known servers: " servers) @@ -68,11 +68,11 @@ (thread-sleep! 2) ;; (test #f #t (string? (server:start-and-wait *toppath*))) (test "setup for run" #t (begin (launch:setup) (string? (getenv "MT_RUN_AREA_HOME")))) -(test #f #t (client:setup-http toppath)) +(test #f #t (vector? (client:setup-http toppath))) (test #f #t (vector? (client:setup toppath))) (test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. (test #f #t (string? (server:check-if-running "."))) ;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) ADDED utils/Makefile.whodunit Index: utils/Makefile.whodunit ================================================================== --- /dev/null +++ utils/Makefile.whodunit @@ -0,0 +1,9 @@ +.DEFAULT : all + +all : whodunit + +clean : + rm whodunit + +whodunit : whodunit.scm + csc -static -L -static -L -lm -L -ldl -L -lpthread -L -lssl -L -lcrypto -L -lz whodunit.scm -o whodunit Index: utils/mk_wrapper ================================================================== --- utils/mk_wrapper +++ utils/mk_wrapper @@ -19,10 +19,11 @@ prefix=$1 cmd=$2 target=$3 cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh" +libdir="$prefix/bin/.$(lsb_release -sr)/lib" # we wish to create a var in cfg.sh for finding sqlite3 executable chicken_bin_dir=$(dirname $(which csi)) if [[ -e $chicken_bin_dir/sqlite3 ]];then sqlite3_exe=$chicken_bin_dir/sqlite3 @@ -30,19 +31,20 @@ sqlite3_exe=$(which sqlite3) fi if [ "$LD_LIBRARY_PATH" != "" ];then echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2 + echo "INFO: Writing $cfgfile" >&2 ( cat << __EOF if [ -z \$MT_ORIG_ENV ]; then export MT_ORIG_ENV=\$( $prefix/bin/serialize-env ) fi if [ "\$LD_LIBRARY_PATH" != "" ];then - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH:$libdir else - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir fi export MT_SQLITE3_EXE=$sqlite3_exe __EOF ) > $cfgfile Index: utils/whodunit.scm ================================================================== --- utils/whodunit.scm +++ utils/whodunit.scm @@ -13,11 +13,25 @@ ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; -(use posix srfi-69) +(module whodunit + + * + + +(import + (chicken process-context) + (chicken process) + (chicken string) + (chicken base) + (chicken sort) + (chicken io) + srfi-69 + scheme +) (define *numsamples* (or (and (> (length (argv)) 1) (string->number (cadr (argv)))) 3)) @@ -24,11 +38,11 @@ (define (topdata) (with-input-from-pipe (conc "top -b -n " *numsamples* " -d 0.1") read-lines)) -(define (cleanup-data topdat)list +(define (cleanup-data topdat) (let loop ((hed (car topdat)) (tal (cdr topdat)) (res '())) (let* ((line-list (string-split hed)) (nums (map (lambda (indat)(or (string->number indat) indat)) line-list)) @@ -37,29 +51,37 @@ (new-res (if not-data res (cons nums res)))) (if (null? tal) new-res (loop (car tal)(cdr tal) new-res))))) + +;; sum up +(define (sum-up data ht) + (for-each + (lambda (indat) + (let ((pid (car indat)) + (usr (cadr indat)) + (cpu (list-ref indat 8))) + (hash-table-set! ht usr (+ cpu (hash-table-ref/default ht usr 0))))) + data)) + +(define (print-results userhash) + (for-each + (lambda (usr) + (let* ((usage (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*))))) + (if (> usage 0) + (print usr (if (< (string-length usr) 8) "\t\t" "\t") usage)))) + (sort (hash-table-keys userhash) + (lambda (a b) + (> (hash-table-ref userhash a) + (hash-table-ref userhash b)))))) + +) +(import whodunit srfi-69 (chicken sort)) + (print "Getting " *numsamples* " samples of cpu usage data.") (define data (cleanup-data (topdata))) (define pidhash (make-hash-table)) (define userhash (make-hash-table)) - -;; sum up and normalize the -(for-each - (lambda (indat) - (let ((pid (car indat)) - (usr (cadr indat)) - (cpu (list-ref indat 8))) - (hash-table-set! userhash usr (+ cpu (hash-table-ref/default userhash usr 0))))) - data) - -(for-each - (lambda (usr) - (print usr - (if (< (string-length usr) 8) "\t\t" "\t") - (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*))))) - (sort (hash-table-keys userhash) - (lambda (a b) - (> (hash-table-ref userhash a) - (hash-table-ref userhash b))))) - +(sum-up data userhash) + +(print-results userhash)