Index: TODO ================================================================== --- TODO +++ TODO @@ -16,10 +16,17 @@ # along with Megatest. If not, see . TODO ==== +WW38 +. Add test_rundat to no-sync ==> correction, put in /.meta/test-run.dat +. Add STATE/STATUS transitions to .meta/test-run.dat or similar +. Swizzle update-test-rundat to operate on no-sync +. Swizzle update-run-duration, -uname-host and cpuload-diskfree to no-sync +. On state/status change update tests table with duration + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling WW16 @@ -35,11 +42,10 @@ . break command line into sections; all, run control, queries, utilities etc. . pull in ftfplan (not integrated, just code pulled in) WW20 . ./configure => ubuntu, sles11, sles12, rh7 -. Jenkins junit XML support . Add output flushing in teamcity support . Switch to using simple runs query everywhere . Add end_time to runs and add a rollup call that sets state, status and end_time Future Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -567,11 +567,11 @@ (- num-logs max-allowed)))) (for-each (lambda (file) (let* ((fullname (conc "logs/" file))) (if (directory? fullname) - (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") + (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (handle-exceptions exn (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) (delete-file* fullname))))) files) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -212,10 +212,12 @@ (hash-table-set! (dboard:commondat-tabdats commondat) tabnum tabdat)) +(define *updater-running* #f) ;; move this into one of the stucts + ;; gets and calls updater list based on curr-tab-num ;; (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f)) (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat (let* ((tnum (or tab-num (dboard:commondat-curr-tab-num commondat))) @@ -240,11 +242,13 @@ tnum (cons updater curr-updaters)))) ;; data for each specific tab goes here ;; -(defstruct dboard:tabdat +(defstruct dboard:tabdat + ;; flags + ((already-running #f) : boolean) ;; the updater is already running. skip ;; runs ((allruns '()) : list) ;; list of dboard:rundat records ((allruns-by-id (make-hash-table)) : hash-table) ;; hash of run-id -> dboard:rundat records ((done-runs '()) : list) ;; list of runs already drawn ((not-done-runs '()) : list) ;; list of runs not yet drawn @@ -645,11 +649,11 @@ ;; (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals) (let* ((start-time (current-seconds)) (access-mode (dboard:tabdat-access-mode tabdat)) (num-to-get (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get") - "200"))) + "60"))) ;; was 200, which is fine in a normal run area. (states (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat))) (statuses (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat))) (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab (do-not-use-query-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab (sort-info (get-curr-sort)) @@ -716,10 +720,11 @@ (if got-all (begin (dboard:rundat-last-update-set! run-dat (- start-time 2)) (dboard:rundat-run-data-offset-set! run-dat 0)) (begin + ;;; (thread-sleep! 0.25) ;; give the rest of the gui some time to update. <-- this did NOT help (dboard:rundat-run-data-offset-set! run-dat (+ num-to-get (dboard:rundat-run-data-offset run-dat))))) (for-each (lambda (tdat) @@ -833,11 +838,13 @@ ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; (define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts) - (let* ((access-mode (dboard:tabdat-access-mode tabdat)) + (dboard:tabdat-already-running-set! tabdat #t) + (let* (;; (already-running (dboard:tabdat-already-running tabdat)) + (access-mode (dboard:tabdat-access-mode tabdat)) (keys (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys))) (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2)) (allruns (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f)) (allruns-tree (rmt:get-runs-by-patt keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname") @@ -901,27 +908,26 @@ (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) (if (or (null? tal) (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update (begin - (when (> elapsed-time 2) + #;(when (> elapsed-time 2) (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") (let* ((old-val (iup:attribute *tim* "TIME")) (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) (if (< (string->number new-val) 5000) - ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) - (iup:attribute-set! *tim* "TIME" new-val)))) - - - ) + (begin + (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) + (iup:attribute-set! *tim* "TIME" new-val))))) (dboard:tabdat-allruns-set! tabdat new-res) maxtests) (if (> (dboard:rundat-run-data-offset run-struct) 0) (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) (dboard:tabdat-filters-changed-set! tabdat #f) - (dboard:update-tree tabdat runs-hash header tb))) + (dboard:update-tree tabdat runs-hash header tb) + (dboard:tabdat-already-running-set! tabdat #f))) (define *collapsed* (make-hash-table)) (define (toggle-hide lnum uidat) ; fulltestname) (let* ((btn (vector-ref (dboard:uidat-get-lftcol uidat) lnum)) @@ -2505,14 +2511,10 @@ ;; insert extra widget here (if extra-widget extra-widget (iup:hbox)) ;; empty widget - - - - ))) (let* ((status-toggles (map (lambda (status) (iup:toggle (conc status) #:fontsize 8 ;; btn-fontsz ;; "10" @@ -3725,28 +3727,36 @@ ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) + ;; this seems like a good place to check for already running and skip if so + ;; + ;; (set! *updater-running* #t) +;;(if (dboard:tabdat-already-running tabdat) +;; (begin +;; (debug:print-info 0 *default-log-port* "Dashboard overloaded - updates will be slow, skipping update.") +;; (dboard:tabdat-target tabdat)) (dboard:update-rundat tabdat (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") (dboard:tabdat-numruns tabdat) (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") ;; generate key patterns from the target stored in tabdat (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) (let ((fres (if (dboard:tabdat-target tabdat) - (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) - (map (lambda (k v)(list k v)) dbkeys ptparts)) - (let ((res '())) - (for-each (lambda (key) - (if (not (equal? key "runname")) - (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f))) - (if val (set! res (cons (list key val) res)))))) - dbkeys) - res)))) - fres)))) + (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) + (map (lambda (k v)(list k v)) dbkeys ptparts)) + (let ((res '())) + (for-each (lambda (key) + (if (not (equal? key "runname")) + (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f))) + (if val (set! res (cons (list key val) res)))))) + dbkeys) + res)))) + fres))) + #;(set! *updater-running* #f)) (define (dashboard:runs-tab-updater commondat tab-num) (debug:catch-and-dump (lambda () (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num)) @@ -3801,22 +3811,25 @@ (dashboard:runs-tab-updater commondat 1)) tab-num: 2) (iup:callback-set! *tim* "ACTION_CB" (lambda (time-obj) - (let ((update-is-running #f)) - (mutex-lock! (dboard:commondat-update-mutex commondat)) - (set! update-is-running (dboard:commondat-updating commondat)) - (if (not update-is-running) - (dboard:commondat-updating-set! commondat #t)) - (mutex-unlock! (dboard:commondat-update-mutex commondat)) - (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update - (begin - (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat) - (mutex-lock! (dboard:commondat-update-mutex commondat)) - (dboard:commondat-updating-set! commondat #f) - (mutex-unlock! (dboard:commondat-update-mutex commondat))) + (if (not *updater-running*) + (begin + ;; (mutex-lock! (dboard:commondat-update-mutex commondat)) + ;; (set! update-is-running (dboard:commondat-updating commondat)) + ;;(if (not update-is-running) + ;; (dboard:commondat-updating-set! commondat #t)) + ;;(mutex-unlock! (dboard:commondat-update-mutex commondat)) + ;;(if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update + ;; (begin + (set! *updater-running* #t) + (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat) + (set! *updater-running* #f) + ;; (mutex-lock! (dboard:commondat-update-mutex commondat)) + ;; (dboard:commondat-updating-set! commondat #f) + ;; (mutex-unlock! (dboard:commondat-update-mutex commondat))) )) 1)))) (let ((th1 (make-thread (lambda () (thread-sleep! 1) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1534,11 +1534,11 @@ state TEXT DEFAULT 'new', status TEXT DEFAULT 'n/a', archive_type TEXT DEFAULT 'bup', du INTEGER, archive_path TEXT);"))) - (print "creating trigges from init") + ;; (print "creating trigges from init") (db:create-triggers db) db)) ;; ) ;;====================================================================== ;; A R C H I V E S @@ -3516,11 +3516,11 @@ (lambda (run-id) (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id))) (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs))) run-ids))) -;; Get test data using test_id, run-id is not used +;; Get test data using test_id, run-id is not used - but it will be! ;; (define (db:get-test-info-by-id dbstruct run-id test-id) (db:with-db dbstruct #f ;; run-id Index: docs/manual/megatest_manual.html ================================================================== --- docs/manual/megatest_manual.html +++ docs/manual/megatest_manual.html @@ -769,13 +769,13 @@

The Megatest Users Manual

Matt Welland
<matt@kiatoa.com>
version 1.5, June 2020 -
-
Table of Contents
- +
+
Table of Contents
+

Preface

@@ -3440,10 +3440,48 @@

+
+
+

Test Plan

+
+
+

Tests

+

itemwait|33

+

rerun-downstream-item|20

+

rerunclean|20

+

fullrun|18

+

goodtests|18

+

kill-rerun|17

+

items-runconfigvars|16

+

ro_test|16

+

runconfig-tests|16

+

env-pollution|13

+

itemmap|11

+

testpatt_envvar|10

+

toprun|10

+

chained-waiton|8

+

skip-on-fileexists|8

+

killrun_preqfail|7

+

subrun|6

+

dependencies|5

+

itemwait-simple|4

+

rollup|4

+

end-of-run|3

+

killrun|3

+

listener|3

+

test2|3

+

testpatt|3

+

env-pollution-usecacheno|2

+

set-values|2 +envvars|1 +listruns-tests|1 +subrun-usecases|1

+
+

Megatest Internals

@@ -3461,11 +3499,11 @@

Index: docs/manual/megatest_manual.txt ================================================================== --- docs/manual/megatest_manual.txt +++ docs/manual/megatest_manual.txt @@ -116,10 +116,12 @@ include::writing_tests.txt[] include::howto.txt[] include::reference.txt[] + +include::testplan.txt[] Megatest Internals ------------------ ["graphviz", "server.png"] ADDED docs/manual/testplan.txt Index: docs/manual/testplan.txt ================================================================== --- /dev/null +++ docs/manual/testplan.txt @@ -0,0 +1,81 @@ +// This file is part of Megatest. +// +// Megatest is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Megatest is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Megatest. If not, see . + +// Copyright 2006-2020, Matthew Welland. + +Test Plan +--------- + +Tests +~~~~~ + +itemwait|33 + +rerun-downstream-item|20 + +rerunclean|20 + +fullrun|18 + +goodtests|18 + +kill-rerun|17 + +items-runconfigvars|16 + +ro_test|16 + +runconfig-tests|16 + +env-pollution|13 + +itemmap|11 + +testpatt_envvar|10 + +toprun|10 + +chained-waiton|8 + +skip-on-fileexists|8 + +killrun_preqfail|7 + +subrun|6 + +dependencies|5 + +itemwait-simple|4 + +rollup|4 + +end-of-run|3 + +killrun|3 + +listener|3 + +test2|3 + +testpatt|3 + +env-pollution-usecacheno|2 + +set-values|2 +envvars|1 +listruns-tests|1 +subrun-usecases|1 + + Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -205,17 +205,19 @@ (current-seconds) start-seconds))))) (kill-tries 0)) ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) - (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10 update-db: #t) (let loop ((minutes (calc-minutes)) (cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (disk-free (get-df (current-directory))) (last-sync (current-seconds))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)) + ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - + ;; top of loop encountered at "(current-seconds)" with + ;; last-sync="last-sync)) (let* ((over-time (> (current-seconds) (+ last-sync update-period))) (new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f))) (delta (abs (- load cpu-load)))) (if (> delta 0.1) ;; don't bother updating with small changes load @@ -233,33 +235,28 @@ (test-info (rmt:get-test-info-by-id run-id test-id)) (state (db:test-get-state test-info)) (status (db:test-get-status test-info)) (kill-reason "no kill reason specified") (kill-job? #f)) - (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) + #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)) (cond ((test-get-kill-request run-id test-id) (set! kill-reason "KILLING TEST since received kill request (KILLREQ)") (set! kill-job? #t)) ((and runtlim (> (- (current-seconds) start-seconds) runtlim)) (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim)) (set! kill-job? #t)) ((equal? status "DEAD") - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f update-db: #t) (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.") ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING (set! kill-job? #f))) (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync) (launch:handle-zombie-tests run-id) - (when do-sync - ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append) - ;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes))))) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds))) - (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f) - (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))) - + (if do-sync ;; save meta data about the running of this test + (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)) (if kill-job? (begin (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason) (mutex-lock! m) ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this @@ -312,11 +309,11 @@ (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free) (if do-sync (current-seconds) last-sync))))))) - (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f update-db: #t))) ;; NOTE: Checking twice for keep-going is intentional (define (launch:execute encoded-cmd) (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) (tconfigreg #f)) @@ -465,11 +462,13 @@ (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + (and (equal? (db:test-get-state test-info) "COMPLETED") ;; completed/abort => rerun if asked + (member (db:test-get-status test-info) '("ABORT")))) (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.6568) +(define megatest-version 1.6569) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -39,10 +39,13 @@ (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) + +(declare (uses mutils)) +(import mutils) (declare (uses adjutant)) (import adjutant) ;; (declare (uses ftail)) @@ -62,11 +65,11 @@ ;; Added for csv stuff - will be removed ;; (use sparse-vectors) -(require-library mutils) +;; (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -22,10 +22,12 @@ (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") +(include "db_records.scm") + ;; (declare (uses rmtmod)) ;; (import rmtmod) ;; @@ -54,18 +56,39 @@ (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id +(define *rmt-query-last-call-time* 0) +(define *rmt-query-last-rest-time* 0) ;; last time there was at least a 1/2 second rest - giving other processes access to the db + +;; NOTE: This query rest algorythm will not adapt to long query times. REDESIGN NEEDED. TODO. FIXME. +;; +(define (rmt:query-rest) + (let* ((now (current-milliseconds))) + (cond + ((> (- now *rmt-query-last-call-time*) 500) ;; it's been a while since last query - no need to rest + (set! *rmt-query-last-rest-time* now) + (set! *rmt-query-last-call-time* now)) + ((> (- now *rmt-query-last-rest-time*) 5000) ;; no natural rests have happened + (debug:print 0 *default-log-port* "query rest needed. blocking for 1/2 second.") + (thread-sleep! 0.5) ;; force a rest of a half second + (set! *rmt-query-last-rest-time* now) + (set! *rmt-query-last-call-time* now)) + (else ;; sufficient rests have occurred, just record the last query time + (set! *rmt-query-last-call-time* now))))) + ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) - + (if (not (equal? (configf:lookup *configdat* "setup" "query-rest") "no")) + (rmt:query-rest)) + (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) @@ -525,15 +548,24 @@ (rmt:general-call 'register-test run-id run-id test-name item-path)) (define (rmt:get-test-id run-id testname item-path) (rmt:send-receive 'get-test-id run-id (list run-id testname item-path))) -;; run-id is NOT used +;; run-id is NOT used - but it will be! ;; (define (rmt:get-test-info-by-id run-id test-id) (if (number? test-id) - (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)) + (let* ((testdat (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))) + (trundatf (conc (db:test-get-rundir testdat) "/.mt_data/test-run.dat"))) + ;; now we can update a couple fields from the filesystem + (if (and (db:test-get-rundir testdat) + (file-exists? trundatf)) + (let* ((duration (db:test-get-run_duration testdat)) + (event-time (db:test-get-event_time testdat)) + (last-touch (file-modification-time trundatf))) + (db:test-set-run_duration! testdat (max duration (- last-touch event-time))))) + testdat) (begin (debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id) (print-call-chain (current-error-port)) #f))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -585,14 +585,17 @@ (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) (common:snapshot-file mtdbfile subdir: ".db-snapshot")) (delete-file* staging-file) (let* ((start-time (current-milliseconds)) (res (system sync-cmd)) + (dbbackupfile (conc mtdbfile ".backup")) (res2 (cond - ((eq? 0 res) - (delete-file* (conc mtdbfile ".backup")) + ((eq? 0 res ) + (if (file-exists? dbbackupfile) + (delete-file* dbbackupfile) + ) (if (eq? 0 (file-size sync-log)) (delete-file sync-log)) (system (conc "/bin/mv " staging-file " " mtdbfile)) (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1944,52 +1944,53 @@ tdb "SELECT count(id) FROM test_rundat;") res)) 0) -(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname) - (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)) - (if (and cpuload diskfree) - (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) - (if minutes - (rmt:general-call 'update-run-duration run-id minutes test-id)) - (if (and uname hostname) - (rmt:general-call 'update-uname-host run-id uname hostname test-id))) +;; +(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname #!key (update-db #f)(tmpfree #f)) + (if (get-environment-variable "MT_TEST_RUN_DIR") + (let* ((dest-dir (conc (get-environment-variable "MT_TEST_RUN_DIR") "/.mt_data")) + (or-dash (lambda (instr) + (cond + ((not instr) "") ;; #f -> blank, indicates value unchanged since last measurement taken + ((string? instr)(if (string-search " " instr) (conc "\"" instr "\"") instr)) + (else instr)))) + (file-new (not (directory-exists? dest-dir)))) + (if file-new (create-directory dest-dir #t)) + (let* ((outp (open-output-file (conc dest-dir "/test-run.dat") #:append))) + (with-output-to-port outp + (lambda () + (if file-new + (print "epoch_time,run_id,test_id,cpuload,diskfree,tmpfree,run_minutes,hostname,uname")) + (print (current-seconds) "," (or-dash run-id) "," (or-dash test-id) "," + (or-dash cpuload) "," (or-dash diskfree) "," (or-dash tmpfree) "," + (or-dash minutes) "," (or-dash hostname) "," + (or-dash uname)))) ;; put uname last as it has spaces in it + (close-output-port outp))) + (begin + (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1)))) + (if update-db + (begin + (if (and cpuload diskfree) + (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)) + (if minutes + (rmt:general-call 'update-run-duration run-id minutes test-id)) + (if (and uname hostname) + (rmt:general-call 'update-uname-host run-id uname hostname test-id))))) ;; This one is for running with no db access (i.e. via rmt: internally) -(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries) +(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries #!key (update-db #f)) ;; (define (tests:set-full-meta-info test-id run-id minutes work-area) ;; (let ((remtries 10)) (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) + (tmpfree (get-df "/tmp")) (uname (get-uname "-srvpio")) (hostname (get-host-name))) - (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))) - -;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) -#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) - (let* ((cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (remtries 10)) - (handle-exceptions - exn - (if (> remtries 0) - (begin - (print-call-chain (current-error-port)) - (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") - (set! remtries (- remtries 1)) - (thread-sleep! 10) - (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) - (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) - (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") - (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) - (print-call-chain (current-error-port)))) - (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes) - ))) + (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree))) + ;;====================================================================== ;; A R C H I V I N G ;;======================================================================