Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -38,11 +38,12 @@ ARCHSTR=$(shell lsb_release -sr) # ARCHSTR=$(shell bash -c "echo \$$MACHTYPE") PNGFILES = $(shell cd docs/manual;ls *png) -all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard +#all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut ndboard +all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut mtest: $(OFILES) readline-fix.scm megatest.o csc $(CSCOPTS) $(OFILES) megatest.o -o mtest dboard : $(OFILES) $(GOFILES) dashboard.scm @@ -234,11 +235,12 @@ install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \ $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \ $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \ $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \ - $(PREFIX)/share/db/mt-pg.sql $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/tcmt + $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql +# $(PREFIX)/bin/.$(ARCHSTR)/ndboard # $(PREFIX)/bin/newdashboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -28,10 +28,27 @@ ;; (define (exit . code) ;; (if (null? code) ;; (old-exit) ;; (old-exit code))) + +;; execute thunk, return value. If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* . +;; arguments - thunk, message +(define (common:fail-safe thunk warning-message-on-exception) + (handle-exceptions + exn + (begin + (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception) + (debug:print-info 0 *default-log-port* + (string-substitute "\n?Error:" "nonfatal condition:" + (with-output-to-string + (lambda () + (print-error-message exn) )))) + (debug:print-info 0 *default-log-port* " -- continuing after nonfatal condition...") + #f) + (thunk))) + (define getenv get-environment-variable) (define (safe-setenv key val) (if (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables. (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\"") (if (and (string? val) @@ -43,10 +60,12 @@ (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))) (define home (getenv "HOME")) (define user (getenv "USER")) + +;; returns list of fd count, socket count (define (get-file-descriptor-count #!key (pid (current-process-id ))) (list (length (glob (conc "/proc/" pid "/fd/*"))) (length (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*"))))) ) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -802,11 +802,19 @@ (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) (if (or (null? tal) (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update (begin - (if (> elapsed-time 2)(print "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")) + (when (> elapsed-time 2) + (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.") + (let* ((old-val (iup:attribute *tim* "TIME")) + (new-val (number->string (inexact->exact (floor (* 2 (string->number old-val))))))) + (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val) + (iup:attribute-set! *tim* "TIME" new-val)) + + + ) (dboard:tabdat-allruns-set! tabdat new-res) maxtests) (if (> (dboard:rundat-run-data-offset run-struct) 0) (loop run tal new-res newmaxtests) ;; not done getting data for this run (loop (car tal)(cdr tal) new-res newmaxtests))))))) @@ -1631,35 +1639,35 @@ (define (tree-path->run-id tabdat path) (if (not (null? path)) (hash-table-ref/default (dboard:tabdat-path-run-ids tabdat) path #f) #f)) -(define (dboard:get-tests-dat tabdat run-id last-update) - (let* ((access-mode (dboard:tabdat-access-mode tabdat)) - (tdat (if run-id (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run - run-id - (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") - (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)) ;; '() - (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)) ;; '() - #f #f ;; offset limit - (dboard:tabdat-hide-not-hide tabdat) ;; not-in - #f #f ;; sort-by sort-order - #f ;; get all? "id,testname,item_path,state,status,event_time,run_duration" ;; qryval - (if (dboard:tabdat-filters-changed tabdat) - 0 - last-update) - *dashboard-mode*) - '()))) ;; get 'em all - ;; (debug:print 0 *default-log-port* "dboard:get-tests-dat: got " (length tdat) " test records for run " run-id) - (sort tdat (lambda (a b) - (let* ((aval (vector-ref a 2)) - (bval (vector-ref b 2)) - (anum (string->number aval)) - (bnum (string->number bval))) - (if (and anum bnum) - (< anum bnum) - (string<= aval bval))))))) +;; (define (dboard:get-tests-dat tabdat run-id last-update) +;; (let* ((access-mode (dboard:tabdat-access-mode tabdat)) +;; (tdat (if run-id (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run +;; run-id +;; (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") +;; (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)) ;; '() +;; (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)) ;; '() +;; #f #f ;; offset limit +;; (dboard:tabdat-hide-not-hide tabdat) ;; not-in +;; #f #f ;; sort-by sort-order +;; #f ;; get all? "id,testname,item_path,state,status,event_time,run_duration" ;; qryval +;; (if (dboard:tabdat-filters-changed tabdat) +;; 0 +;; last-update) +;; *dashboard-mode*) +;; '()))) ;; get 'em all +;; ;; (debug:print 0 *default-log-port* "dboard:get-tests-dat: got " (length tdat) " test records for run " run-id) +;; (sort tdat (lambda (a b) +;; (let* ((aval (vector-ref a 2)) +;; (bval (vector-ref b 2)) +;; (anum (string->number aval)) +;; (bnum (string->number bval))) +;; (if (and anum bnum) +;; (< anum bnum) +;; (string<= aval bval))))))) (define (dashboard:safe-cadr-assoc name lst) (let ((res (assoc name lst))) (if (and res (> (length res) 1)) @@ -2437,11 +2445,11 @@ ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f) (common:run-a-command (conc "megatest -set-state-status KILLREQ,n/a -target " target " -runname " runname " -testpatt " item-test-path - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) (iup:menu-item "Run" (iup:menu @@ -2479,11 +2487,11 @@ (lambda (obj) (common:run-a-command (conc "megatest -set-state-status KILLREQ,n/a -target " target " -runname " runname " -testpatt % " - " -state RUNNING,REMOTEHOSTSTART,LAUNCHED")))) + " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED")))) (iup:menu-item "Delete Run Data" #:action (lambda (obj) (common:run-a-command Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -479,20 +479,22 @@ '("run_duration" #f) '("comment" #f) '("event_time" #f) '("fail_count" #f) '("pass_count" #f) - '("archived" #f)) + '("archived" #f) + '("last_update" #f)) (list "test_steps" '("id" #f) '("test_id" #f) '("stepname" #f) '("state" #f) '("status" #f) '("event_time" #f) '("comment" #f) - '("logfile" #f)) + '("logfile" #f) + '("last_update" #f)) (list "test_data" '("id" #f) '("test_id" #f) '("category" #f) '("variable" #f) @@ -500,11 +502,12 @@ '("expected" #f) '("tol" #f) '("units" #f) '("comment" #f) '("status" #f) - '("type" #f)))) + '("type" #f) + '("last_update" #f)))) ;; needs db to get keys, this is for syncing all tables ;; (define (db:sync-main-list dbstruct) (let ((keys (db:get-keys dbstruct))) @@ -516,11 +519,11 @@ (list "metadat" '("var" #f) '("val" #f)) (append (list "runs" '("id" #f)) (map (lambda (k)(list k #f)) (append keys - (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour")))) + (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) (list "test_meta" '("id" #f) '("testname" #f) '("owner" #f) '("description" #f) DELETED emergency-patch-1.scm Index: emergency-patch-1.scm ================================================================== --- emergency-patch-1.scm +++ /dev/null @@ -1,203 +0,0 @@ - - -;; These are called by the server on recipt of /api calls -;; - keep it simple, only return the actual result of the call, i.e. no meta info here -;; -;; - returns #( flag result ) -;; -(define (api:execute-requests dbstruct dat) - (handle-exceptions - exn - (let ((call-chain (get-call-chain))) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) - (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens - (cond - ((not (vector? dat)) ;; it is an error to not receive a vector - (vector #f (vector #f "remote must be called with a vector"))) - ((> *api-process-request-count* 20) ;; 20) - (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") - (set! *server-overloaded* #t) - (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! - (else - (let* ((cmd-in (vector-ref dat 0)) - (cmd (if (symbol? cmd-in) - cmd-in - (string->symbol cmd-in))) - (params (vector-ref dat 1)) - (start-t (current-milliseconds)) - (readonly-mode (dbr:dbstruct-read-only dbstruct)) - (readonly-command (member cmd api:read-only-queries)) - (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) - (res - (if writecmd-in-readonly-mode - (conc "attempt to run write command "cmd" on a read-only database") - (case cmd - ;;=============================================== - ;; READ/WRITE QUERIES - ;;=============================================== - - ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl - - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; TESTS - - ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) - ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. - ((test-set-state-status-by-id) - - ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) - (db:set-state-status-and-roll-up-items - dbstruct - (list-ref params 0) ; run-id - (list-ref params 1) ; test-name - #f ; item-path - (list-ref params 2) ; state - (list-ref params 3) ; status - (list-ref params 4) ; comment - )) - - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) - ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) - ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) - - ;; RUNS - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((update-run-stats) (apply db:update-run-stats dbstruct params)) - ((set-var) (apply db:set-var dbstruct params)) - ((del-var) (apply db:del-var dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - - ;; MISC - ((sync-inmem->db) (let ((run-id (car params))) - (db:sync-touched dbstruct run-id force-sync: #t))) - ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - - ;; TESTMETA - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - ((get-tests-tags) (db:get-tests-tags dbstruct)) - - ;; TASKS - ((tasks-add) (apply tasks:add dbstruct params)) - ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) - ((tasks-get-last) (apply tasks:get-last dbstruct params)) - - ;; NO SYNC DB - ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) - ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) - ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) - - ;; ARCHIVES - ;; ((archive-get-allocations) - ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) - ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) - ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) - - ;;====================================================================== - ;; READ ONLY QUERIES - ;;====================================================================== - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - ((get-key-vals) (apply db:get-key-vals dbstruct params)) - ((get-target) (apply db:get-target dbstruct params)) - ((get-targets) (db:get-targets dbstruct)) - - ;; ARCHIVES - ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - ((synchash-get) (apply synchash:server-get dbstruct params)) - ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((get-num-runs) (apply db:get-num-runs dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) - ((get-var) (apply db:get-var dbstruct params)) - ((get-run-stats) (apply db:get-run-stats dbstruct params)) - - ;; STEPS - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) - - ;; TEST DATA - ((read-test-data) (apply db:read-test-data dbstruct params)) - ((read-test-data*) (apply db:read-test-data* dbstruct params)) - - ;; MISC - ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) - ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:general-call dbstruct stmtname realparams))) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) - - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - - ;; TASKS - ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) - (else - (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) - (conc "ERROR: BAD api call " cmd)))))) - - ;; save all stats - (let ((delta-t (- (current-milliseconds) - start-t))) - (hash-table-set! *db-api-call-time* cmd - (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) - (if writecmd-in-readonly-mode - (vector #f res) - (vector #t res))))))) DELETED emergency-patch-2.scm Index: emergency-patch-2.scm ================================================================== --- emergency-patch-2.scm +++ /dev/null @@ -1,311 +0,0 @@ -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") -(include "test_records.scm") - -(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) - (let* ((loadavg (common:get-cpu-load remote-host)) - (first (car loadavg)) - (next (cadr loadavg)) - (adjload (* maxload numcpus)) - (loadjmp (- first next))) - (cond - ((and (> first adjload) - (> count 0)) - (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg "")) - (thread-sleep! waitdelay) - (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) - ((and (> loadjmp numcpus) - (> count 0)) - (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) - (thread-sleep! waitdelay) - (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) - -(define (common:wait-for-homehost-load maxload msg) - (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. - #f - (common:get-homehost))) - (hh (if hh-dat (car hh-dat) #f)) - (numcpus (common:get-num-cpus hh))) - (common:wait-for-normalized-load maxload msg: msg remote-host: hh))) - -;; wait for normalized cpu load to drop below maxload -;; -(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f)) - (let ((num-cpus (common:get-num-cpus remote-host))) - (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) - -;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) -(define (runs:process-expanded-tests runsdat testdat) - ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). - (let* ((hed (runs:testdat-hed testdat)) - (tal (runs:testdat-tal testdat)) - (reg (runs:testdat-reg testdat)) - (reruns (runs:testdat-reruns testdat)) - (test-name (runs:testdat-test-name testdat)) - (item-path (runs:testdat-item-path testdat)) - (jobgroup (runs:testdat-jobgroup testdat)) - (waitons (runs:testdat-waitons testdat)) - (item-path (runs:testdat-item-path testdat)) - (testmode (runs:testdat-testmode testdat)) - (newtal (runs:testdat-newtal testdat)) - (itemmaps (runs:testdat-itemmaps testdat)) - (test-record (runs:testdat-test-record testdat)) - (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) - - (reglen (runs:dat-reglen runsdat)) - (regfull (runs:dat-regfull runsdat)) - (runname (runs:dat-runname runsdat)) - (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) - (run-id (runs:dat-run-id runsdat)) - (test-patts (runs:dat-test-patts runsdat)) - (required-tests (runs:dat-required-tests runsdat)) - (test-registry (runs:dat-test-registry runsdat)) - (registry-mutex (runs:dat-registry-mutex runsdat)) - (flags (runs:dat-flags runsdat)) - (keyvals (runs:dat-keyvals runsdat)) - (run-info (runs:dat-run-info runsdat)) - (all-tests-registry (runs:dat-all-tests-registry runsdat)) - (run-limits-info (runs:dat-can-run-more-tests runsdat)) - ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup(list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) - (fails (if (list? prereqs-not-met) - (runs:calc-fails prereqs-not-met) - (begin - (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) - '()))) - (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! - (not (equal? x hed))) - (runs:calc-not-completed prereqs-not-met))) - (loop-list (list hed tal reg reruns)) - ;; configure the load runner - (numcpus (common:get-num-cpus #f)) - (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable - (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable - (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) - (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) - ", ") ") fails: " fails - "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - - - - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? - (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) - - (cond - - ;; Check item path against item-patts, - ;; - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - (if (or (not (null? tal))(not (null? reg))) - (list (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns) - #f)) - - ;; Register tests - ;; - ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) - (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) - ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs - (let register-loop ((numtries 15)) - (rmt:register-test run-id test-name item-path) - (if (rmt:get-test-id run-id test-name item-path) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) - (if (> numtries 0) - (begin - (thread-sleep! 0.5) - (register-loop (- numtries 1))) - (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) - (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) - (begin - (rmt:register-test run-id test-name "") - (if (rmt:get-test-id run-id test-name "") - (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - (if (and (null? tal)(null? reg)) - (list hed tal (append reg (list hed)) reruns) - (list (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - ;; NB// Here we are building reg as we register tests - ;; if regfull we must pop the front item off reg - (if regfull - (append (cdr reg) (list hed)) - (append reg (list hed))) - reruns))) - - ;; At this point hed test registration must be completed. - ;; - ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) - 'start) - (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " - (string-intersperse - (filter (lambda (x) - (eq? (hash-table-ref/default test-registry x #f) 'start)) - (hash-table-keys test-registry)) - ", ")) - (thread-sleep! 0.051) - (list hed tal reg reruns)) - - ;; If no resources are available just kill time and loop again - ;; - ((not have-resources) ;; simply try again after waiting a second - (if (runs:lownoise "no resources" 60) - (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) - ;; Have gone back and forth on this but db starvation is an issue. - ;; wait one second before looking again to run jobs. - (thread-sleep! 1) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (list (car newtal)(cdr newtal) reg reruns)) - - ;; This is the final stage, everything is in place so launch the test - ;; - ((and have-resources - (or (null? prereqs-not-met) - (and (member 'toplevel testmode) ;; 'toplevel) - (null? non-completed) - (not (member 'exclusive testmode))))) - ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) - ;; we are going to reset all the counters for test retries by setting a new hash table - ;; this means they will increment only when nothing can be run - (set! *max-tries-hash* (make-hash-table)) - ;; well, first lets see if cpu load throttling is enabled. If so wait around until the - ;; average cpu load is under the threshold before continuing - (if maxload ;; only gate if maxload is specified - (common:wait-for-cpuload maxload numcpus waitdelay)) - (if maxhomehostload - (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) - - (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) - (runs:incremental-print-results run-id) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - (if (or (not (null? tal))(not (null? reg))) - (list (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns) - #f)) - - ;; must be we have unmet prerequisites - ;; - (else - (debug:print 4 *default-log-port* "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) - (if (and (not (null? prereqs-not-met)) - (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) - (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse - (runs:mixed-list-testname-and-testrec->list-of-strings - prereqs-not-met) ", "))) - (if (or (null? fails) - (member 'toplevel testmode)) - (begin - ;; couldn't run, take a breather - (if (runs:lownoise "Waiting for more work to do..." 60) - (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) - (thread-sleep! 1) - (list (car newtal)(cdr newtal) reg reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (if (or (not (null? reg))(not (null? tal))) - (if (vector? hed) - (begin - (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path - " from the launch list as it has prerequistes that are FAIL") - (let ((test-id (rmt:get-test-id run-id hed ""))) - (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (thread-sleep! *global-delta*) - ;; This next is for the items - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) - (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) - (list (runs:queue-next-hed tal reg reglen regfull) - (runs:queue-next-tal tal reg reglen regfull) - (runs:queue-next-reg tal reg reglen regfull) - reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? - )) - (let ((nth-try (hash-table-ref/default test-registry hed 0))) - (cond - ((member "RUNNING" (map db:test-get-state prereqs-not-met)) - (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) - (thread-sleep! 4) - (list (runs:queue-next-hed newtal reg reglen regfull) - (runs:queue-next-tal newtal reg reglen regfull) - (runs:queue-next-reg newtal reg reglen regfull) - reruns)) - ((or (not nth-try) - (and (number? nth-try) - (< nth-try 10))) - (hash-table-set! test-registry hed (if (number? nth-try) - (+ nth-try 1) - 0)) - (if (runs:lownoise (conc "not removing test " hed) 60) - (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) - ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") - (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) - ;; (list hed tal reg reruns) - ;; (list (car newtal)(cdr newtal) reg reruns) - ;; (hash-table-set! test-registry hed 'removed) - (list (runs:queue-next-hed newtal reg reglen regfull) - (runs:queue-next-tal newtal reg reglen regfull) - (runs:queue-next-reg newtal reg reglen regfull) - reruns)) - ((symbol? nth-try) - (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW - (if (null? tal) - #f ;; yes, really - (list (car tal)(cdr tal) reg reruns)) - (begin - (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) - (hash-table-set! test-registry hed 0) - (list (runs:queue-next-hed newtal reg reglen regfull) - (runs:queue-next-tal newtal reg reglen regfull) - (runs:queue-next-reg newtal reg reglen regfull) - reruns)))) - (else - (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) - (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) - ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) - (hash-table-set! test-registry hed 'removed) - (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) - ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL - (list (if (null? tal)(car newtal)(car tal)) - tal - reg - reruns))))) - ;; can't drop this - maybe running? Just keep trying - (let ((runable-tests (runs:runable-tests prereqs-not-met))) - (if (null? runable-tests) - #f ;; I think we are truly done here - (list (runs:queue-next-hed newtal reg reglen regfull) - (runs:queue-next-tal newtal reg reglen regfull) - (runs:queue-next-reg newtal reg reglen regfull) - reruns))))))))) DELETED emergency-patch-3.scm Index: emergency-patch-3.scm ================================================================== --- emergency-patch-3.scm +++ /dev/null @@ -1,81 +0,0 @@ - ;; To build patch: - ;;;;;;;;;;;;;;;;;;;;;;;;; - ;; ldd /p/foundry/env/pkgs/megatest/1.64/19/bin/.11/mtest - ;; linux-vdso.so.1 => (0x00002aaaaaaab000) - ;; libchicken.so.7 => /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0//lib/libchicken.so.7 (0x00002aaaaaaad000) - ;; libm.so.6 => /lib64/libm.so.6 (0x00002aaaab0a6000) - ;; libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab31f000) - ;; libc.so.6 => /lib64/libc.so.6 (0x00002aaaab523000) - ;; /lib64/ld-linux-x86-64.so.2 (0x0000555555554000) - ;; - ;; /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0/bin/csc -s emergency-patch-3.scm - ;; - - - ;; to test patch: - ;;;;;;;;;;;;;;;;;;;;;;;;; - ;; in .megatestrc, add: - ;; (if (and (> megatest-version 1.64) - ;; (< megatest-version 1.6421)) - ;; (begin - ;; (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-1.so") - ;; (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-2.so"))) - ;; - - - ;; to productize patch: - ;;;;;;;;;;;;;;;;;;;;;;;;; - ;; -(use directory-utils regex) - -(include "common_records.scm") -(include "key_records.scm") -(include "db_records.scm") -(include "run_records.scm") -(include "test_records.scm") - -;; Given a run id start a server process ### NOTE ### > file 2>&1 -;; if the run-id is zero and the target-host is set -;; try running on that host -;; incidental: rotate logs in logs/ dir. -;; -(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area - (let* ((curr-host (get-host-name)) - ;; (attempt-in-progress (server:start-attempted? areapath)) - ;; (dot-server-url (server:check-if-running areapath)) - (curr-ip (server:get-best-guess-address curr-host)) - (curr-pid (current-process-id)) - (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) - (target-host (car homehost)) - (testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) - (cmdln (conc (common:get-megatest-exe) - " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") - " -daemonize " - "") - ;; " -log " logfile - " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) - (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) - (load-limit (configf:lookup-number *configdat* "server" "load-limit" default: 0.9))) - ;; we want the remote server to start in *toppath* so push there - (push-directory areapath) - (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") - (thread-start! log-rotate) - - ;; host.domain.tld match host? - (if (and target-host - ;; look at target host, is it host.domain.tld or ip address and does it - ;; match current ip or hostname - (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) - (not (equal? curr-ip target-host))) - (begin - (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) - (setenv "TARGETHOST" target-host))) - - (setenv "TARGETHOST_LOGF" logfile) - (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever - (system (conc "nbfake " cmdln)) - (unsetenv "TARGETHOST_LOGF") - (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) - (thread-join! log-rotate) - (pop-directory))) ADDED emergency-patches/emergency-patch-1.scm Index: emergency-patches/emergency-patch-1.scm ================================================================== --- /dev/null +++ emergency-patches/emergency-patch-1.scm @@ -0,0 +1,203 @@ + + +;; These are called by the server on recipt of /api calls +;; - keep it simple, only return the actual result of the call, i.e. no meta info here +;; +;; - returns #( flag result ) +;; +(define (api:execute-requests dbstruct dat) + (handle-exceptions + exn + (let ((call-chain (get-call-chain))) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat) + (print-call-chain (current-error-port)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens + (cond + ((not (vector? dat)) ;; it is an error to not receive a vector + (vector #f (vector #f "remote must be called with a vector"))) + ((> *api-process-request-count* 20) ;; 20) + (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") + (set! *server-overloaded* #t) + (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! + (else + (let* ((cmd-in (vector-ref dat 0)) + (cmd (if (symbol? cmd-in) + cmd-in + (string->symbol cmd-in))) + (params (vector-ref dat 1)) + (start-t (current-milliseconds)) + (readonly-mode (dbr:dbstruct-read-only dbstruct)) + (readonly-command (member cmd api:read-only-queries)) + (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) + (res + (if writecmd-in-readonly-mode + (conc "attempt to run write command "cmd" on a read-only database") + (case cmd + ;;=============================================== + ;; READ/WRITE QUERIES + ;;=============================================== + + ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl + + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; TESTS + + ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) + ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. + ((test-set-state-status-by-id) + + ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) + (db:set-state-status-and-roll-up-items + dbstruct + (list-ref params 0) ; run-id + (list-ref params 1) ; test-name + #f ; item-path + (list-ref params 2) ; state + (list-ref params 3) ; status + (list-ref params 4) ; comment + )) + + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-state-status) (apply db:test-set-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params)) + ((top-test-set-per-pf-counts) (apply db:top-test-set-per-pf-counts dbstruct params)) + ((test-set-archive-block-id) (apply db:test-set-archive-block-id dbstruct params)) + + ;; RUNS + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((update-run-stats) (apply db:update-run-stats dbstruct params)) + ((set-var) (apply db:set-var dbstruct params)) + ((del-var) (apply db:del-var dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + + ;; MISC + ((sync-inmem->db) (let ((run-id (car params))) + (db:sync-touched dbstruct run-id force-sync: #t))) + ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; TESTMETA + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) + ((get-tests-tags) (db:get-tests-tags dbstruct)) + + ;; TASKS + ((tasks-add) (apply tasks:add dbstruct params)) + ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) + ((tasks-get-last) (apply tasks:get-last dbstruct params)) + + ;; NO SYNC DB + ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) + ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) + ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) + + ;; ARCHIVES + ;; ((archive-get-allocations) + ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) + ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) + ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) + + ;;====================================================================== + ;; READ ONLY QUERIES + ;;====================================================================== + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + ((get-key-vals) (apply db:get-key-vals dbstruct params)) + ((get-target) (apply db:get-target dbstruct params)) + ((get-targets) (db:get-targets dbstruct)) + + ;; ARCHIVES + ((test-get-archive-block-info) (apply db:test-get-archive-block-info dbstruct params)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + ((synchash-get) (apply synchash:server-get dbstruct params)) + ((get-raw-run-stats) (apply db:get-raw-run-stats dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-run-mindata) (apply db:get-tests-for-run-mindata dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((get-num-runs) (apply db:get-num-runs dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((get-var) (apply db:get-var dbstruct params)) + ((get-run-stats) (apply db:get-run-stats dbstruct params)) + + ;; STEPS + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) + + ;; TEST DATA + ((read-test-data) (apply db:read-test-data dbstruct params)) + ((read-test-data*) (apply db:read-test-data* dbstruct params)) + + ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) + ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:general-call dbstruct stmtname realparams))) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + ((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params)) + + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + + ;; TASKS + ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)) + (else + (debug:print 0 *default-log-port* "ERROR: bad api call " cmd) + (conc "ERROR: BAD api call " cmd)))))) + + ;; save all stats + (let ((delta-t (- (current-milliseconds) + start-t))) + (hash-table-set! *db-api-call-time* cmd + (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) + (if writecmd-in-readonly-mode + (vector #f res) + (vector #t res))))))) ADDED emergency-patches/emergency-patch-2.scm Index: emergency-patches/emergency-patch-2.scm ================================================================== --- /dev/null +++ emergency-patches/emergency-patch-2.scm @@ -0,0 +1,311 @@ +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") + +(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) + (let* ((loadavg (common:get-cpu-load remote-host)) + (first (car loadavg)) + (next (cadr loadavg)) + (adjload (* maxload numcpus)) + (loadjmp (- first next))) + (cond + ((and (> first adjload) + (> count 0)) + (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg "")) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))) + ((and (> loadjmp numcpus) + (> count 0)) + (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) + (thread-sleep! waitdelay) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))))) + +(define (common:wait-for-homehost-load maxload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f)) + (numcpus (common:get-num-cpus hh))) + (common:wait-for-normalized-load maxload msg: msg remote-host: hh))) + +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f)) + (let ((num-cpus (common:get-num-cpus remote-host))) + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) + +;; hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps) +(define (runs:process-expanded-tests runsdat testdat) + ;; unroll the contents of runsdat and testdat (due to ongoing refactoring). + (let* ((hed (runs:testdat-hed testdat)) + (tal (runs:testdat-tal testdat)) + (reg (runs:testdat-reg testdat)) + (reruns (runs:testdat-reruns testdat)) + (test-name (runs:testdat-test-name testdat)) + (item-path (runs:testdat-item-path testdat)) + (jobgroup (runs:testdat-jobgroup testdat)) + (waitons (runs:testdat-waitons testdat)) + (item-path (runs:testdat-item-path testdat)) + (testmode (runs:testdat-testmode testdat)) + (newtal (runs:testdat-newtal testdat)) + (itemmaps (runs:testdat-itemmaps testdat)) + (test-record (runs:testdat-test-record testdat)) + (prereqs-not-met (runs:testdat-prereqs-not-met testdat)) + + (reglen (runs:dat-reglen runsdat)) + (regfull (runs:dat-regfull runsdat)) + (runname (runs:dat-runname runsdat)) + (max-concurrent-jobs (runs:dat-max-concurrent-jobs runsdat)) + (run-id (runs:dat-run-id runsdat)) + (test-patts (runs:dat-test-patts runsdat)) + (required-tests (runs:dat-required-tests runsdat)) + (test-registry (runs:dat-test-registry runsdat)) + (registry-mutex (runs:dat-registry-mutex runsdat)) + (flags (runs:dat-flags runsdat)) + (keyvals (runs:dat-keyvals runsdat)) + (run-info (runs:dat-run-info runsdat)) + (all-tests-registry (runs:dat-all-tests-registry runsdat)) + (run-limits-info (runs:dat-can-run-more-tests runsdat)) + ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup(list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + ;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) + ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) + (fails (if (list? prereqs-not-met) + (runs:calc-fails prereqs-not-met) + (begin + (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) + '()))) + (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! + (not (equal? x hed))) + (runs:calc-not-completed prereqs-not-met))) + (loop-list (list hed tal reg reruns)) + ;; configure the load runner + (numcpus (common:get-num-cpus #f)) + (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable + (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable + (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) + (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) + ", ") ") fails: " fails + "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + + + + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", "))) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info) + + (cond + + ;; Check item path against item-patts, + ;; + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + (if (or (not (null? tal))(not (null? reg))) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns) + #f)) + + ;; Register tests + ;; + ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)) + (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs + (let register-loop ((numtries 15)) + (rmt:register-test run-id test-name item-path) + (if (rmt:get-test-id run-id test-name item-path) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done) + (if (> numtries 0) + (begin + (thread-sleep! 0.5) + (register-loop (- numtries 1))) + (debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path))))) + (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done)) + (begin + (rmt:register-test run-id test-name "") + (if (rmt:get-test-id run-id test-name "") + (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (list hed tal (append reg (list hed)) reruns) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + ;; NB// Here we are building reg as we register tests + ;; if regfull we must pop the front item off reg + (if regfull + (append (cdr reg) (list hed)) + (append reg (list hed))) + reruns))) + + ;; At this point hed test registration must be completed. + ;; + ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f) + 'start) + (debug:print-info 0 *default-log-port* "Waiting on test registration(s): " + (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.051) + (list hed tal reg reruns)) + + ;; If no resources are available just kill time and loop again + ;; + ((not have-resources) ;; simply try again after waiting a second + (if (runs:lownoise "no resources" 60) + (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ...")) + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (list (car newtal)(cdr newtal) reg reruns)) + + ;; This is the final stage, everything is in place so launch the test + ;; + ((and have-resources + (or (null? prereqs-not-met) + (and (member 'toplevel testmode) ;; 'toplevel) + (null? non-completed) + (not (member 'exclusive testmode))))) + ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) + ;; we are going to reset all the counters for test retries by setting a new hash table + ;; this means they will increment only when nothing can be run + (set! *max-tries-hash* (make-hash-table)) + ;; well, first lets see if cpu load throttling is enabled. If so wait around until the + ;; average cpu load is under the threshold before continuing + (if maxload ;; only gate if maxload is specified + (common:wait-for-cpuload maxload numcpus waitdelay)) + (if maxhomehostload + (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) + + (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) + (runs:incremental-print-results run-id) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (or (not (null? tal))(not (null? reg))) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns) + #f)) + + ;; must be we have unmet prerequisites + ;; + (else + (debug:print 4 *default-log-port* "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) + (if (and (not (null? prereqs-not-met)) + (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) + (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse + (runs:mixed-list-testname-and-testrec->list-of-strings + prereqs-not-met) ", "))) + (if (or (null? fails) + (member 'toplevel testmode)) + (begin + ;; couldn't run, take a breather + (if (runs:lownoise "Waiting for more work to do..." 60) + (debug:print-info 0 *default-log-port* "Waiting for more work to do...")) + (thread-sleep! 1) + (list (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (or (not (null? reg))(not (null? tal))) + (if (vector? hed) + (begin + (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path + " from the launch list as it has prerequistes that are FAIL") + (let ((test-id (rmt:get-test-id run-id hed ""))) + (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))) + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + ;; This next is for the items + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) + (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed) + (list (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns ;; WAS: (cons hed reruns) ;; but that makes no sense? + )) + (let ((nth-try (hash-table-ref/default test-registry hed 0))) + (cond + ((member "RUNNING" (map db:test-get-state prereqs-not-met)) + (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet.")) + (thread-sleep! 4) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((or (not nth-try) + (and (number? nth-try) + (< nth-try 10))) + (hash-table-set! test-registry hed (if (number? nth-try) + (+ nth-try 1) + 0)) + (if (runs:lownoise (conc "not removing test " hed) 60) + (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites")) + ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) + ;; (list hed tal reg reruns) + ;; (list (car newtal)(cdr newtal) reg reruns) + ;; (hash-table-set! test-registry hed 'removed) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)) + ((symbol? nth-try) + (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW + (if (null? tal) + #f ;; yes, really + (list (car tal)(cdr tal) reg reruns)) + (begin + (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry.")) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f) + (hash-table-set! test-registry hed 0) + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns)))) + (else + (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60) + (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now.")) + ;; (debug:print 0 *default-log-port* " prereqs: " prereqs-not-met) + (hash-table-set! test-registry hed 'removed) + (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f) + ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug. + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL + (list (if (null? tal)(car newtal)(car tal)) + tal + reg + reruns))))) + ;; can't drop this - maybe running? Just keep trying + (let ((runable-tests (runs:runable-tests prereqs-not-met))) + (if (null? runable-tests) + #f ;; I think we are truly done here + (list (runs:queue-next-hed newtal reg reglen regfull) + (runs:queue-next-tal newtal reg reglen regfull) + (runs:queue-next-reg newtal reg reglen regfull) + reruns))))))))) ADDED emergency-patches/emergency-patch-3.scm Index: emergency-patches/emergency-patch-3.scm ================================================================== --- /dev/null +++ emergency-patches/emergency-patch-3.scm @@ -0,0 +1,81 @@ + ;; To build patch: + ;;;;;;;;;;;;;;;;;;;;;;;;; + ;; ldd /p/foundry/env/pkgs/megatest/1.64/19/bin/.11/mtest + ;; linux-vdso.so.1 => (0x00002aaaaaaab000) + ;; libchicken.so.7 => /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0//lib/libchicken.so.7 (0x00002aaaaaaad000) + ;; libm.so.6 => /lib64/libm.so.6 (0x00002aaaab0a6000) + ;; libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab31f000) + ;; libc.so.6 => /lib64/libc.so.6 (0x00002aaaab523000) + ;; /lib64/ld-linux-x86-64.so.2 (0x0000555555554000) + ;; + ;; /p/foundry/env/pkgs/megatest/1.64/chicken-4.10.0/bin/csc -s emergency-patch-3.scm + ;; + + + ;; to test patch: + ;;;;;;;;;;;;;;;;;;;;;;;;; + ;; in .megatestrc, add: + ;; (if (and (> megatest-version 1.64) + ;; (< megatest-version 1.6421)) + ;; (begin + ;; (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-1.so") + ;; (load "/p/foundry/env/pkgs/megatest/1.64/19/share/epatch-2.so"))) + ;; + + + ;; to productize patch: + ;;;;;;;;;;;;;;;;;;;;;;;;; + ;; +(use directory-utils regex) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "test_records.scm") + +;; Given a run id start a server process ### NOTE ### > file 2>&1 +;; if the run-id is zero and the target-host is set +;; try running on that host +;; incidental: rotate logs in logs/ dir. +;; +(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area + (let* ((curr-host (get-host-name)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) + (curr-ip (server:get-best-guess-address curr-host)) + (curr-pid (current-process-id)) + (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) + (target-host (car homehost)) + (testsuite (common:get-testsuite-name)) + (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) + (cmdln (conc (common:get-megatest-exe) + " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") + " -daemonize " + "") + ;; " -log " logfile + " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) + (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) + (load-limit (configf:lookup-number *configdat* "server" "load-limit" default: 0.9))) + ;; we want the remote server to start in *toppath* so push there + (push-directory areapath) + (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") + (thread-start! log-rotate) + + ;; host.domain.tld match host? + (if (and target-host + ;; look at target host, is it host.domain.tld or ip address and does it + ;; match current ip or hostname + (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) + (not (equal? curr-ip target-host))) + (begin + (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) + (setenv "TARGETHOST" target-host))) + + (setenv "TARGETHOST_LOGF" logfile) + (common:wait-for-normalized-load load-limit " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever + (system (conc "nbfake " cmdln)) + (unsetenv "TARGETHOST_LOGF") + (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) + (thread-join! log-rotate) + (pop-directory))) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -305,11 +305,11 @@ exn (begin (print-call-chain *default-log-port*) (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn))) (close-connection! api-dat) - (close-idle-connections!) + ;;(close-idle-connections!) #t)) #f))) (define (make-http-transport:server-dat)(make-vector 6)) @@ -503,39 +503,48 @@ (server-start (conc tmp-area "/.server-start")) (server-started (conc tmp-area "/.server-started")) (start-time (common:lazy-modification-time server-start)) (started-time (common:lazy-modification-time server-started)) (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting - (start-time-old (> (- (current-seconds) start-time) 5))) + (start-time-old (> (- (current-seconds) start-time) 5)) + (cleanup-proc (lambda (msg) + (let* ((serv-fname (conc "server-" (current-process-id) "-" (get-host-name) ".log")) + (full-serv-fname (conc *toppath* "/logs/" serv-fname)) + (new-serv-fname (conc *toppath* "/logs/" "defunct-" serv-fname))) + (debug:print 0 *default-log-port* msg) + (if (common:file-exists? full-serv-fname) + (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) + (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) + (exit))))) (if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin - (debug:print-info 0 *default-log-port* "NOT starting server, there is either a recently started server or a server in process of starting") - (exit)))) - ;; lets not even bother to start if there are already three or more server files ready to go - (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) - (if (> num-alive 3) - (begin - (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up") - (exit)))) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - )) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running) - "Keep running")))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit))) + (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") + (exit))) + ;; lets not even bother to start if there are already three or more server files ready to go + (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) + (if (> num-alive 3) + (begin + (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) + (exit)))) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit)))) ;; (define (http-transport:server-signal-handler signum) ;; (signal-mask! signum) ;; (handle-exceptions ;; exn Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -945,12 +945,24 @@ (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... sections: sections))) (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) - (if rccachef (configf:write-alist runconfigdat rccachef)) - (if mtcachef (configf:write-alist *configdat* mtcachef)) + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + + (if rccachef + (common:fail-safe + (lambda () + (configf:write-alist runconfigdat rccachef)) + (conc "Could not write cache file - "rccachef))) + (if mtcachef + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef))) (set! *runconfigdat* runconfigdat) (if (and rccachef mtcachef) (set! *configstatus* 'fulldata)))) ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table (set! *configdat* (make-hash-table)) ))) @@ -1015,12 +1027,26 @@ ;; one more attempt to cache the configs for future reading (let* ((cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) (mtcachef (car cachefiles)) (rccachef (cdr cachefiles))) - (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) (configf:write-alist *runconfigdat* rccachef)) - (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) (configf:write-alist *configdat* mtcachef)) + + ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342 + ;; TODO - consider 1) using simple-lock to bracket cache write + ;; 2) cache in hash on server, since need to do rmt: anyway to lock. + (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef))) + (common:fail-safe + (lambda () + (configf:write-alist *runconfigdat* rccachef)) + (conc "Could not write cache file - "rccachef)) + ) + (if (and mtcachef *configdat* (not (common:file-exists? mtcachef))) + (common:fail-safe + (lambda () + (configf:write-alist *configdat* mtcachef)) + (conc "Could not write cache file - "mtcachef)) + ) (if (and rccachef mtcachef *runconfigdat* *configdat*) (set! *configstatus* 'fulldata))) ;; if have -append-config then read and append here (let ((cfname (args:get-arg "-append-config"))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -52,46 +52,46 @@ (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file -(define *usage-use-seconds* #f) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file +(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) (load debugcontrolf))) ;; usage logging, careful with this, it is not designed to deal with all real world challenges! ;; -(if (and (common:file-exists? *usage-log-file*) - (file-write-access? *usage-log-file*)) - (with-output-to-file - *usage-log-file* - (lambda () - (print - (if *usage-use-seconds* - (current-seconds) - (time->string - (seconds->local-time (current-seconds)) - "%Yww%V.%w %H:%M:%S")) - " " - (current-user-name) " " - (current-directory) " " - "\"" (string-intersperse (argv) " ") "\"")) - #:append)) +(if (and *usage-log-file* + (file-write-access? *usage-log-file*)) + (with-output-to-file + *usage-log-file* + (lambda () + (print + (if *usage-use-seconds* + (current-seconds) + (time->string + (seconds->local-time (current-seconds)) + "%Yww%V.%w %H:%M:%S")) + " " + (current-user-name) " " + (current-directory) " " + "\"" (string-intersperse (argv) " ") "\"")) + #:append)) ;; Disabled help items ;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) ;; from prior runs with same keys ;; -daemonize : fork into background and disconnect from stdin/out (define help (conc " Megatest, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " - license GPL, Copyright Matt Welland 2006-2015 + license GPL, Copyright Matt Welland 2006-2017 Usage: megatest [options] -h : this help -manual : show the Megatest user manual -version : print megatest version (currently " megatest-version ") Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -117,11 +117,11 @@ " -daemonize " "") ;; " -log " logfile " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) - (load-limit (configf:lookup-number *configdat* "server" "load-limit" default: 0.9))) + (load-limit (configf:lookup-number *configdat* "jobtools" "maxhomehostload" default: 3.0))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) @@ -249,11 +249,11 @@ (mod-time (list-ref rec 0))) ;; (print "start-time: " start-time " mod-time: " mod-time) (and start-time mod-time (> (- now start-time) 0) ;; been running at least 0 seconds (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds - (< (- now start-time) + (< (- now start-time) (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) 180) (random 360))) ;; under one hour running time +/- 180 )) #f)) @@ -448,11 +448,11 @@ ;; timeout is hms string: 1h 5m 3s, default is 1 minute ;; (define (server:expiration-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) - (common:hms-string->seconds tmo)) + (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below (* 3600 (string->number tmo)) 60))) (define (server:get-best-guess-address hostname) (let ((res #f)) @@ -470,10 +470,11 @@ ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:writable-watchdog dbstruct) (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) + (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) @@ -492,12 +493,13 @@ ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) + (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5)) (should-sync (and (not *time-to-exit*) - (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum, deprecated logic, can probably be removed + (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed (start-time (current-seconds)) (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f))) (mt-mod-time (file-modification-time mtpath)) (last-sync-start (if (common:file-exists? start-file) (file-modification-time start-file) @@ -507,13 +509,14 @@ 10)) (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db! (< mt-mod-time last-sync-start))) (sync-done (<= last-sync-start last-sync-end)) + (sync-stale (> start-time (+ last-sync-start sync-stale-seconds))) (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting (or need-sync should-sync) - sync-done + (or sync-done sync-stale) (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync " sync-done=" sync-done " sync-period=" sync-period) @@ -523,47 +526,49 @@ ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync - (let ((sync-start (current-milliseconds))) + (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! + (sync-start (current-milliseconds))) (with-output-to-file start-file (lambda ()(print (current-process-id)))) ;; put lock here - (if (< sync-duration 3000) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally + ;; (if (or (not max-sync-duration) + ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive (set! sync-duration (- (current-milliseconds) sync-start)) (if (> res 0) ;; some records were transferred, keep the db alive (begin (mutex-lock! *heartbeat-mutex*) (set! *db-last-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*) (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) - (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) - ;; TODO: factor this next routine out into a function - (with-input-from-pipe ;; this should not block other threads but need to verify this - (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) - (lambda () - (let loop ((inl (read-line)) - (res #f)) - (if (eof-object? inl) - (begin - (set! sync-duration (- (current-milliseconds) sync-start)) - (cond - ((not res) - (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) - ((> res 0) - (mutex-lock! *heartbeat-mutex*) - (set! *db-last-access* (current-seconds)) - (mutex-unlock! *heartbeat-mutex*)))) - (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) - (if matches - (string->number (cadr matches)) - #f)))) - (loop (read-line) - (or num-synced res)))))))))) + (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) +;; ;; TODO: factor this next routine out into a function +;; (with-input-from-pipe ;; this should not block other threads but need to verify this +;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res #f)) +;; (if (eof-object? inl) +;; (begin +;; (set! sync-duration (- (current-milliseconds) sync-start)) +;; (cond +;; ((not res) +;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) +;; ((> res 0) +;; (mutex-lock! *heartbeat-mutex*) +;; (set! *db-last-access* (current-seconds)) +;; (mutex-unlock! *heartbeat-mutex*)))) +;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) +;; (if matches +;; (string->number (cadr matches)) +;; #f)))) +;; (loop (read-line) +;; (or num-synced res)))))))))) (if will-sync (begin (mutex-lock! *db-multi-sync-mutex*) (set! *db-sync-in-progress* #f) (set! *db-last-sync* start-time) Index: tcmt.scm ================================================================== --- tcmt.scm +++ tcmt.scm @@ -41,12 +41,13 @@ ;; ##teamcity[testStdOut name='suite.testName' out='text'] ;; ##teamcity[testStdErr name='suite.testName' out='error text'] ;; ##teamcity[testFailed name='suite.testName' message='failure message' details='message and stack trace'] ;; ##teamcity[testFinished name='suite.testName' duration='50'] ;; - -(define (print-changes-since data run-ids last-update tsname target runname) +;; flush; #f, normal call. #t, last call, print out something for NOT_STARTED, etc. +;; +(define (print-changes-since data run-ids last-update tsname target runname flowid flush) ;; (let ((now (current-seconds))) (handle-exceptions exn (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn))) (for-each @@ -63,18 +64,22 @@ (status (db:test-get-status testdat)) (duration (or (any->number (db:test-get-run_duration testdat)) 0)) (comment (db:test-get-comment testdat)) (logfile (db:test-get-final_logf testdat)) (prevstat (hash-table-ref/default data testn #f)) - (newstat (if (equal? state "RUNNING") - "RUNNING" - (if (equal? state "COMPLETED") - status - "UNK"))) - (cmtstr (if comment + (newstat (cond + ((equal? state "RUNNING") "RUNNING") + ((equal? state "COMPLETED") status) + (flush (conc state "/" status)) + (else "UNK"))) + (cmtstr (if (and (not flush) comment) (conc " message='" comment "' ") - " ")) + (if flush + (conc "message='Test ended in state/status=" state "/" status (if (string-match "^\\s*$" comment) + ", no Megatest comment found.' " + (conc ", Megatest comment='" comment "' "))) ;; special case, we are handling stragglers + " "))) (details (if (string-match ".*html$" logfile) (conc " details='" *toppath* "/lt/" target "/" runname "/" testname (if (equal? itempath "") "/" (conc "/" itempath "/")) logfile "' ") ""))) ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat) @@ -81,35 +86,36 @@ (if (or (not prevstat) (not (equal? prevstat newstat))) (begin (case (string->symbol newstat) ((UNK) ) ;; do nothing - ((RUNNING) (print "##teamcity[testStarted name='" tctname "']")) - ((PASS SKIP WARN WAIVED) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]")) + ((RUNNING) (print "##teamcity[testStarted name='" tctname "' flowId='" flowid "']")) + ((PASS SKIP WARN WAIVED) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " flowId='" flowid "']")) (else - (print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]"))) + (print "##teamcity[testFailed name='" tctname "' " cmtstr details " flowId='" flowid "']"))) (flush-output) (hash-table-set! data testn newstat))))) tests))) run-ids)) now)) (define (monitor pid) - (let ((run-ids #f) - (testdat (make-hash-table)) - (keys #f) - (last-update 0) - (target (or (args:get-arg "-target") - (args:get-arg "-reqtarg"))) - (runname (args:get-arg "-runname")) - (tsname #f)) + (let* ((run-ids #f) + (testdat (make-hash-table)) + (keys #f) + (last-update 0) + (target (or (args:get-arg "-target") + (args:get-arg "-reqtarg"))) + (runname (args:get-arg "-runname")) + (tsname #f) + (flowid (conc target "/" runname))) (if (and target runname) (begin (launch:setup) (set! keys (rmt:get-keys)))) (set! tsname (common:get-testsuite-name)) - (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup.") + (print "TCMT: for testsuite=" tsname " found runname=" runname ", target=" target ", keys=" keys " and successfully ran launch:setup. Using " flowid " as the flowId.") (let loop () (handle-exceptions exn ;; (print "Process done.") (begin (print-call-chain) (print "Error message: " ((condition-property-accessor 'exn 'message) exn))) @@ -131,18 +137,20 @@ (run-ids-in (map (lambda (row) (db:get-value-by-header row header "id")) rows))) (set! run-ids run-ids-in))) ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids) - (if keys - (set! last-update (print-changes-since testdat run-ids last-update tsname target runname))) (if (eq? pidres 0) (begin + (if keys + (set! last-update (print-changes-since testdat run-ids last-update tsname target runname flowid #f))) (thread-sleep! 3) (loop)) (begin ;; (print "TCMT: pidres=" pidres " exittype=" exittype " exitstatus=" exitstatus " run-ids=" run-ids) + (print "TCMT: processing any tests that did not formally complete.") + (print-changes-since testdat run-ids 0 tsname target runname flowid #t) ;; call in flush mode (print "TCMT: All done.") ))))))) ;; (if (not (eq? pidres 0)) ;; (not exitstatus)) ;; (begin Index: utils/Makefile.git.installall ================================================================== --- utils/Makefile.git.installall +++ utils/Makefile.git.installall @@ -47,11 +47,11 @@ PROXY= # http://code.call-cc.org/dev-snapshots/2015/06/07/chicken-4.10.0rc1.tar.gz # http://code.call-cc.org/releases/4.10.0/chicken-4.10.0.tar.gz # Select version of chicken, sqlite3 etc -CHICKEN_VERSION=4.10.1 +CHICKEN_VERSION=4.12.0rc2 SQLITE3_VERSION=3090200 # http://www.sqlite.org/2014/sqlite-autoconf-3080500.tar.gz # http://www.sqlite.org/2015/sqlite-autoconf-3081101.tar.gz # Override IUPBRANCH to use other than trunk IUPBRANCH=trunk @@ -174,10 +174,13 @@ cd chicken-core; pwd cd chicken-core; fossil open ../chicken-scheme.fossil cd chicken-core; fossil up 337f5be # wget http://code.call-cc.org/dev-snapshots/2015/08/29/chicken-4.10.1.tar.gz +chicken-4.12.0rc2.tar.gz : + wget https://code.call-cc.org/dev-snapshots/2017/02/06/chicken-4.12.0rc2.tar.gz + # git clone git://code.call-cc.org/chicken-core # git clone http://code.call-cc.org/git/chicken-core.git $(PRODCHICKEN)/bin/chicken : wget http://code.call-cc.org/dev-snapshots/2015/08/29/chicken-4.10.1.tar.gz Index: utils/Makefile.installall ================================================================== --- utils/Makefile.installall +++ utils/Makefile.installall @@ -45,11 +45,11 @@ # http://code.call-cc.org/dev-snapshots/2015/06/07/chicken-4.10.0rc1.tar.gz # http://code.call-cc.org/releases/4.10.0/chicken-4.10.0.tar.gz # Select version of chicken, sqlite3 etc # CHICKEN_VERSION=4.10.0 -CHICKEN_VERSION=4.11.0rc2 +CHICKEN_VERSION=4.11.0 SQLITE3_VERSION=3090200 # http://www.sqlite.org/2014/sqlite-autoconf-3080500.tar.gz # http://www.sqlite.org/2015/sqlite-autoconf-3081101.tar.gz # Override IUPBRANCH to use other than trunk IUPBRANCH=trunk @@ -56,11 +56,11 @@ IUPCONFIG=ubuntu-15.04.inc # iup-3.15 # Eggs to install (straightforward ones) EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \ - dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \ + dot-locking posix-utils posix-extras hostinfo tcp-server rpc csv-xml fmt \ json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars pathname-expand \ spiffy-directory-listing ssax sxml-serializer sxml-modifications sql-de-lite \ srfi-19 refdb ini-file sparse-vectors z3 call-with-environment-variables hahn linenoise \ crypt parley @@ -97,11 +97,11 @@ else ARCHSIZE=64_ endif CSCLIBS=$(shell echo $(LD_LIBRARY_PATH) | sed 's/:/ -L/g') -CSC_OPTIONS="-I$(PREFIX)/include -L$(CSCLIBS) -C \"-fPIC\"" +CSC_OPTIONS="-I$(PREFIX)/include -L$(CSCLIBS) -C -fPIC" # CSC_OPTIONS=-I $(PREFIX)/include -L $(CSCLIBS) nogui : base mutils #all : nogui libiup $(PREFIX)/lib/sqlite3.so @@ -137,11 +137,12 @@ mkdir -p eggflags touch $(EGGFLAGS) # some setup stuff # -$(PREFIX)/setup-chicken4x.sh : $(EGGFLAGS) +#$(PREFIX)/setup-chicken4x.sh : $(EGGFLAGS) +$(PREFIX)/setup-chicken4x.sh : mkdir -p $(PREFIX) (echo 'export PATH=$(PREFIX)/bin:$$PATH' > $(PREFIX)/setup-chicken4x.sh) (echo "export LD_LIBRARY_PATH=$(LD_LIBRARY_PATH)" >> $(PREFIX)/setup-chicken4x.sh) $(PREFIX)/setup-chicken4x.csh : $(EGGFLAGS) @@ -149,35 +150,26 @@ (echo "setenv PATH $(PREFIX):'$$'PATH" > $(PREFIX)/setup-chicken4x.csh) (echo "setenv LD_LIBRARY_PATH $(LD_LIBRARY_PATH)" >> $(PREFIX)/setup-chicken4x.csh) # NOTE: the touch chicken-core/chicken.scm compensates for the time stamp from the tar file chicken-core/chicken.scm : chicken-$(CHICKEN_VERSION).tar.gz - tar xf chicken-$(CHICKEN_VERSION).tar.gz + tar xzf chicken-$(CHICKEN_VERSION).tar.gz ln -sf chicken-$(CHICKEN_VERSION) chicken-core if [[ -e chicken-core/chicken.scm ]];then touch chicken-core/chicken.scm;fi -chicken-4.9.0rc1.tar.gz : - wget http://code.call-cc.org/dev-snapshots/2014/04/17/chicken-4.9.0rc1.tar.gz - -chicken-4.9.0.1.tar.gz : - wget http://code.call-cc.org/releases/4.9.0/chicken-4.9.0.1.tar.gz - -chicken-4.10.0rc1.tar.gz : - wget http://code.call-cc.org/dev-snapshots/2015/06/07/chicken-4.10.0rc1.tar.gz - -chicken-4.10.0.tar.gz : - wget http://code.call-cc.org/releases/4.10.0/chicken-4.10.0.tar.gz - -chicken-4.11.0rc2.tar.gz : - wget http://code.call-cc.org/dev-snapshots/2016/04/28/chicken-4.11.0rc2.tar.gz +chicken-4.11.0.tar.gz : + wget http://code.call-cc.org/releases/4.11.0/chicken-4.11.0.tar.gz # git clone git://code.call-cc.org/chicken-core # git clone http://code.call-cc.org/git/chicken-core.git $(CHICKEN_INSTALL) : chicken-core/chicken.scm $(PREFIX)/setup-chicken4x.sh $(PREFIX)/setup-chicken4x.csh - cd chicken-core;make PLATFORM=linux PREFIX=$(PREFIX) - cd chicken-core;make PLATFORM=linux PREFIX=$(PREFIX) install + pwd; env; which make + cd chicken-core; make PLATFORM=linux PREFIX=$(PREFIX) + cd chicken-core; make PLATFORM=linux PREFIX=$(PREFIX) install + #cd chicken-core;env -i PATH=${PATH} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} make PLATFORM=linux PREFIX=$(PREFIX) + #cd chicken-core;env -i PATH=${PATH} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} make PLATFORM=linux PREFIX=$(PREFIX) install #====================================================================== # S Q L I T E 3 #====================================================================== # https://www.sqlite.org/2015/sqlite-autoconf-3090200.tar.gz @@ -235,11 +227,12 @@ $(PREFIX)/lib/chicken/8/margs.so : opensrc/margs/margs.scm cd opensrc/margs;chicken-install opensrc/histstore/hs : opensrc/histstore/histstore.scm chkn eggs $(CHICKEN_EGG_DIR)/sqlite3.so - cd opensrc/histstore;$(PREFIX)/bin/csc histstore.scm -o hs + env | grep CSC + cd opensrc/histstore; $(PREFIX)/bin/csc histstore.scm -o hs $(PREFIX)/bin/hs : opensrc/histstore/hs cp -f opensrc/histstore/hs $(PREFIX)/bin/hs # stml @@ -254,11 +247,11 @@ stml/requirements.scm : stml/requirements.scm.template cp stml/install.cfg.template stml/install.cfg cp stml/requirements.scm.template stml/requirements.scm $(PREFIX)/lib/chicken/8/stml.so : stml/requirements.scm - cd stml;make + cd stml; make #====================================================================== # F F C A L L (Used by IUP) #====================================================================== @@ -280,34 +273,47 @@ iuplib.fossil : fossil clone http://www.kiatoa.com/fossils/iuplib iuplib.fossil cd-5.9_Linux26g4_64_lib.tar.gz : - wget -c http://sourceforge.net/projects/canvasdraw/files/5.9/Linux%20Libraries/cd-5.9_Linux26g4_64_lib.tar.gz/download + wget --no-check-certificate -c http://sourceforge.net/projects/canvasdraw/files/5.9/Linux%20Libraries/cd-5.9_Linux26g4_64_lib.tar.gz/download mv download cd-5.9_Linux26g4_64_lib.tar.gz +cd-5.10_Linux26g4_64_lib.tar.gz : + cp /p/fdk/gwa/jmoon18/cd-5.10_Linux26g4_64_lib.tar.gz cd-5.10_Linux26g4_64_lib.tar.gz + iup-3.17_Linux26g4_64_lib.tar.gz : - wget -c http://sourceforge.net/projects/iup/files/3.17/Linux%20Libraries/iup-3.17_Linux26g4_64_lib.tar.gz/download - mv download iup-3.17_Linux26g4_64_lib.tar.gz + cp /p/fdk/gwa/jmoon18/iup-3.17_Linux26g4_64_lib.tar.gz iup-3.17_Linux26g4_64_lib.tar.gz +# wget --no-check-certificate -c http://sourceforge.net/projects/iup/files/3.17/Linux%20Libraries/iup-3.17_Linux26g4_64_lib.tar.gz/download +# mv download iup-3.17_Linux26g4_64_lib.tar.gz + +iup-3.19.1_Linux26g4_64_lib.tar.gz : + cp /p/fdk/gwa/jmoon18/iup-3.19.1_Linux26g4_64_lib.tar.gz iup-3.19.1_Linux26g4_64_lib.tar.gz im-3.10_Linux26g4_64_lib.tar.gz : - wget -c http://sourceforge.net/projects/imtoolkit/files/3.10/Linux%20Libraries/im-3.10_Linux26g4_64_lib.tar.gz/download + wget --no-check-certificate -c http://sourceforge.net/projects/imtoolkit/files/3.10/Linux%20Libraries/im-3.10_Linux26g4_64_lib.tar.gz/download mv download im-3.10_Linux26g4_64_lib.tar.gz +im-3.11_Linux26g4_64_lib.tar.gz : + cp /p/fdk/gwa/jmoon18/im-3.11_Linux26g4_64_lib.tar.gz im-3.11_Linux26g4_64_lib.tar.gz + lua-5.3.2_Linux26g4_64_lib.tar.gz : - wget -c http://sourceforge.net/projects/luabinaries/files/5.3.2/Linux%20Libraries/lua-5.3.2_Linux26g4_64_lib.tar.gz/download + wget --no-check-certificate -c http://sourceforge.net/projects/luabinaries/files/5.3.2/Linux%20Libraries/lua-5.3.2_Linux26g4_64_lib.tar.gz/download mv download lua-5.3.2_Linux26g4_64_lib.tar.gz +lua-5.3.3_Linux26g4_64_lib.tar.gz : + cp /p/fdk/gwa/jmoon18/lua-5.3.3_Linux26g4_64_lib.tar.gz lua-5.3.3_Linux26g4_64_lib.tar.gz + iup/installall.sh : $(PREFIX)/lib/libiup.so \ - cd-5.9_Linux26g4_64_lib.tar.gz \ + cd-5.10_Linux26g4_64_lib.tar.gz \ iup-3.17_Linux26g4_64_lib.tar.gz \ - im-3.10_Linux26g4_64_lib.tar.gz \ - lua-5.3.2_Linux26g4_64_lib.tar.gz # iuplib.fossil + im-3.11_Linux26g4_64_lib.tar.gz \ + lua-5.3.3_Linux26g4_64_lib.tar.gz # iuplib.fossil mkdir -p iup pwd - tar -xzvf cd-5.9_Linux26g4_64_lib.tar.gz -C iup/ - tar -xzvf im-3.10_Linux26g4_64_lib.tar.gz -C iup/ + tar -xzvf cd-5.10_Linux26g4_64_lib.tar.gz -C iup/ + tar -xzvf im-3.11_Linux26g4_64_lib.tar.gz -C iup/ tar -xzvf iup-3.17_Linux26g4_64_lib.tar.gz -C iup/ mkdir -p $(PREFIX)/include/ $(PREFIX)/lib/ cp iup/include/* $(PREFIX)/include/ cp iup/*.so $(PREFIX)/lib/ cp iup/*.a $(PREFIX)/lib/ @@ -331,6 +337,6 @@ $(CHICKEN_EGG_DIR)/canvas-draw.so : $(PREFIX)/lib/libiup.so $(PREFIX)/lib/libavcall.a CSC_OPTIONS=$(CSC_OPTIONS) $(CHICKEN_INSTALL) $(PROX) -D no-library-checks canvas-draw clean : - rm -rf chicken-4.8.0 eggflags ffcall sqlite-autoconf-$(SQLITE3_VERSION) + rm -rf chicken-4.11.0 eggflags ffcall sqlite-autoconf-$(SQLITE3_VERSION) ADDED utils/checkPreReqs Index: utils/checkPreReqs ================================================================== --- /dev/null +++ utils/checkPreReqs @@ -0,0 +1,30 @@ +#!/bin/bash +SYSTEM_TYPE=$(lsb_release -irs |tr ' ' '_' |tr '\n' '-')$(uname -i) +file=`/bin/mktemp` +case $SYSTEM_TYPE in +Ubuntu-17.04-x86_64-std) + apt list --installed | cut -d/ -f 1 > $file + ;; +Ubuntu-16.04-x86_64) + apt list --installed | cut -d/ -f 1 > $file + ;; +Ubuntu-16.04-i686) + apt list --installed | cut -d/ -f 1 > $file + ;; +SUSE_LINUX_11-x86_64) + rpm -qa > $file + ;; +CentOS_5.11-x86_64-std) + rpm -qa > $file + ;; +esac + + + +for package in libmysqlclient-dev libsqlite3-dev sqlite3 postgresql libreadline-dev libwebkitgtk-dev libpangox-1.0-0 zlib1g-dev libfreetype6 cmake libssl-dev uuid-dev libmotif3 mysql-client; do + grep --silent $package $file + if [ "$?" != "0" ]; then + echo "sudo apt install $package" + fi +done +rm $file Index: utils/installall.sh ================================================================== --- utils/installall.sh +++ utils/installall.sh @@ -43,10 +43,18 @@ CHICKEN_BASEVER=4.11.0 # Set up variables # case $SYSTEM_TYPE in +Ubuntu-17.04-x86_64-std) + KTYPE=32 + CDVER=5.10 + IUPVER=3.17 + IMVER=3.11 + CHICKEN_VERSION=4.12.0 + CHICKEN_BASEVER=4.12.0 + ;; Ubuntu-16.04-x86_64-std) KTYPE=32 CDVER=5.10 IUPVER=3.17 IMVER=3.11 @@ -162,22 +170,23 @@ make PLATFORM=linux PREFIX=$PREFIX make PLATFORM=linux PREFIX=$PREFIX install cd $BUILDHOME fi cd $BUILDHOME -if [[ ! -e 1.0.0.tar.gz ]];then - wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz - mv 1.0.0 1.0.0.tar.gz -fi +#if [[ ! -e 1.0.0.tar.gz ]];then +# wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz +# mv 1.0.0 1.0.0.tar.gz +#fi if ! [[ -e $PREFIX/lib64/libnanomsg.so.1.0.0 ]]; then wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz - mv 1.0.0 1.0.0.tar.gz + #mv 1.0.0 1.0.0.tar.gz tar xf 1.0.0.tar.gz cd nanomsg-1.0.0 ./configure --prefix=$PREFIX make make install + CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX nanomsg fi cd $BUILDHOME export SQLITE3_VERSION=3090200 if ! [[ -e $PREFIX/bin/sqlite3 ]]; then @@ -213,11 +222,11 @@ # Some eggs are quoted since they are reserved to Bash # for f in matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json md5; do # $CHICKEN_INSTALL $PROX -keep-installed matchable readline apropos base64 regex-literals format "regex-case" "test" coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo tcp rpc csv-xml fmt json md5 awful http-client spiffy uri-common intarweb http-client spiffy-request-vars md5 message-digest http-client spiffy-directory-listing for egg in matchable readline apropos base64 regex-literals format "regex-case" "test" \ coops trace csv dot-locking posix-utils posix-extras directory-utils hostinfo \ - tcp rpc csv-xml fmt json md5 awful http-client spiffy uri-common intarweb http-client \ + tcp rpc csv-xml fmt json md5 awful http-client:0.7.1 spiffy uri-common intarweb http-client \ spiffy-request-vars s md5 message-digest spiffy-directory-listing ssax sxml-serializer \ sxml-modifications logpro z3 call-with-environment-variables \ pathname-expand typed-records simple-exceptions numbers crypt parley srfi-42 \ alist-lib ansi-escape-sequences args basic-sequences bindings chicken-doc chicken-doc-cmd \ cock condition-utils debug define-record-and-printer easyffi easyffi-base \ ADDED utils/mtrept.sh Index: utils/mtrept.sh ================================================================== --- /dev/null +++ utils/mtrept.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Rollup counts of calls to Megatest from a logging dat file +# +# Usage: mtrept.sh file [host] + +if [[ "$2"x != "x" ]];then + host_name_grep="grep $2 | " +else + host_name_grep="" +fi +if [[ "$1"x == "x" ]];then + datfile=/p/fdk/gwa/$USER/.logger/all.dat +else + datfile=$1 +fi +datcopy=/tmp/$USER/all.$PID.dat + +if [[ -e $datfile ]];then + cp $datfile $datcopy + list_runs=$(grep list-runs $datcopy |$host_name_grep wc -l) + show_config=$(grep show-config $datcopy |$host_name_grep wc -l) + list_targets=$(grep list-targets $datcopy |$host_name_grep wc -l) + mt_run=$(grep ' -run ' $datcopy |$host_name_grep wc -l) + execute=$(grep ' -execute' $datcopy|$host_name_grep wc -l) + server=$(grep ' -server' $datcopy|$host_name_grep wc -l) + sync_to=$(grep ' -sync-to' $datcopy|$host_name_grep wc -l) + step=$(grep ' -step' $datcopy|$host_name_grep wc -l) + state_status=$(grep ' -set-state-status' $datcopy|$host_name_grep wc -l) + test_status=$(grep ' -test-status' $datcopy|$host_name_grep wc -l) + other=$(egrep -v ' -(list-runs|show-config|list-targets|run|execute|server|sync-to|step|set-state-status|test-status)' $datcopy |$host_name_grep wc -l) + start_time=$(head -1 $datcopy|awk '{print $1}') + end_time=$(tail -1 $datcopy | awk '{print $1}') + minutes=$(echo "($end_time-$start_time)/60.0" | bc) + hours=$(echo "($minutes/60)"|bc) + total_calls=$(cat $datcopy |$host_name_grep wc -l) + + if [[ $hours -gt 2 ]];then + echo "Over $hours hour period we have;" + else + echo "Over $minutes minutes we have;" + fi + echo " list-runs: $list_runs" + echo " show-config: $show_config" + echo " list-targets: $list_targets" + echo " execute: $execute" + echo " run: $mt_run" + echo " server: $server" + echo " step: $step" + echo " test-status: $test_status" + echo " sync-to: $sync_to" + echo " state-status: $state_status" + echo " other: $other" + echo " total: $total_calls" +else + echo "Could not find input file $datfile" +fi +