Overview
Comment: | sync the trunk with 1.65 latest |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
e13fd02294b69a14a650d1e69467ba2a |
User & Date: | matt on 2020-02-19 21:16:05 |
Other Links: | manifest | tags |
Context
2020-08-03
| ||
14:40 | Merging v1.65 to trunk check-in: 94cb260871 user: mrwellan tags: trunk | |
2020-05-31
| ||
22:34 | Merged v1.66 to trunk - it is now the stable leading edge. check-in: ea3cea484e user: mrwellan tags: trunk-v1.66 | |
2020-02-19
| ||
21:16 | sync the trunk with 1.65 latest check-in: e13fd02294 user: matt tags: trunk | |
2020-02-12
| ||
12:40 | Fixed modulo typo check-in: 7458dc962f user: jmoon18 tags: v1.65, v.16543 | |
2019-12-08
| ||
16:46 | Merged long-running v1.65 branch to trunk check-in: a11b626717 user: matt tags: trunk | |
Changes
Modified common.scm from [4250e720f5] to [22542b9de5].
︙ | ︙ | |||
1671 1672 1673 1674 1675 1676 1677 | ;; (car load-res)) ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 5)) | | | > | > | 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 | ;; (car load-res)) ;; cpu-load)) ;; get values from cached info from dropping file in logs dir ;; e.g. key is host and dtype is normalized-load ;; (define (common:get-cached-info key dtype #!key (age 5)) (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))) (if (and (file-exists? fullpath) (file-read-access? fullpath)) (handle-exceptions exn #f (debug:print 2 *default-log-port* "reading file " fullpath) (let ((real-age (- (current-seconds)(file-change-time fullpath)))) (if (< real-age age) (with-input-from-file fullpath read) (begin (debug:print 1 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it") #f)))) (begin (debug:print 2 *default-log-port* "not reading file " fullpath) #f)))) (define (common:write-cached-info key dtype dat) (let* ((fulldir (conc *toppath* "/.sysdata")) (fullpath (conc fulldir "/" key "-" dtype ".log"))) (if (not (file-exists? fulldir))(create-directory fulldir #t)) (handle-exceptions exn #f (with-output-to-file fullpath (lambda ()(pp dat)))))) ;; get cpu load by reading from /proc/loadavg, return all three values ;; |
︙ | ︙ | |||
1742 1743 1744 1745 1746 1747 1748 | (else default)))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host (with-input-from-pipe | | | 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 | (else default)))) (define (common:get-normalized-cpu-load-raw remote-host) (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost (or (common:get-cached-info actual-host "normalized-load") (let ((data (if remote-host (with-input-from-pipe (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"") read-lines) (append (with-input-from-file "/proc/loadavg" read-lines) (with-input-from-file "/proc/cpuinfo" read-lines) (list "end")))) |
︙ | ︙ | |||
1764 1765 1766 1767 1768 1769 1770 | #f (let loop ((hed (car data)) (tal (cdr data)) (loads #f) (proc-num 0) ;; processor includes threads (phys-num 0) ;; physical chip on motherboard (core-num 0)) ;; core | | | 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 | #f (let loop ((hed (car data)) (tal (cdr data)) (loads #f) (proc-num 0) ;; processor includes threads (phys-num 0) ;; physical chip on motherboard (core-num 0)) ;; core ;;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) (if (null? tal) ;; have all our data, calculate normalized load and return result (let* ((act-proc (+ proc-num 1)) (act-phys (+ phys-num 1)) (act-core (+ core-num 1)) (adj-proc-load (/ (car loads) act-proc)) (adj-core-load (/ (car loads) act-core)) (result |
︙ | ︙ | |||
1917 1918 1919 1920 1921 1922 1923 | (if new-best (begin ;; found a host, return it (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) (host-last-used-set! rec curr-time) new-best) (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < | < | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 | (if new-best (begin ;; found a host, return it (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) (host-last-used-set! rec curr-time) new-best) (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) (define (common:wait-for-homehost-load maxload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxload msg hh))) (define (common:get-num-cpus remote-host) (let* ((actual-host (or remote-host (get-host-name)))) (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often! (let* ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) (begin (common:write-cached-info actual-host "num-cpus" numcpu) numcpu) (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line)))))) (result (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) (common:write-cached-info actual-host "num-cpus" result) result)))) ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxload msg remote-host) (let ((num-cpus (common:get-num-cpus remote-host))) (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero. If we get 1, it's possible that we got the previous default, and we should check again (common:get-num-cpus remote-host) numcpus-in)) (maxload (if force-maxload maxload-in (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME? (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1 (loadjmp (- first next)) (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) )) )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp) (cond ((and (> first adjload) (> count 0)) (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg "")) (thread-sleep! adjwait) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) ((and (> loadjmp numcpus) (> count 0)) (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) (thread-sleep! adjwait) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))))) (define (get-uname . params) (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) (uname #f)) (if (null? (car uname-res)) "unknown" (caar uname-res)))) |
︙ | ︙ |
Modified megatest-version.scm from [3dd6b10f75] to [8e7922af4c].
︙ | ︙ | |||
16 17 18 19 20 21 22 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) | | | 16 17 18 19 20 21 22 23 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. (declare (unit megatest-version)) (define megatest-version 1.6543) |
Modified rmt.scm from [fff4f6b45b] to [8c67a7dcdf].
︙ | ︙ | |||
20 21 22 23 24 25 26 | (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") | | | | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") ;; (declare (uses rmtmod)) ;; (import rmtmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following ;; |
︙ | ︙ | |||
56 57 58 59 60 61 62 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected | | > > > > > > > > > > > | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) (begin (rmt:start-server rid) (thread-sleep! 3))) ;;DOT digraph megatest_state_status { ;;DOT ranksep=0; ;;DOT // rankdir=LR; ;;DOT node [shape="box"]; ;;DOT "rmt:send-receive" -> MUTEXLOCK; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } ;; do all the prep locked under the rmt-mutex (mutex-lock! *rmt-mutex*) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (attemptnum (+ 1 attemptnum)) (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; ;; ensure we have a record for our connection for given area (if (not runremote) ;; can remove this one. should never get here. |
︙ | ︙ | |||
101 102 103 104 105 106 107 | (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } | | | | | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) ;;DOT CASE2 [label="local\nreadonly\nquery"]; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; readonly mode, read request- handle it - case 2 ((and readonly-mode |
︙ | ︙ | |||
929 930 931 932 933 934 935 | (define (rmt:test-set-archive-block-id run-id test-id archive-block-id) (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 | (define (rmt:test-set-archive-block-id run-id test-id archive-block-id) (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id))) (define (rmt:test-get-archive-block-info archive-block-id) (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id))) (define (rmtmod:calc-ro-mode runremote *toppath*) (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) (let* ((dbfile (conc *toppath* "/megatest.db")) (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future (if runremote (begin (remote-ro-mode-set! runremote ro-mode) (remote-ro-mode-checked-set! runremote #t) ro-mode) ro-mode)))) (define (extras-readonly-mode rmt-mutex log-port cmd params) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 3") (debug:print 0 log-port "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f) (define (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params) (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (mutex-lock! *rmt-mutex*) (remote-conndat-set! runremote #f) (http-transport:close-connections area-dat: runremote) (remote-server-url-set! runremote #f) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (define (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) (if (and (vector? res) (eq? (vector-length res) 2) (eq? (vector-ref res 1) 'overloaded)) ;; since we are ;; looking at the ;; data to carry the ;; error we'll use a ;; fairly obtuse ;; combo to minimise ;; the chances of ;; some sort of ;; collision. this ;; is the case where ;; the returned data ;; is bad or the ;; server is ;; overloaded and we ;; want to ease off ;; the queries (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res #;(set-functions rmt:send-receive remote-server-url-set! http-transport:close-connections remote-conndat-set! debug:print debug:print-info remote-ro-mode remote-ro-mode-set! remote-ro-mode-checked-set! remote-ro-mode-checked) |
Modified rmtmod.scm from [8f5d01960e] to [7098937896].
︙ | ︙ | |||
54 55 56 57 58 59 60 | (set! debug:print dbgp) (set! debug:print-info dbgpinfo) (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) (set! remote-ro-mode-checked-set! ro-mode-checked-set) (set! remote-ro-mode-checked ro-mode-checked)) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 54 55 56 57 58 59 60 61 | (set! debug:print dbgp) (set! debug:print-info dbgpinfo) (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) (set! remote-ro-mode-checked-set! ro-mode-checked-set) (set! remote-ro-mode-checked ro-mode-checked)) ) |
Modified runs.scm from [81684437ca] to [650378342c].
︙ | ︙ | |||
1015 1016 1017 1018 1019 1020 1021 | (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner | < | 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 | (debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met) '()))) (non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed! (not (equal? x hed))) (runs:calc-not-completed prereqs-not-met))) (loop-list (list hed tal reg reruns)) ;; configure the load runner (maxload (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0"))) ;; use a non-number string to disable (maxhomehostload (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable (waitdelay (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60")))) (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" (string-intersperse (map (lambda (t) (if (vector? t) |
︙ | ︙ | |||
1117 1118 1119 1120 1121 1122 1123 1124 1125 | (and (member 'toplevel testmode) ;; 'toplevel) (null? non-completed) (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing | > > > | > | > | 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 | (and (member 'toplevel testmode) ;; 'toplevel) (null? non-completed) (not (member 'exclusive testmode))))) ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path)) ;; we are going to reset all the counters for test retries by setting a new hash table ;; this means they will increment only when nothing can be run (set! *max-tries-hash* (make-hash-table)) ;; well, first lets see if cpu load throttling is enabled. If so wait around until the ;; average cpu load is under the threshold before continuing ;; jobtools maxload is useful for where the full Megatest run is done on one machine (if maxload ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues (if maxhomehostload (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload))) (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry) (runs:incremental-print-results run-id) (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running) (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?) |
︙ | ︙ | |||
1741 1742 1743 1744 1745 1746 1747 1748 1749 | (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) | > > > > > < < < < < < > > > > > > > > > > > > > < | 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 | (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t)))) lst)) ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step ;; (define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry) ;; All these vars might be referenced by the testconfig file reader ;; ;; NEED to reprocess testconfig here, ensuring that item variables are available. ;; This is for Tal's issue with item-specific env vars not being set for use in skip. ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273 ;; (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) (item-path "") (db #f) (full-test-name #f)) ;; setting itemdat to a list if it is #f (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (set! full-test-name (db:test-make-full-name test-name item-path)) (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!! ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t)) (tests:testqueue-get-testconfig test-record )) (test-path (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ... (force (hash-table-ref/default flags "-force" #f)) (rerun (hash-table-ref/default flags "-rerun" #f)) (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))) ) (debug:print-info 4 *default-log-port* "\nTESTNAME: " full-test-name "\n test-config: " (hash-table->alist test-conf) "\n itemdat: " itemdat ) (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name) ;; (setenv "MT_TEST_NAME" test-name) ;; ;; (setenv "MT_ITEMPATH" item-path) ;; (setenv "MT_RUNNAME" runname) (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? ;; ;; There is now a single call to runs:update-all-test_meta and this ;; per-test call is not needed. Given the delicacy of the move to |
︙ | ︙ | |||
1879 1880 1881 1882 1883 1884 1885 | ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means *all* runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) | > | | > > | | | | > > | | | | 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 | ((and skip-check (configf:lookup test-conf "skip" "prevrunning")) ;; run-ids = #f means *all* runs (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))) (if (not (null? running-tests)) ;; have to skip (set! skip-test "Skipping due to previous tests running")))) ;; split the string and OR of file-exists? ((and skip-check (configf:lookup test-conf "skip" "fileexists")) (let* ((files (string-split (configf:lookup test-conf "skip" "fileexists"))) (existing (filter common:file-exists? files))) (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists")) (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists"))))) ((and skip-check (configf:lookup test-conf "skip" "filenotexists")) (let* ((files (string-split (configf:lookup test-conf "skip" "filenotexists"))) (existing (filter common:file-exists? files))) (if (null? existing) ;; (common:file-exists? (configf:lookup test-conf "skip" "filenotexists"))) (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists"))))) ((and skip-check (configf:lookup test-conf "skip" "script")) (if (= (system (configf:lookup test-conf "skip" "script")) 0) (set! skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script"))))) ((and skip-check (configf:lookup test-conf "skip" "rundelay")) ;; run-ids = #f means *all* runs |
︙ | ︙ | |||
1938 1939 1940 1941 1942 1943 1944 | ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) (else (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) (else | | | 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 | ;; (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))) (else (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat)) (case (string->symbol (test:get-state testdat)) ((COMPLETED INCOMPLETE) (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) (else (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))) ;;====================================================================== ;; END OF NEW STUFF ;;====================================================================== (define (get-dir-up-n dir . params) (let ((dparts (string-split dir "/")) |
︙ | ︙ |
Modified server.scm from [78810e8804] to [b1a1de6e53].
|
| < | 1 2 3 4 5 6 7 | ;; Copyright 2006-2017, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or |
︙ | ︙ | |||
149 150 151 152 153 154 155 | (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time | | > | 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever (common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit)) (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) ;; given a path to a server log return: host port startseconds |
︙ | ︙ |