Megatest

Changes On Branch v1.64-forked-launch
Login

Changes In Branch v1.64-forked-launch Excluding Merge-Ins

This is equivalent to a diff from 5be250d6fc to 1cc676595e

2018-03-29
16:43
error check MTESTHASH check-in: cb3bbc9d2e user: bjbarcla tags: v1.64
2018-02-06
23:12
wip Leaf check-in: 1cc676595e user: bb tags: v1.64-forked-launch
18:33
wip check-in: 8f16df638a user: bb tags: v1.64-forked-launch
2018-02-02
17:28
wip check-in: 2c853b3d8d user: bjbarcla tags: v1.64-farmedout-runtest
2018-01-17
21:03
Merged in some of Jeff's changes to Makefile.deploy check-in: 6275b9b5c5 user: matt tags: v1.65
2017-12-14
13:32
Updated deploy Makefile check-in: 5be250d6fc user: jmoon18 tags: v1.64
2017-12-12
14:34
updated itemmap section in manual; hopefully it is clearer now check-in: c607976150 user: bjbarcla tags: v1.64

Modified launch.scm from [800f933448] to [9a6f12138e].

1246
1247
1248
1249
1250
1251
1252















































1253
1254
1255
1256
1257
1258
1259
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))
















































;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))

;;
(define (launch-test-standalone test-work-dir)
  (when (not (directory-exists? test-work-dir))
    (debug:print-error 0 *default-log-port* "Cannot launch.  test-work-dir for lauched test does not exist, cannot proceed with launch: "test-work-dir)
    (exit 1))
  (change-directory test-work-dir)
  (let* ((launch-dat-file (conc test-work-dir "/launch.dat")))
    (if (not (common:file-exists? launch-dat-file))
        ;; error and exit
        #f
        (let* ((launch-info  (with-input-from-file launch-dat-file read))
               (run-id       (alist-ref 'run-id     launch-info))
               (test-id      (alist-ref 'test-id    launch-info))
               (work-area    (alist-ref 'work-area  launch-info))
               (fullcmd      (alist-ref 'fullcmd    launch-info))
               (launchwait   (alist-ref 'launchwait launch-info))
               (useshell     (alist-ref 'useshell   launch-info))
               (launch-results-prev
                (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
                           process:cmd-run-with-stderr-and-exitcode->list
                           process-run)
                       (if useshell
                           (let ((cmdstr (string-intersperse fullcmd " ")))
                             (if launchwait
                                 cmdstr
                                 (conc cmdstr " >> mt_launch.log 2>&1 &")))
                           (car fullcmd))
                       (if useshell
					'()
					(cdr fullcmd))))
               (success
                (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
               (exit-code (if launchwait (cadr launch-results-prev) 0))
               )
          (if success
              (tests:test-set-status! run-id test-id "LAUNCHED" "enqueued" #f #f) 
              (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f))
          (with-output-to-file "mt_launch.log"
            (lambda ()
              (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
              (print "exit code => "exit-code)
              
              #:append))
          (debug:print 2 *default-log-port* "Launching completed, updating db")
          (debug:print 2 *default-log-port* "Launcher exit code: " exit-code)
          success))))

;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)







|







1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    ;;(change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457

1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477


1478
1479
1480
1481
1482
1483
1484
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	     (launch-results (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
					process:cmd-run-with-stderr->list
					process-run)
				    (if useshell
					(let ((cmdstr (string-intersperse fullcmd " ")))
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)

		(apply print launch-results)
		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	    #:append))
	(debug:print 2 *default-log-port* "Launching completed, updating db")
	(debug:print 2 *default-log-port* "Launch results: " launch-results)
	(if (not launch-results)
	    (begin
	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
	      ;; (sqlite3:finalize! db)
	      ;; good ole "exit" seems not to work
	      ;; (_exit 9)
	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))



;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid







|
|
|
|
|
|
<
<
<
|
<
<
<
<
|
<
|
|
<
|
>
|
<
<
<
<
|
|
<
<
<
<
<
<
<
<
|


<
|
>
>







1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490



1491




1492

1493
1494

1495
1496
1497




1498
1499








1500
1501
1502

1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
             (launch-info    (list
                              (cons 'run-id     run-id)
                              (cons 'test-id    test-id)
                              (cons 'work-area  work-area)
                              (cons 'fullcmd    fullcmd)
                              (cons 'launchwait launchwait)



                              (cons 'useshell   useshell)))




             (launch-dat-file (conc work-area "/launch.dat"))

             (write-result    (with-output-to-file launch-dat-file
                               (lambda () (pp launch-info))))

             (launch-cmd             (conc "megatest -start-dir "*toppath*" -internal-launch-test "work-area" &"))
	     )





        (system launch-cmd)









        (alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)

        (change-directory *toppath*)
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
        write-result))))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid

Modified megatest.scm from [4ccc1620b9] to [0f7fc288e2].

275
276
277
278
279
280
281

282
283
284
285
286
287
288
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"
			"-days"
			"-rename-run"
			"-to"

			;; values and messages
			":category"
			":variable"
			":value"
			":expected"
			":tol"
			":units"







>







275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"
			"-days"
			"-rename-run"
			"-to"
                        "-internal-launch-test" 
			;; values and messages
			":category"
			":variable"
			":value"
			":expected"
			":tol"
			":units"
1686
1687
1688
1689
1690
1691
1692











1693
1694
1695
1696
1697
1698
1699
     "-rollup" 
     "rollup tests" 
     (lambda (target runname keys keyvals)
       (runs:rollup-run keys
			keyvals
			(or (args:get-arg "-runname")(args:get-arg ":runname") )
			user))))












;;======================================================================
;; Lock or unlock a run
;;======================================================================

(if (or (args:get-arg "-lock")(args:get-arg "-unlock"))
    (general-run-call 







>
>
>
>
>
>
>
>
>
>
>







1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
     "-rollup" 
     "rollup tests" 
     (lambda (target runname keys keyvals)
       (runs:rollup-run keys
			keyvals
			(or (args:get-arg "-runname")(args:get-arg ":runname") )
			user))))

;;======================================================================
;; launch test in separate call; takes test run dir as argument.
;;======================================================================
(if (args:get-arg "-internal-launch-test")
    (let ((toppath  (launch:setup)))
      (launch-test-standalone (args:get-arg "-internal-launch-test"))
      (set! *didsomething* #t)))




;;======================================================================
;; Lock or unlock a run
;;======================================================================

(if (or (args:get-arg "-lock")(args:get-arg "-unlock"))
    (general-run-call 

Modified process.scm from [36b394cc1e] to [70c3ca9d10].

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

;;======================================================================
;; Process convience utils
;;======================================================================

(use regex)
(declare (unit process))
;;(declare (uses common))

(define (process:conservative-read port)
  (let loop ((res ""))
    (if (not (eof-object? (peek-char port)))
	(loop (conc res (read-char port)))
	res)))








<







11
12
13
14
15
16
17

18
19
20
21
22
23
24

;;======================================================================
;; Process convience utils
;;======================================================================

(use regex)
(declare (unit process))


(define (process:conservative-read port)
  (let loop ((res ""))
    (if (not (eof-object? (peek-char port)))
	(loop (conc res (read-char port)))
	res)))

43
44
45
46
47
48
49


























50
51
52
53
54
55
56
	   (loop (read-line fh)
		 (append result (list curr)))
	   (begin
	     (close-input-port fh)
	     (close-input-port fhe)
	     (close-output-port fho)
	     result))))) ;; )



























(define (process:cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
	   (loop (read-line fh)
		 (append result (list curr)))
	   (begin
	     (close-input-port fh)
	     (close-input-port fhe)
	     (close-output-port fho)
	     result))))) ;; )

(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
;;  (handle-exceptions
;;   exn
;;   (begin
;;     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))
;;     (print "       " ((condition-property-accessor 'exn 'message) exn))
;;     #f)
   (let-values (((fh fho pid fhe) (if (null? params)
				      (process* cmd)
				      (process* cmd params))))
       (let loop ((curr (read-line fh))
		  (result  '()))
	 (let ((errstr (process:conservative-read fhe)))
	   (if (not (string=? errstr ""))
	       (set! result (append result (list errstr)))))
       (if (not (eof-object? curr))
	   (loop (read-line fh)
		 (append result (list curr)))
	   (begin
	     ;(close-input-port fh)
	     ;(close-input-port fhe)
	     ;(close-output-port fho)
             (let-values (((anotherpid normalexit? exitstatus)  (process-wait pid)))
               (list result (if normalexit? exitstatus -1))))))))

(define (process:cmd-run-proc-each-line cmd proc . params)
  ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params)
  (handle-exceptions
   exn
   (begin
     (print "ERROR:  Failed to run command: " cmd " " (string-intersperse params " "))