Megatest

Changes On Branch 1ebe0d3438d42e14
Login

Changes In Branch v1.64-defered-rundir Through [1ebe0d3438] Excluding Merge-Ins

This is equivalent to a diff from cedd024462 to 1ebe0d3438

2017-07-25
23:20
Merged in v1.64 changes and fixed couple places affected by testpath no longer being available in cmdinfo check-in: 8a6d50fd15 user: matt tags: v1.64-defered-rundir
2017-07-18
16:03
Checked in preliminary script to check pre-requisites for installing chicken check-in: ec59887bf5 user: jmoon18 tags: v1.64
15:06
Allocating test run areas at test start time now mostly working check-in: 1ebe0d3438 user: mrwellan tags: v1.64-defered-rundir
09:14
Moved defered-rundir branch to v1.64 check-in: 78c25e29a0 user: mrwellan tags: v1.64-defered-rundir
2017-07-17
17:02
Fixing fork check-in: cedd024462 user: jmoon18 tags: v1.64
16:57
bumped version check-in: 9a0b4410d3 user: bjbarcla tags: v1.64, v1.6426
15:31
Resolving fork check-in: 6c3c013ce5 user: jmoon18 tags: v1.64

Modified configf.scm from [b1611ec83f] to [26fb7cf3f7].

699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
            (if (common:file-exists? fname)   ;; now verify it is readable
                (if (configf:read-alist fname)
                    #t ;; data is good.
                    (begin
                      (handle-exceptions
                       exn
                       #f
                       (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
                       (delete-file fname))
                      #f))
                #f))))
    (common:faux-unlock fname)
    res))
  
;; convert hierarchial list to ini format







|







699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
            (if (common:file-exists? fname)   ;; now verify it is readable
                (if (configf:read-alist fname)
                    #t ;; data is good.
                    (begin
                      (handle-exceptions
                       exn
                       #f
                       (debug:print 0 *default-log-port* "WARNING: content read from cache " fname " is not readable. Deleting generated file.")
                       (delete-file fname))
                      #f))
                #f))))
    (common:faux-unlock fname)
    res))
  
;; convert hierarchial list to ini format

Modified launch.scm from [8ce078ac53] to [a70a186b30].

410
411
412
413
414
415
416





















417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional























(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       ;; (runremote (assoc/default 'runremote cmdinfo))
	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
	       ;; (port      (assoc/default 'port      cmdinfo))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional

(define (launch:test-execute-exit-handler run-id test-id)
  (let ((sighand (lambda (signum)
                   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
                   (if (eq? signum signal/stop)
                       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
                   (set! *time-to-exit* #t)
                   (print "Received signal " signum ", cleaning up before exit. Please wait...")
                   (let ((th1 (make-thread (lambda ()
                                             (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f)
                                             (print "Killed by signal " signum ". Exiting")
                                             (thread-sleep! 1)
                                             (exit 1))))
                         (th2 (make-thread (lambda ()
                                             (thread-sleep! 2)
                                             (debug:print 0 *default-log-port* "Done")
                                             (exit 4)))))
                     (thread-start! th2)
                     (thread-start! th1)
                     (thread-join! th2)))))
    (set-signal-handler! signal/int sighand)
    (set-signal-handler! signal/term sighand)))

(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area #f) ;; (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       ;; (runremote (assoc/default 'runremote cmdinfo))
	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
	       ;; (port      (assoc/default 'port      cmdinfo))
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562







563
564
565
566
567
568
569
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (runtlim   (assoc/default 'runtlim   cmdinfo))
	       (contour   (assoc/default 'contour   cmdinfo))
	       (item-path (item-list->path itemdat))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc testpath "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	;;    ;; On NFS it can be slow and unreliable to get needed startup information.
	;;    ;;  i. Check if we are on the homehost, if so, proceed
	;;    ;; ii. Check if host and port passed in via CMDINFO are valid and if
	;;    ;;     possible use them.
	;;    (let ((bestadrs (server:get-best-guess-address (get-host-name)))
	;;    	(needcare #f))
	;;      (if (equal? homehost bestadrs) ;; we are likely on the homehost
	;;    	(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
	;;    	(let ((host-port (if serverurl (string-split serverurl ":") #f)))
	;;    	  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
	;;    	  (if (string? homehost)
	;;    	      (if (and host-port
	;;    		       (> (length host-port) 1))
	;;    		  (let* ((host      (car host-port))
        ;;                           (port      (cadr host-port))
        ;;                           (start-res (http-transport:client-connect host port))
        ;;                           (ping-res  (rmt:login-no-auto-client-setup start-res)))
	;;    		    (if (and start-res
	;;    			     ping-res)
	;;    			;; (begin ;; let ((url  (http-transport:server-dat-make-url start-res)))
	;;    			(begin
	;;    			  (remote-conndat-set! *runremote* start-res)
	;;    			  ;; (remote-server-url-set! *runremote* url)
	;;    			  ;; (if (server:ping url)
	;;    			  (debug:print-info 0 *default-log-port* "connected to " host ":" port " using CMDINFO data."))
	;;    			(begin
	;;    			  (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " host ":" port)
	;;    			  (set! *runremote* #f))
	;;    			  ;; (remote-conndat-set! *runremote* #f))
	;;    			))
	;;    		  (begin
	;;    		    (set! *runremote* #f)
	;;    		    (debug:print-info 0 *default-log-port* (if host-port
	;;    							       (conc "received invalid host-port information " host-port)
	;;    							       "no host-port information received"))
	;;    		    ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
	;;    		    (set! needcare #t)))
	;;    	      (begin
	;;    		(set! *runremote* #f)
	;;    		(debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.")
	;;    		(set! needcare #t)))))
	;;      (if needcare  ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host
	;;    	(let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory
	;;    	  (handle-exceptions
	;;    	      exn
	;;    	      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
	;;    	    (create-directory logdir #t)))))
	;;    	  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((th1 (make-thread (lambda ()
						     (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f)
						     (print "Killed by signal " signum ". Exiting")
						     (thread-sleep! 1)
						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 2)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2)))))
	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))







	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))







|


















<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<











|
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|










>
>
>
>
>
>
>







465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490

491
















































492
493
494
495
496
497
498
499
500
501
502
503






504













505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (runtlim   (assoc/default 'runtlim   cmdinfo))
	       (contour   (assoc/default 'contour   cmdinfo))
	       (item-path (item-list->path itemdat))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       ;; (keyvals   (assoc/default 'keyvals   cmdinfo))
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc testpath "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)


















































	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))







          (launch:test-execute-exit-handler run-id test-id)













          
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
            
            (let ((dat (create-work-area run-id runname target test-id testpath #f test-name itemdat)))
              (set! work-area (car dat)))
            (debug:print-info 2 *default-log-port* "Using work area " work-area)
            
	    (setenv "MT_TEST_RUN_DIR"  work-area)
	    
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
1057
1058
1059
1060
1061
1062
1063
1064

1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))

  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
			run-info
			(db:get-value-by-header (db:get-rows run-info)
						(db:get-header run-info)
						"runname")))
	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))








|
>
|







|







1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info target test-id test-src-path disk-path-in testname itemdat #!key (tconfig #f)(remtries 2))
  (let* ((disk-path (if disk-path-in disk-path-in (get-best-disk *configdat* tconfig))) ;; NOTE: You'd better have tconfig defined!
	 (item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
			run-info
			(db:get-value-by-header (db:get-rows run-info)
						(db:get-header run-info)
						"runname")))
	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 ;; (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
	    (if (not (eq? status 0))
		(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))
	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))

;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))







|









|

<
<
<
<
<
<
<
<
<
<
<
<
<
|







1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203













1204
1205
1206
1207
1208
1209
1210
1211
	    (if (not (eq? status 0))
		(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))
	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info target test-id test-src-path disk-path-in testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))

;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-source-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex













  (let* ((item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))







|


<







1252
1253
1254
1255
1256
1257
1258
1259
1260
1261

1262
1263
1264
1265
1266
1267
1268
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-source-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all

	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376

1377
1378
1379
1380
1381
1382
1383
1384
1385

1386
1387

1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )
					(list 'itemdat   itemdat  )
					(list 'megatest  remote-megatest)
					(list 'ezsteps   ezsteps) 
					(list 'target    mt_target)
					(list 'contour   contour)

					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway

      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir

      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))







|
<
|
|
|
|
<
<
<
<




|











|










>









>
|
|
>












|






|

|







1280
1281
1282
1283
1284
1285
1286
1287

1288
1289
1290
1291




1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
;;(if (configf:var-is? *configdat* "setup" "early-setup" "yes")

;;    (let ((dat  (create-work-area run-id run-info keyvals test-id test-source-path #f test-name itemdat tconfig: tconfig)))
;;      (set! work-area (car dat))
;;      (set! toptest-work-area (cadr dat))
;;      (debug:print-info 2 *default-log-port* "Using work area " work-area)))




      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-source-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;;
					(list 'areaname  (common:get-testsuite-name))
					(list 'toppath   *toppath*)
					;; (list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )
					(list 'itemdat   itemdat  )
					(list 'megatest  remote-megatest)
					(list 'ezsteps   ezsteps) 
					(list 'target    mt_target)
					(list 'contour   contour)
                                        ;; (list 'keyvals   keyvals)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      
      ;; (if (and work-area (common:file-exists? work-area))
      ;;    (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir

      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      ;; (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list ;; (list "MT_TEST_RUN_DIR" (if work-area work-area "no-test-run-area-set-yet"))
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) ;; GET RID OF THIS ONE
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))