Megatest

Diff
Login

Differences From Artifact [f3b05e9850]:

To Artifact [7a5f293c75]:


69
70
71
72
73
74
75

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
104







+




















-
+







	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (target    (assoc/default 'target    cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (runtlim   (assoc/default 'runtlim   cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc testpath "/" runscript)))
	                                  (if (and (file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       (rollup-status 0))
	  (change-directory top-path)
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; Setup the *runremote* global var
	  (if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time"))
	  ;; (set! *runremote* runremote)
	  (set! *transport-type* (string->symbol transport))
	  (set! keys       (cdb:remote-run db:get-keys #f))
	  (set! keyvals    (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f))
	  (set! keyvals    (keys:target->keyval keys target))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobber things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
		(debug:print 4 "varpairs: " varpairs)
		(map (lambda (varpair)
		       (let ((varval (string-split varpair "=")))
124
125
126
127
128
129
130
131

132
133
134
135
136
137
138
125
126
127
128
129
130
131

132
133
134
135
136
137
138
139







-
+







	  ;; Can setup as client for server mode now
	  ;; (client:setup)

	  (change-directory *toppath*) 
	  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  (set-run-config-vars run-id keys keyvals target) ;; (db:get-target db run-id))
	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info
	  (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area)
272
273
274
275
276
277
278
279








280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301















302
303
304
305
306
307
308
273
274
275
276
277
278
279

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295















296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317







-
+
+
+
+
+
+
+
+







+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat))
				       (set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (process-signal pid signal/kill)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
						     (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
						       (for-each 
							(lambda (p)
							  (let* ((parts  (string-split p))
								 (p-id   (if (> (length parts) 0)
									     (string->number (car parts))
									     #f)))
							    (if p-id
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 -" pid))))
						   ;; (begin
						   ;;   (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
						   ;;   (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
						   ;;     (for-each 
						   ;;      (lambda (p)
						   ;;        (let* ((parts  (string-split p))
						   ;;      	 (p-id   (if (> (length parts) 0)
						   ;;      		     (string->number (car parts))
						   ;;      		     #f)))
						   ;;          (if p-id
						   ;;      	(begin
						   ;;      	  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
						   ;;      	  (system (conc "kill -9 " p-id))))))
						   ;;      (car processes))
						   ;;     (system (conc "kill -9 -" pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
404
405
406
407
408
409
410
411

412
413
414
415
416
417

418
419
420
421
422
423
424
413
414
415
416
417
418
419

420
421
422
423
424
425

426
427
428
429
430
431
432
433







-
+





-
+







;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info key-vals test-id test-src-path disk-path testname itemdat)
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse key-vals "/"))
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

552
553
554
555
556
557
558
559

560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575







576

577
578
579
580
581
582
583
561
562
563
564
565
566
567

568
569
570
571
572
573
574
575
576
577







578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593







-
+









-
-
-
-
-
-
-
+
+
+
+
+
+
+

+







;; 1. look through disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat params)
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
    ;; (list "MT_TARGET"    mt_target)
    ))
  (let* ((useshell   (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript  (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	 (memory     (config-lookup test-conf   "requirements" "memory"))
	 (hosts      (config-lookup *configdat* "jobtools"     "workhosts"))
  (let* ((useshell        (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher        (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript       (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps         (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace       (config-lookup test-conf   "requirements" "diskspace"))
	 (memory          (config-lookup test-conf   "requirements" "memory"))
	 (hosts           (config-lookup *configdat* "jobtools"     "workhosts"))
	 (remote-megatest (config-lookup *configdat* "setup" "executable"))
	 (run-time-limit  (configf:lookup  test-conf   "requirements" "runtimelim"))
	 ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	 ;;                allow running from dashboard. Extract the path
	 ;;                from the called megatest and convert dashboard
	 ;;             	  or dboard to megatest
	 (local-megatest  (let* ((lm  (car (argv)))
				 (dir (pathname-directory lm))
				 (exe (pathname-strip-directory lm)))
593
594
595
596
597
598
599
600

601
602
603
604
605
606
607
608
609
610
611

612
613
614
615
616
617
618
603
604
605
606
607
608
609

610
611
612
613
614
615
616
617
618
619
620

621
622
623
624
625
626
627
628







-
+










-
+







	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (item-path (item-list->path itemdat))
	 ;; (test-id    (cdb:remote-run db:get-test-id #f run-id test-name item-path))
	 (testinfo   (cdb:get-test-info-by-id *runremote* test-id))
	 (mt_target  (string-intersperse (map cadr keyvallst) "/"))
	 (mt_target  (string-intersperse (map cadr keyvals) "/"))
	 (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
			      (if (args:get-arg "-logging")(list "-logging") '()))))
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath
	(let ((dat  (create-work-area run-id run-info key-vals test-id test-path diskpath test-name itemdat)))
	(let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print-info 2 "Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
629
630
631
632
633
634
635

636
637
638
639

640
641
642
643
644
645
646
639
640
641
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
657







+



-
+







				     (list 'runscript runscript) 
				     (list 'run-id    run-id   )
				     (list 'test-id   test-id  )
				     (list 'itemdat   itemdat  )
				     (list 'megatest  remote-megatest)
				     (list 'ezsteps   ezsteps) 
				     (list 'target    mt_target)
				     (list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
				     (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
				     (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				     (list 'runname   runname)
				     (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
				     (list 'mt-bindir-path mt-bindir-path)))))))
    ;; clean out step records from previous run if they exist
    ;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    ;; (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
665
666
667
668
669
670
671

672


673
674
675
676
677
678
679
680
681
682
683
684
685








686
687
688
689
690
691
692
676
677
678
679
680
681
682
683

684
685
686
687
688
689
690
691
692
693





694
695
696
697
698
699
700
701
702
703
704
705
706
707
708







+
-
+
+








-
-
-
-
-
+
+
+
+
+
+
+
+







			    (append (list (list "MT_TEST_RUN_DIR" work-area)
					  (list "MT_TEST_NAME" test-name)
					  (list "MT_ITEM_INFO" (conc itemdat)) 
					  (list "MT_RUNNAME"   runname)
					  (list "MT_TARGET"    mt_target)
					  )
				    itemdat)))
	   ;; Launchwait defaults to true, must override it to turn off wait
	   (launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes")
	   (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	   (launch-results (apply (if launchwait
				      cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (string-intersperse fullcmd " ")
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (list? launch-results)
	  (with-output-to-file "mt_launch.log"
	    (lambda ()
	      (apply print launch-results))
	    #:append))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file "mt_launch.log"
	(lambda ()
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 "Launching completed, updating db")
      (debug:print 2 "Launch results: " launch-results)
      (if (not launch-results)
          (begin
            (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
            ;; (sqlite3:finalize! db)
            ;; good ole "exit" seems not to work