Megatest

Diff
Login

Differences From Artifact [4d606e4f63]:

To Artifact [dd33d34146]:


446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc testpath "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  (if contour (setenv "MT_CONTOUR" contour))







|







446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc work-area "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  (if contour (setenv "MT_CONTOUR" contour))
512
513
514
515
516
517
518
519
520
521
522
523
524
525




























526
527
528
529
530
531
532
	;;    	  (handle-exceptions
	;;    	      exn
	;;    	      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
	;;    	    (create-directory logdir #t)))))
	;;    	  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))




























	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)







|

|

|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
	;;    	  (handle-exceptions
	;;    	      exn
	;;    	      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
	;;    	    (create-directory logdir #t)))))
	;;    	  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:directory-exists? work-area)
		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))

          (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
              (begin
                (debug:print 0 *default-log-port*
                             "INFO: we are expecting to be in directory " work-area "\n"
                             "     but we are actually in the directory " (current-directory) "\n"
                             "     doing another change dir.")
                (change-directory work-area)))
          
	  ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
	  (let ((files      (glob (conc testpath "/*")))
		(bad-files '()))
	    (for-each
	     (lambda (fullname)
	       (let* ((fname (pathname-strip-directory fullname))
                      (targn (conc work-area "/" fname)))
		 (if (not (file-exists? targn))
		     (set! bad-files (cons fname bad-files)))))
	     files)
	    (if (not (null? bad-files))
                (begin
                  (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
                  (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
                  (launch:test-copy testpath work-area))))

          ;; one more time, change to the work-area directory
          (change-directory work-area)
          
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
554
555
556
557
558
559
560

561
562
563
564
565
566
567
568
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond

	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")







>
|







582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
1064
1065
1066
1067
1068
1069
1070
















1071
1072
1073
1074
1075
1076
1077
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		(exit 1))))))) ;; TODO - move the exit to the calling location and return #f

















;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		(exit 1))))))) ;; TODO - move the exit to the calling location and return #f

(define (launch:test-copy test-src-path test-path)
  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
		   (if cmd
		       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
		       (string-substitute "TEST_TARG_PATH" test-path
					  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
		       #f)))
	 (cmd    (if ovrcmd 
		     ovrcmd
		     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
			   " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
	 (status (system cmd)))
    (if (not (eq? status 0))
	(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))


;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))

    (if (not (directory? test-path))
	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes

    (if (and test-src-path (directory? test-path))
	(begin
	  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
			   (if cmd
			       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
			       (string-substitute "TEST_TARG_PATH" test-path
						  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
			       #f)))
		 (cmd    (if ovrcmd 
			     ovrcmd
			     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
				   " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
		 (status (system cmd)))
	    (if (not (eq? status 0))
		(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\"")))
	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))







<
<
<
<
|
<
<
<
<
<
<
<
<







1274
1275
1276
1277
1278
1279
1280




1281








1282
1283
1284
1285
1286
1287
1288
	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))

    (if (not (directory? test-path))
	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes

    (if (and test-src-path (directory? test-path))
	(begin




	  (launch:test-copy test-src-path test-path)








	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))