Megatest

Check-in [7aef4f4738]
Login
Overview
Comment: Do not run a test if the test run process starts up and finds the test's state is KILLREQ. Also added a message when problems with the test copy are discovered.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 7aef4f47383a714e0a8e6d4a14359dfa4f20e9c6
User & Date: mrwellan on 2017-08-23 10:54:19
Other Links: branch diff | manifest | tags
Context
2017-08-23
11:58
Fixed a couple of issues with test-path handling. check-in: f8fd4a37c9 user: mrwellan tags: v1.64, v1.6429
10:54
Do not run a test if the test run process starts up and finds the test's state is KILLREQ. Also added a message when problems with the test copy are discovered. check-in: 7aef4f4738 user: mrwellan tags: v1.64
09:41
Bump version to v1.6429 check-in: b72e75b4c5 user: mrwellan tags: v1.64
Changes

Modified launch.scm from [3e0171923e] to [77598ee90c].

523
524
525
526
527
528
529
530
531


532
533
534
535
536

537
538
539





540
541
542
543
544
545
546
523
524
525
526
527
528
529


530
531
532
533
534
535

536
537


538
539
540
541
542
543
544
545
546
547
548
549







-
-
+
+




-
+

-
-
+
+
+
+
+







		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))
	  ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
	  (let ((files   (glob (conc testpath "/*")))
		(allgood #t))
	  (let ((files      (glob (conc testpath "/*")))
		(bad-files '()))
	    (for-each
	     (lambda (fullname)
	       (let* ((fname (pathname-strip-directory fullname)))
		 (if (not (file-exists? fname))
		     (set! allgood #f))))
		     (set! bad-files (cons fname bad-files)))))
	     files)
	    (if (not allgood)
		(launch:test-copy testpath work-area)))
	    (if (not (null? bad-files))
                (begin
                  (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
                  (debug:print 0 *default-log-port* "INFO: missing files from test run area: " (string-intersperse bad-files ", "))
                  (launch:test-copy testpath work-area))))
		 
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
569
570
571
572
573
574
575

576

577
578
579
580
581
582
583
572
573
574
575
576
577
578
579

580
581
582
583
584
585
586
587







+
-
+







		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")