Megatest

Check-in [7aef4f4738]
Login
Overview
Comment:Do not run a test if a test run process lands where the state is KILLREQ. Also added a message when problems are discovered from the test copy.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 7aef4f47383a714e0a8e6d4a14359dfa4f20e9c6
User & Date: mrwellan on 2017-08-23 10:54:19
Other Links: branch diff | manifest | tags
Context
2017-08-23
11:58
Fixed couple issues with test-path handling. check-in: f8fd4a37c9 user: mrwellan tags: v1.64, v1.6429
10:54
Do not run a test if a test run process lands where the state is KILLREQ. Also added a message when problems are discovered from the test copy. check-in: 7aef4f4738 user: mrwellan tags: v1.64
09:41
Bump version to v1.6429 check-in: b72e75b4c5 user: mrwellan tags: v1.64
Changes

Modified launch.scm from [3e0171923e] to [77598ee90c].

523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538



539
540
541
542
543
544
545
546
		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))
	  ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
	  (let ((files   (glob (conc testpath "/*")))
		(allgood #t))
	    (for-each
	     (lambda (fullname)
	       (let* ((fname (pathname-strip-directory fullname)))
		 (if (not (file-exists? fname))
		     (set! allgood #f))))
	     files)
	    (if (not allgood)



		(launch:test-copy testpath work-area)))
		 
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))







|
|




|

|
>
>
>
|







523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))
	  ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
	  (let ((files      (glob (conc testpath "/*")))
		(bad-files '()))
	    (for-each
	     (lambda (fullname)
	       (let* ((fname (pathname-strip-directory fullname)))
		 (if (not (file-exists? fname))
		     (set! bad-files (cons fname bad-files)))))
	     files)
	    (if (not (null? bad-files))
                (begin
                  (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
                  (debug:print 0 *default-log-port* "INFO: missing files from test run area: " (string-intersperse bad-files ", "))
                  (launch:test-copy testpath work-area))))
		 
	  (launch:setup) ;; should be properly in the top-path now
	  (set! tconfigreg (tests:get-all))
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
569
570
571
572
573
574
575

576
577
578
579
580
581
582
583
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond

	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")







>
|







572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")