Megatest

Check-in [9d5c51a64f]
Login
Overview
Comment:corrected rmt:general-call call
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-telemetry
Files: files | file ages | folders
SHA1: 9d5c51a64fdbdabd04836bc503fcdfc5da588613
User & Date: bjbarcla on 2019-02-26 17:01:06
Other Links: branch diff | manifest | tags
Context
2019-02-27
17:14
cleaned up debug messages check-in: 93a64cc658 user: bjbarcla tags: v1.65-telemetry
2019-02-26
17:01
corrected rmt:general-call call check-in: 9d5c51a64f user: bjbarcla tags: v1.65-telemetry
15:23
updated so event_time is changed on REMOTEHOSTSTART... should prevent find-and-mark-incomplete from incorrectly marking running tests as dead check-in: e49f28b392 user: bjbarcla tags: v1.65-telemetry
Changes

Modified launch.scm from [b955ebc549] to [7d23ee6c29].

375
376
377
378
379
380
381

382
383
384
385
386
387
388
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389







+







			    (- 
			     (current-seconds) 
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      (BB> "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
624
625
626
627
628
629
630
631

632
633
634
635
636
637
638
639
640

641
642
643
644
645
646
647
625
626
627
628
629
630
631

632
633
634
635
636
637
638
639
640

641
642
643
644
645
646
647
648







-
+








-
+







		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (db:general-call dbstruct 'set-test-start-time (list test-id))
              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (exit)))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
              (db:general-call dbstruct 'set-test-start-time (list test-id))
              (rmt:general-call 'set-test-start-time #f test-id)
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      )
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))
	  
	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))