Megatest

Check-in [cda53f40cf]
Login
Overview
Comment:open-run-close related refactoring
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | refactor
Files: files | file ages | folders
SHA1: cda53f40cf6e7052e2a004f302a7f22f39f4dd1e
User & Date: mrwellan on 2013-05-01 14:13:43
Other Links: branch diff | manifest | tags
Context
2013-05-01
16:02
Refactoring; open-run-close calls, remote calls check-in: dbcada9e2e user: mrwellan tags: refactor
14:13
open-run-close related refactoring check-in: cda53f40cf user: mrwellan tags: refactor
12:04
Refactored more open-run-close calls check-in: de3a20e5ec user: mrwellan tags: refactor
Changes

Modified launch.scm from [b383329c45] to [b5de4098a1].

404
405
406
407
408
409
410
411

412
413

414
415
416
417
418
419
420
404
405
406
407
408
409
410

411


412
413
414
415
416
417
418
419







-
+
-
-
+







;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat)
(define (create-work-area run-id run-info key-vals test-id test-src-path disk-path testname itemdat)
  (let* ((run-info (cdb:remote-run db:get-run-info #f run-id))
	 (item-path (item-list->path itemdat))
  (let* ((item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (key-vals (cdb:remote-run db:get-key-vals #f run-id))
	 (target   (string-intersperse key-vals "/"))

554
555
556
557
558
559
560
561

562
563
564
565
566
567
568
553
554
555
556
557
558
559

560
561
562
563
564
565
566
567







-
+







;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
(define (launch-test run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
605
606
607
608
609
610
611

612
613
614
615
616
617
618
619







-
+







    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath
	(let ((dat  (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat)))
	(let ((dat  (create-work-area run-id run-info key-vals test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print-info 2 "Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))

Modified run-tests-queue-classic.scm from [f5636f213b] to [a0cb8ca9a1].

1
2
3
4
5
6


7
8
9
10
11
12
13
14
15









16
17
18
19
20
21
22
1
2
3
4
5
6
7
8









9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24






+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+








;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts)
    ;; At this point the list of parent tests is expanded 
    ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(key-vals              (cdb:remote-run db:get-key-vals #f run-id))
  (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records))
	(test-registry     (make-hash-table))
	(registry-mutex    (make-mutex))
	(num-retries        0)
	(max-retries       (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs     (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				    (if (and mcj (string->number mcj))
					(string->number mcj)
					1))))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))))
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
    (if (not (null? sorted-test-names))
	(let loop ((hed         (car sorted-test-names))
		   (tal         (cdr sorted-test-names))
		   (reruns      '()))
	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
	  ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
125
126
127
128
129
130
131
132

133
134
135
136
137
138
139
127
128
129
130
131
132
133

134
135
136
137
138
139
140
141







-
+







		  (thread-sleep! 1) ;; (+ 2 *global-delta*))
		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
		  (loop (car newtal)(cdr newtal) reruns))
		 ((and have-resources
		       (or (null? prereqs-not-met)
			   (and (eq? testmode 'toplevel)
				(null? non-completed))))
		  (run:test run-id runname keyvallst test-record flags #f)
		  (run:test run-id run-info key-vals runname keyvallst test-record flags #f)
		  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
		  (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
		  ;; (thread-sleep! *global-delta*)
		  (if (not (null? tal))
		      (loop (car tal)(cdr tal) reruns)))
		 (else ;; must be we have unmet prerequisites
		    (debug:print 4 "FAILS: " fails)

Modified run-tests-queue-new.scm from [76b3d3f77e] to [b08080545a].

1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
16
17
18
19
20
21









22
23
24
25
26
27
28
1
2
3
4
5
6
7
8
9
10
11
12
13
14









15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30












+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+







;; (use trace)
;; (trace
;;  runs:queue-next-hed
;;  runs:queue-next-tal
;;  runs:queue-next-reg
;;  )

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue-new run-id runname test-records keyvallst flags test-patts reglen)
    ;; At this point the list of parent tests is expanded 
    ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags))
  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(key-vals              (cdb:remote-run db:get-key-vals #f run-id))
  (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records))
	(test-registry    (make-hash-table))
	(registry-mutex   (make-mutex))
	(num-retries        0)
	(max-retries       (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs     (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				    (if (and mcj (string->number mcj))
					(string->number mcj)
					1)))) ;; length of the register queue ahead
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1)))) ;; length of the register queue ahead
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))
    (if (not (null? sorted-test-names))
	(let loop ((hed         (car sorted-test-names))
		   (tal         (cdr sorted-test-names))
		   (reg         '()) ;; registered, put these at the head of tal 
		   (reruns      '()))
	  (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))
147
148
149
150
151
152
153
154

155
156
157
158
159
160
161
149
150
151
152
153
154
155

156
157
158
159
160
161
162
163







-
+







		  (thread-sleep! 1) ;; (+ 2 *global-delta*))
		  ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
		  (loop (car newtal)(cdr newtal) reg reruns))
		 ((and have-resources
		       (or (null? prereqs-not-met)
			   (and (eq? testmode 'toplevel)
				(null? non-completed))))
		  (run:test run-id runname keyvallst test-record flags #f)
		  (run:test run-id run-info key-vals runname keyvallst test-record flags #f)
		  (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
		  (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
		  ;; (thread-sleep! *global-delta*)
		  (if (not (null? tal))
		      (loop (runs:queue-next-hed tal reg reglen regfull)
			    (runs:queue-next-tal tal reg reglen regfull)
			    (runs:queue-next-reg tal reg reglen regfull)

Modified runs.scm from [99c4f45e9d] to [5cd6719766].

349
350
351
352
353
354
355
356

357
358
359
360
361
362
363
349
350
351
352
353
354
355

356
357
358
359
360
361
362
363







-
+







	  '()
	  reg)))

(include "run-tests-queue-classic.scm")
(include "run-tests-queue-new.scm")

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
(define (run:test run-id runname keyvallst test-record flags parent-test)
(define (run:test run-id run-info key-vals runname keyvallst test-record flags parent-test)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (test-conf    (tests:testqueue-get-testconfig test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (test-path    (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
	 (force        (hash-table-ref/default flags "-force" #f))
453
454
455
456
457
458
459
460

461
462
463
464
465
466
467
453
454
455
456
457
458
459

460
461
462
463
464
465
466
467







-
+







	       (if (not parent-test)
		   (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"" (test:get-state testdat) 
				"\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
                                "\" or -force to override"))
	       ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
	       ;;       already met.
	       ;; This would be a great place to do the process-fork
	       (if (not (launch-test #f run-id runname test-conf keyvallst test-name test-path itemdat flags))
	       (if (not (launch-test run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat flags))
		   (begin
		     (print "ERROR: Failed to launch the test. Exiting as soon as possible")
		     (set! *globalexitstatus* 1) ;; 
		     (process-signal (current-process-id) signal/kill))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)