Megatest

Diff
Login

Differences From Artifact [372d06f4be]:

To Artifact [312826fb4b]:


38
39
40
41
42
43
44







45
46
47
48
49
50
51
52
53
54
55
56
57
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))








(define (launch:execute encoded-cmd)
  (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
                        ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))







>
>
>
>
>
>
>





|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))

;; if handed a string, process it, else look for MT_CMDINFO
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccdm
	(read (open-input-string (base64:base64-decode enccmd)))
	'())))

(define (launch:execute encoded-cmd)
  (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (open-run-close tests:test-set-status! #f test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
				 ;;    (run-n-wait fullrunscript)))
				 (open-run-close tests:test-set-status! #f test-id "RUNNING" "n/a" #f #f)
				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)







|

















|







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
				 ;;    (run-n-wait fullrunscript)))
				 (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
                                                        ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						        (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo itemdat #f logfna))
						     (if logpro-used
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))







|
|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
						       ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						       (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo itemdat #f logfna))
						     (if logpro-used
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "WARN" 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 







|
|
|

|


|




|







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (tests:test-set-status! test-id "RUNNING" "WARN" 
									  (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									  #f))
							 ((pass)
							  (tests:test-set-status! test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (tests:test-set-status! test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
						 (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (open-run-close tests:test-set-status! #f test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (open-run-close tests:test-set-status! #f test-id 
				      (if kill-job? "KILLED" "COMPLETED")
				      (cond
				       ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
				       ((eq? rollup-status 0)
					;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
					(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				       ((eq? rollup-status 1) "FAIL")
				       ((eq? rollup-status 2)
					;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
					(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				       (else "FAIL"))
				      (args:get-arg "-m") #f)))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))







|
|



















|
|
|
|
|
|
|
|
|
|
|
|
|







276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (tests:test-set-status! test-id 
				    (if kill-job? "KILLED" "COMPLETED")
				    (cond
				     ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
				     ((eq? rollup-status 0)
				      ;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? rollup-status 1) "FAIL")
				     ((eq? rollup-status 2)
				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     (else "FAIL"))
				    (args:get-arg "-m") #f)))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path "/" testtop-base))
	 (test-path    (conc disk-path "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (let ((rd (config-lookup *configdat* "setup" "linktree")))
		     (if rd rd (conc *toppath* "/runs"))))

	 (lnkbase  (conc linktree "/" target "/" runname))
	 (lnkpath  (conc lnkbase "/" testname))
	 (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))

    ;; Update the rundir path in the test record for all
    (db:test-set-rundir! db run-id testname item-path lnkpathf)







|







404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path "/" testtop-base))
	 (test-path    (conc disk-path "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (let ((rd (config-lookup *configdat* "setup" "linktree")))
		      (if rd rd (conc *toppath* "/runs"))))

	 (lnkbase  (conc linktree "/" target "/" runname))
	 (lnkpath  (conc lnkbase "/" testname))
	 (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))

    ;; Update the rundir path in the test record for all
    (db:test-set-rundir! db run-id testname item-path lnkpathf)
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
	 (list "MT_RUN_AREA_HOME" *toppath*)
	 (list "MT_TEST_NAME" test-name)
	 ;; (list "MT_ITEM_INFO" (conc itemdat)) 
	 (list "MT_RUNNAME"   runname)
	 ;; (list "MT_TARGET"    mt_target)
	 ))
  (let* ((useshell   (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript  (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	 (memory     (config-lookup test-conf   "requirements" "memory"))
	 (hosts      (config-lookup *configdat* "jobtools"     "workhosts"))







|
|
|
|
|
|







496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
    ;; (list "MT_TARGET"    mt_target)
    ))
  (let* ((useshell   (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript  (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	 (memory     (config-lookup test-conf   "requirements" "memory"))
	 (hosts      (config-lookup *configdat* "jobtools"     "workhosts"))
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (open-run-close tests:test-set-status! db test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))







|







571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))