Megatest

Diff
Login

Differences From Artifact [32c9f375e7]:

To Artifact [dd3e168314]:


51
52
53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74









75
76
77
78
79
80
81
51
52
53
54
55
56
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89







-
+















-
+
+
+
+
+
+
+
+
+







	'())))

(define (launch:execute encoded-cmd)
  (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; How is testpath different from work-area ??
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (target    (assoc/default 'target    cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc testpath "/" runscript)))
	                                  (if (and (file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
102
103
104
105
106
107
108
109

110
111
112
113
114
115

116
117
118
119
120
121
122
110
111
112
113
114
115
116

117
118
119
120
121
122

123
124
125
126
127
128
129
130







-
+





-
+







		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
	  ;; Can setup as client for server mode now
	  (server:client-setup)

	  (change-directory *toppath*) 
	  (open-run-close set-megatest-env-vars #f run-id) ;; these may be needed by the launching process
	  (set-megatest-env-vars run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-megatest-env-vars run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
186
187
188
189
190
191
192
193

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211

212
213

214
215
216
217
218
219
220
194
195
196
197
198
199
200

201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218

219
220

221
222
223
224
225
226
227
228







-
+

















-
+

-
+







						   ;;       (set! script (conc script "source " prev-env))))
						   
						   ;; call the command using mt_ezstep
						   (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd))

						   (debug:print 4 "script: " script)

						   (open-run-close db:teststep-set-status! #f test-id stepname "start" "-" #f #f)
						   (cdb:remote-run db:teststep-set-status! #f test-id stepname "start" "-" #f #f)
						   ;; now launch
						   (let ((pid (process-run script)))
						     (let processloop ((i 0))
						       (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
								   (mutex-lock! m)
								   (vector-set! exit-info 0 pid)
								   (vector-set! exit-info 1 exit-status)
								   (vector-set! exit-info 2 exit-code)
								   (mutex-unlock! m)
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
						       ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						       (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna))
						       (cdb:remote-run db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna))
						     (if logpro-used
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
							 (cdb:test-set-log! *runremote*  test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))
							    (overall-status   (cond
									       ((eq? rollup-status 2) 'warn)
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300


301
302
303
304
305
306
307
260
261
262
263
264
265
266

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

308
309
310
311
312
313
314
315
316







-
+




















-
+



















-
+
+







							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (open-run-close test-get-kill-request #f test-id)) ;; run-id test-name itemdat))
				       (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat))
				       (open-run-close test-set-meta-info #f test-id run-id test-name itemdat minutes)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
						     (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
						       (for-each 
							(lambda (p)
							  (let* ((parts  (string-split p))
								 (p-id   (if (> (length parts) 0)
									     (string->number (car parts))
									     #f)))
							    (if p-id
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						       (system (conc "kill -9 -" pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
		   (testinfo  (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
	      ;; Am I completed?
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (tests:test-set-status! test-id 
				    (if kill-job? "KILLED" "COMPLETED")
				    (cond
				     ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
383
384
385
386
387
388
389
390

391
392
393
394
395
396

397
398
399
400
401
402
403
392
393
394
395
396
397
398

399
400
401
402
403
404

405
406
407
408
409
410
411
412







-
+





-
+







;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat)
  (let* ((run-info (db:get-run-info db run-id))
  (let* ((run-info (cdb:remote-run db:get-run-info #f run-id))
	 (item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (key-vals (db:get-key-vals db run-id))
	 (key-vals (cdb:remote-run db:get-key-vals #f run-id))
	 (target   (string-intersperse key-vals "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))
411
412
413
414
415
416
417
418

419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437

438
439

440

441
442
443
444

445
446
447
448
449
450
451
452
453
454

455
456
457
458
459
460
461
420
421
422
423
424
425
426

427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445

446
447
448
449

450
451
452
453

454
455
456
457
458
459
460
461
462
463

464
465
466
467
468
469
470
471







-
+


















-
+


+
-
+



-
+









-
+







		      (if rd rd (conc *toppath* "/runs"))))

	 (lnkbase  (conc linktree "/" target "/" runname))
	 (lnkpath  (conc lnkbase "/" testname))
	 (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))

    ;; Update the rundir path in the test record for all
    (db:test-set-rundir-by-test-id! db test-id lnkpathf)
    (cdb:test-set-rundir-by-test-id *runremote* test-id lnkpathf)

    (debug:print 2 "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
    (if (not (file-exists? linktree))
	(begin
	  (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what...
    (if (not (directory-exists? lnkbase))
	(create-directory lnkbase #t))
    
    ;; update the toptest record with its location rundir, cache the path
    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; NB - This is not working right - some top tests are not getting the path set!!!

    (if (not (hash-table-ref/default *toptest-paths* testname #f))
	(let* ((testinfo       (db:get-test-info-by-id db test-id)) ;;  run-id testname item-path))
	(let* ((testinfo       (cdb:get-test-info-by-id *runremote* test-id)) ;;  run-id testname item-path))
	       (curr-test-path (if testinfo (db:test-get-rundir testinfo) #f)))
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  ;; NB// Was this for the test or for the parent in an iterated test?
	  (db:test-set-rundir! db run-id testname "" lnkpath) ;; toptest-path)
	  (cdb:test-set-rundir! *runremote* run-id testname "" lnkpath) ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print 2 "INFO: Creating " toptest-path " and link " lnkpath)
		(debug:print-info 2 "Creating " toptest-path " and link " lnkpath)
		(create-directory toptest-path #t)
		(hash-table-set! *toptest-paths* testname toptest-path)))))

    ;; Now create the link from the test path to the link tree, however
    ;; if the test is iterated it is necessary to create the parent path
    ;; to the iteration. use pathname-directory to trim the path by one
    ;; level
    (if (not not-iterated) ;; i.e. iterated
	(let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
	  (debug:print 2 "INFO: Creating iterated parent " iterated-parent)
	  (debug:print-info 2 "Creating iterated parent " iterated-parent)
	  (create-directory iterated-parent #t)))

    (if (symbolic-link? lnkpath) (delete-file lnkpath))
    (if (not (or (file-exists? lnkpath)
		 (symbolic-link? lnkpath)))
	(create-symbolic-link toptest-path lnkpath))
    
482
483
484
485
486
487
488
489

490
491
492
493
494
495
496
492
493
494
495
496
497
498

499
500
501
502
503
504
505
506







-
+







    ;;   (if (and (file-exists? testlink)
    ;;            (or (regular-file? testlink)
    ;;     	   (symbolic-link? testlink)))
    ;;       (system (conc "rm -f " testlink)))
    ;;   (system  (conc "ln -sf " test-path " " testlink)))
    (if (directory? test-path)
	(begin
	  (let* ((cmd    (conc "rsync -av" (if (> *verbosity* 1) "" "q") " " test-src-path "/ " test-path "/"))
	  (let* ((cmd    (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"))
		 (status (system cmd)))
	    (if (not (eq? status 0))
		(debug:print 2 "ERROR: problem with running \"" cmd "\"")))
	  (list lnkpathf lnkpath ))
	(list #f #f))))

;; 1. look though disks list for disk with most space
535
536
537
538
539
540
541
542
543


544
545
546
547
548
549
550
551
552
553
554
555
556
557
558

559
560
561
562
563
564
565
545
546
547
548
549
550
551


552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567

568
569
570
571
572
573
574
575







-
-
+
+














-
+







	 (work-area  #f)
	 (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (item-path (item-list->path itemdat))
	 (test-id    (open-run-close db:get-test-id db run-id test-name item-path))
	 (testinfo   (open-run-close db:get-test-info-by-id db test-id))
	 (test-id    (cdb:remote-run db:get-test-id #f run-id test-name item-path))
	 (testinfo   (cdb:get-test-info-by-id *runremote* test-id))
	 (mt_target  (string-intersperse (map cadr keyvallst) "/"))
	 (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
			      (if (args:get-arg "-logging")(list "-logging") '()))))
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath
	(let ((dat  (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print 2 "INFO: Using work area " work-area))
	  (debug:print-info 2 "Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run")))
    (set! cmdparms (base64:base64-encode (with-output-to-string
					   (lambda () ;; (list 'hosts     hosts)
					     (write (list (list 'testpath  test-path)
574
575
576
577
578
579
580
581
582


583
584
585
586
587
588
589
584
585
586
587
588
589
590


591
592
593
594
595
596
597
598
599







-
-
+
+







							  (list 'ezsteps   ezsteps) 
							  (list 'target    mt_target)
							  (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (open-run-close db:delete-test-step-records db test-id)
    ;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    ;; (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher