Megatest

Diff
Login

Differences From Artifact [93df8109f6]:

To Artifact [16af3583d8]:


284
285
286
287
288
289
290


291
292
293
294
295
296
297
298
299


300
301
302
303
304
305
306

307




308
309
310

311
312
313
314


315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog*
  (make-thread 
   (lambda ()


     (let loop ()
       (thread-sleep! 5) ;; five second resolution is only a minor burden and should be tolerable 

       ;; sync for filesystem local db writes
       ;;
       (let ((start-time (current-seconds)))
	 (mutex-lock! *db-multi-sync-mutex*)
	 (for-each 
	  (lambda (run-id)


	    (let ((last-write (hash-table-ref/default *db-local-sync* run-id 0)))
	      (if ;; (and 
	       (> (- start-time last-write) 5) ;; every five seconds
	       ;;      (common:db-access-allowed?))
	       (begin
		 (db:multi-db-sync (list run-id) 'new2old)
		 (if (common:low-noise-print 30 "sync new to old")

		     (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " (- (current-seconds) start-time) " seconds"))




		 (hash-table-delete! *db-local-sync* run-id)))))
	  (hash-table-keys *db-local-sync*))
	 (mutex-unlock! *db-multi-sync-mutex*))

       
       ;; keep going unless time to exit
       ;;
       (if (not *time-to-exit*)


	   (loop))))
   "Watchdog thread"))

(thread-start! *watchdog*)

(define (std-exit-procedure)
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (not (null? run-ids))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *megatest-db* (begin
		      (sqlite3:interrupt! *megatest-db*)
		      (sqlite3:finalize! *megatest-db* #t)))
  (if *task-db*     (let ((db (vector-ref *task-db* 0)))
		      (sqlite3:interrupt! db)
		      (sqlite3:finalize! db #t))))

(define (std-signal-handler signum)
  (signal-mask! signum)
  (debug:print 0 "ERROR: Received signal " signum " exiting promptly")
  (std-exit-procedure)
  (exit))

(set-signal-handler! signal/int std-signal-handler)
(set-signal-handler! signal/term std-signal-handler)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
      (current-output-port oup)))

(if (or (args:get-arg "-h")







>
>
|
<
<
|
|
|
|
|
|
>
>
|
<
|
<
|
|
|
>
|
>
>
>
>
|
<
|
>
|
|
|
|
>
>
|
|



<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







284
285
286
287
288
289
290
291
292
293


294
295
296
297
298
299
300
301
302

303

304
305
306
307
308
309
310
311
312
313

314
315
316
317
318
319
320
321
322
323
324
325
326






















327
328
329
330
331
332
333
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog*
  (make-thread 
   (lambda ()
     (thread-sleep! 0.05) ;; delay for startup
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db")))
       (let loop ()


	 ;; sync for filesystem local db writes
	 ;;
	 (let ((start-time      (current-seconds))
	       (servers-started (make-hash-table)))
	   (for-each 
	    (lambda (run-id)
	      (mutex-lock! *db-multi-sync-mutex*)
	      (if (and legacy-sync 
		       (hash-table-ref/default *db-local-sync* run-id #f))

		  ;; (if (> (- start-time last-write) 5) ;; every five seconds

		  (begin ;; let ((sync-time (- (current-seconds) start-time)))
		    (db:multi-db-sync (list run-id) 'new2old)
		    (if (common:low-noise-print 30 "sync new to old")
			(let ((sync-time (- (current-seconds) start-time)))
			  (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds")))
		    ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run
		    ;;     (begin
		    ;;       (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id)
		    ;;       (server:kind-run run-id)))))
		    (hash-table-delete! *db-local-sync* run-id)))

	      (mutex-unlock! *db-multi-sync-mutex*))
	    (hash-table-keys *db-local-sync*)))

	 ;; keep going unless time to exit
	 ;;
	 (if (not *time-to-exit*)
	     (begin
	       (thread-sleep! 1) ;; wait one second before syncing again
	       (loop)))))
     "Watchdog thread")))

(thread-start! *watchdog*)























(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
      (current-output-port oup)))

(if (or (args:get-arg "-h")
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417

(if (args:get-arg "-itempatt")
    (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt"))))
      (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval)
      (hash-table-set! args:arg-hash "-testpatt" newval)
      (hash-table-delete! args:arg-hash "-itempatt")))

;; (on-exit std-exit-procedure)

;;======================================================================
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin







|







388
389
390
391
392
393
394
395
396
397
398
399
400
401
402

(if (args:get-arg "-itempatt")
    (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt"))))
      (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval)
      (hash-table-set! args:arg-hash "-testpatt" newval)
      (hash-table-delete! args:arg-hash "-itempatt")))

(on-exit std-exit-procedure)

;;======================================================================
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin
538
539
540
541
542
543
544

545
546
547
548
549
550
551
552
;; MAY STILL NEED THIS
;;		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server"))
    (let ((tl (launch:setup-for-run)))
      (if tl 

	  (let* ((servers (open-run-close tasks:get-all-servers tasks:open-db))
		 (fmtstr  "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n")
		 (servers-to-kill '())
		 (killinfo   (args:get-arg "-stop-server"))
		 (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f))
		 (sid        (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)))
	    (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport")
	    (format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========")







>
|







523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
;; MAY STILL NEED THIS
;;		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server"))
    (let ((tl (launch:setup-for-run)))
      (if tl 
	  (let* ((tdbdat  (tasks:open-db))
		 (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))
		 (fmtstr  "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n")
		 (servers-to-kill '())
		 (killinfo   (args:get-arg "-stop-server"))
		 (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f))
		 (sid        (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)))
	    (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport")
	    (format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========")
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
		      (killed     #f)
		      (status     (< last-update 20)))
		 ;;   (zmq-sockets (if status (server:client-connect hostname port) #f)))
		 ;; no need to login as status of #t indicates we are connecting to correct 
		 ;; server
		 (if (equal? state "dead")
		     (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day.
			 (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid action: 'delete))
		     (if (> last-update 20)        ;; Mark as dead if not updated in last 20 seconds
			 (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid)))
		 (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update
			 (if status "alive" "dead") transport)
		 (if (or (equal? id sid)
			 (equal? sid 0)) ;; kill all/any
		     (begin
		       (debug:print-info 0 "Attempting to stop server with pid " pid)
		       (tasks:kill-server status hostname pullport pid transport)))))







|

|







553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
		      (killed     #f)
		      (status     (< last-update 20)))
		 ;;   (zmq-sockets (if status (server:client-connect hostname port) #f)))
		 ;; no need to login as status of #t indicates we are connecting to correct 
		 ;; server
		 (if (equal? state "dead")
		     (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day.
			 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete))
		     (if (> last-update 20)        ;; Mark as dead if not updated in last 20 seconds
			 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid)))
		 (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update
			 (if status "alive" "dead") transport)
		 (if (or (equal? id sid)
			 (equal? sid 0)) ;; kill all/any
		     (begin
		       (debug:print-info 0 "Attempting to stop server with pid " pid)
		       (tasks:kill-server status hostname pullport pid transport)))))
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
					   (tdb:step-get-stepname step)
					   (tdb:step-get-state step)
					   (tdb:step-get-status step)
					   (tdb:step-get-event_time step)))
				 steps)))))
		      tests)))))
	     runs)
	  (db:close-all dbstruct)
	  (set! *didsomething* #t))))

;;======================================================================
;; full run
;;======================================================================

;; get lock in db for full run for this directory







|







794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
					   (tdb:step-get-stepname step)
					   (tdb:step-get-state step)
					   (tdb:step-get-status step)
					   (tdb:step-get-event_time step)))
				 steps)))))
		      tests)))))
	     runs)
	  ;; (db:close-all dbstruct)
	  (set! *didsomething* #t))))

;;======================================================================
;; full run
;;======================================================================

;; get lock in db for full run for this directory
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      ;; keep this one local
      ;; (open-run-close db:clean-up #f)
      (db:multi-db-sync 
       #f ;; do all run-ids
       'new2old
       'killservers
       'dejunk
       'adj-testids
       'old2new
       'new2old
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      (open-run-close db:find-and-mark-incomplete #f)
      (set! *didsomething* #t)))

;;======================================================================
;; Update the tests meta data from the testconfig files
;;======================================================================







|


|
|








|







1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
	  (begin
	    (debug:print 0 "Failed to setup, exiting") 
	    (exit 1)))
      ;; keep this one local
      ;; (open-run-close db:clean-up #f)
      (db:multi-db-sync 
       #f ;; do all run-ids
       ;; 'new2old
       'killservers
       'dejunk
       ;; 'adj-testids
       ;; 'old2new
       'new2old
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup-for-run))
	  (begin
	    (debug:print 0 "Failed to setup, exiting") b
	    (exit 1)))
      (open-run-close db:find-and-mark-incomplete #f)
      (set! *didsomething* #t)))

;;======================================================================
;; Update the tests meta data from the testconfig files
;;======================================================================
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
    (begin
      (db:multi-db-sync 
       #f ;; do all run-ids
       'killservers
       'dejunk
       'adj-testids
       'old2new
       'new2old
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-sync-to-megatest.db")
    (begin
      (db:multi-db-sync 
       #f ;; do all run-ids







|







1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
    (begin
      (db:multi-db-sync 
       #f ;; do all run-ids
       'killservers
       'dejunk
       'adj-testids
       'old2new
       ;; 'new2old
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-sync-to-megatest.db")
    (begin
      (db:multi-db-sync 
       #f ;; do all run-ids