Megatest

Diff
Login

Differences From Artifact [903bab69fd]:

To Artifact [e808e7265f]:


22
23
24
25
26
27
28



29
30
31
32
33
34
35
36
37
38
39
40












41
42
43
44
45
46
47
22
23
24
25
26
27
28
29
30
31












32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50







+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







;; Tasks db
;;======================================================================

;; wait up to aprox n seconds for a journal to go away
;;
(define (tasks:wait-on-journal path n #!key (remove #f))
  (let ((fullpath (conc path "-journal")))
    (handle-exceptions
     exn
     #t ;; if stuff goes wrong just allow it to move on
    (let loop ((journal-exists (file-exists? fullpath))
	       (count          n)) ;; wait ten times ...
      (if journal-exists
	  (if (> count 0)
	      (begin
		(thread-sleep! 1)
		(loop (file-exists? fullpath)
		      (- count 1)))
	      (begin
		(if remove (system (conc "rm -rf " path)))
		#f))
	  #t))))
     (let loop ((journal-exists (file-exists? fullpath))
		(count          n)) ;; wait ten times ...
       (if journal-exists
	   (if (> count 0)
	       (begin
		 (thread-sleep! 1)
		 (loop (file-exists? fullpath)
		       (- count 1)))
	       (begin
		 (if remove (system (conc "rm -rf " path)))
		 #f))
	   #t)))))

(define (tasks:get-task-db-path)
  (if *task-db*
      (vector-ref *task-db* 1)
      (let* ((linktree     (configf:lookup *configdat* "setup" "linktree"))
	     (dbpath       (conc linktree "/.db/monitor.db")))
	dbpath)))
319
320
321
322
323
324
325

326



327













328
329
330
331
332
333
334
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353







+
-
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+







    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid)))
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id)
  (let* ((tdb  (tasks:open-db))
	 (sdat (tasks:get-server mdb run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5)))
	  (debug:print-info 0 "Killing server for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record mdb server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))))
    
;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
626
627
628
629
630
631
632
633
634
635
636
637
638








639
640
641
642
643
644
645
646
647
648
649

















650
651
652
653
654
655
656
645
646
647
648
649
650
651






652
653
654
655
656
657
658
659











660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683







-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key))
	      (hostname  (cadr match-dat))
	      (pid       (caddr match-dat)))
	 (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	 (if (equal? (get-host-name) hostname)
	     (begin
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin
	       (process-signal (string->number pid) signal/int)
	       (thread-sleep! 5)
	       (handle-exceptions
		exn
		#t
		(process-signal (string->number pid) signal/kill)))
	     ;;  (call-with-environment-variables
	     (let ((old-targethost (getenv "TARGETHOST")))
	       (set-environment-variable "TARGETHOST" hostname)
	       (system (conc "nbfake " kill " " pid))
	       (if old-targethost (set-environment-variable "TARGETHOST" old-targethost))))))
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================