Megatest

Diff
Login

Differences From Artifact [903bab69fd]:

To Artifact [e808e7265f]:


22
23
24
25
26
27
28



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
;; Tasks db
;;======================================================================

;; wait up to aprox n seconds for a journal to go away
;;
(define (tasks:wait-on-journal path n #!key (remove #f))
  (let ((fullpath (conc path "-journal")))



    (let loop ((journal-exists (file-exists? fullpath))
	       (count          n)) ;; wait ten times ...
      (if journal-exists
	  (if (> count 0)
	      (begin
		(thread-sleep! 1)
		(loop (file-exists? fullpath)
		      (- count 1)))
	      (begin
		(if remove (system (conc "rm -rf " path)))
		#f))
	  #t))))

(define (tasks:get-task-db-path)
  (if *task-db*
      (vector-ref *task-db* 1)
      (let* ((linktree     (configf:lookup *configdat* "setup" "linktree"))
	     (dbpath       (conc linktree "/.db/monitor.db")))
	dbpath)))







>
>
>
|
|
|
|
|
|
|
|
|
|
|
|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
;; Tasks db
;;======================================================================

;; wait up to aprox n seconds for a journal to go away
;;
(define (tasks:wait-on-journal path n #!key (remove #f))
  (let ((fullpath (conc path "-journal")))
    (handle-exceptions
     exn
     #t ;; if stuff goes wrong just allow it to move on
     (let loop ((journal-exists (file-exists? fullpath))
		(count          n)) ;; wait ten times ...
       (if journal-exists
	   (if (> count 0)
	       (begin
		 (thread-sleep! 1)
		 (loop (file-exists? fullpath)
		       (- count 1)))
	       (begin
		 (if remove (system (conc "rm -rf " path)))
		 #f))
	   #t)))))

(define (tasks:get-task-db-path)
  (if *task-db*
      (vector-ref *task-db* 1)
      (let* ((linktree     (configf:lookup *configdat* "setup" "linktree"))
	     (dbpath       (conc linktree "/.db/monitor.db")))
	dbpath)))
319
320
321
322
323
324
325

326


327













328
329
330
331
332
333
334
    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)

  (system (conc "nbfake kill " pid)))


 













;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))







>
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>







322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
    res))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id)
  (let* ((tdb  (tasks:open-db))
	 (sdat (tasks:get-server mdb run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5)))
	  (debug:print-info 0 "Killing server for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record mdb server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))))
    
;;   (if status ;; #t means alive
;;       (begin
;; 	(if (equal? hostname (get-host-name))
;; 	    (handle-exceptions
;; 	     exn
;; 	     (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n"
;; 			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
626
627
628
629
630
631
632
633

634
635
636
637

638
639
640
641
642



643
644



645
646
647
648
649


650
651
652
653
654
655
656
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key))

	      (hostname  (cadr match-dat))
	      (pid       (caddr match-dat)))
	 (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	 (if (equal? (get-host-name) hostname)

	     (begin
	       (process-signal (string->number pid) signal/int)
	       (thread-sleep! 5)
	       (handle-exceptions
		exn



		#t
		(process-signal (string->number pid) signal/kill)))



	     ;;  (call-with-environment-variables
	     (let ((old-targethost (getenv "TARGETHOST")))
	       (set-environment-variable "TARGETHOST" hostname)
	       (system (conc "nbfake " kill " " pid))
	       (if old-targethost (set-environment-variable "TARGETHOST" old-targethost))))))


     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================








|
>
|
|
|
|
>
|
<
<
|
|
>
>
>
|
|
>
>
>
|
|
|
|
|
>
>







645
646
647
648
649
650
651
652
653
654
655
656
657
658
659


660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin


			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================