Megatest

Diff
Login

Differences From Artifact [590db5beef]:

To Artifact [af8f785c6d]:


50
51
52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78
79
80
81
82
83
84

85
86
87
88

89

90
91
92
93
94
95


96
97
98
99
100
101
102
103
104
105
106

107
108
109
110
111
112
113
50
51
52
53
54
55
56


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89

90
91
92
93
94


95
96
97
98
99
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114







-
-
+
+














-
+










-
+




+
-
+




-
-
+
+










-
+







		       (loop (file-exists? fullpath)
			     (- count 1)))
		     (begin
		       (if remove (system (conc "rm -rf " fullpath)))
		       #f)))
	       #t))))))

(define (tasks:get-task-db-path)
  (let* ((linktree     (configf:lookup *configdat* "setup" "linktree"))
(define (tasks:get-task-db-path area-dat)
  (let* ((linktree     (configf:lookup (megatest:area-configdat area-dat) "setup" "linktree"))
	 (dbpath       (conc linktree "/.db")))
    dbpath))



;; If file exists AND
;;    file readable
;;         ==> open it
;; If file exists AND
;;    file NOT readable
;;         ==> open in-mem version
;; If file NOT exists
;;    ==> open in-mem version
;;
(define (tasks:open-db #!key (numretries 4))
(define (tasks:open-db area-dat #!key (numretries 4))
  (if *task-db*
      *task-db*
      (handle-exceptions
       exn
       (if (> numretries 0)
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	     (tasks:open-db area-dat numretries: (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 " exn=" (condition->list exn))))
       (let* ((toppath      (megatest:area-path area-dat))
       (let* ((dbpath       (tasks:get-task-db-path))
	      (dbpath       (tasks:get-task-db-path area-dat))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
	      (mdb          (cond ;; what the hek is toppath doing here?
			     ((and (string? toppath)(file-write-access? toppath))
			      (sqlite3:open-database dbfile))
			     ((file-read-access? dbpath)    (sqlite3:open-database dbfile))
			     (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
	      (handler      (make-busy-timeout 36000)))
	 (if (and exists
		  (not write-access))
	     (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
	 (sqlite3:set-busy-handler! mdb handler)
	 (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
	 ;;  (if (or (and (not exists)
	 ;; 	      (file-write-access? *toppath*))
	 ;; 	      (file-write-access? toppath))
	 ;; 	 (not (file-read-access? dbpath)))
	 ;;      (begin
	 ;; 
	 ;; TASKS QUEUE MOVED TO main.db
	 ;;
	 ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
         ;;                        action TEXT DEFAULT '',
147
148
149
150
151
152
153
154

155
156
157
158
159
160
161
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162







-
+







                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,
                                CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
	       
	       ;))
	 (set! *task-db* (cons mdb dbpath))
	 (set! *task-db* (cons mdb dbpath)) ;; Move into area-dat !!!!
	 *task-db*))))

;;======================================================================
;; Server and client management
;;======================================================================

;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265
266
267
268
269
270
252
253
254
255
256
257
258

259





260
261
262
263
264
265
266







-
+
-
-
-
-
-







	(highnum        64000)
	(used-ports     '())
	(get-rand-port  (lambda ()
			  (+ lownum (random (- highnum lownum)))))
	(port-param     (if (and (args:get-arg "-port")
				 (string->number (args:get-arg "-port")))
			    (string->number (args:get-arg "-port"))
			    #f))
			    #f)))
	;; (config-port    (if (and (config-lookup  *configdat* "server" "port")
	;; 			 (string->number (config-lookup  *configdat* "server" "port")))
	;; 		    (string->number (config-lookup  *configdat* "server" "port"))
	;; 		    #f))
	)
    (sqlite3:for-each-row
     (lambda (port)
       (set! used-ports (cons port used-ports)))
     mdb
     "SELECT port FROM servers;")
    (cond
     ((and port-param res)   (if (> res port-param) res port-param))
358
359
360
361
362
363
364
365
366






367
368
369

370
371
372
373
374
375
376
354
355
356
357
358
359
360


361
362
363
364
365
366
367
368

369
370
371
372
373
374
375
376







-
-
+
+
+
+
+
+


-
+







    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id)
    res))

(define (tasks:need-server run-id)
  (configf:lookup *configdat* "server" "required"))
(define (tasks:need-server run-id area-dat)
  (let ((req (configf:lookup (megatest:area-configdat area-dat) "server" "required")))
    (if (and req
	     (equal? req "yes"))
	#t
	#f)))

;; 	(maxqry (cdr (rmt:get-max-query-average run-id)))
;; 	(threshold   (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
;; 	(threshold   (string->number (or (configf:lookup configdat "server" "server-query-threshold") "10"))))
;;     (cond
;;      (forced 
;;       (if (common:low-noise-print 60 run-id "server required is set")
;; 	  (debug:print-info 0 "Server required is set, starting server for run-id " run-id "."))
;;       #t)
;;      ((> maxqry threshold)
;;       (if (common:low-noise-print 60 run-id "Max query time execeeded")
426
427
428
429
430
431
432
433
434


435
436
437
438
439
440
441
426
427
428
429
430
431
432


433
434
435
436
437
438
439
440
441







-
-
+
+







  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id #!key (tag "default"))
  (let* ((tdbdat  (tasks:open-db))
(define (tasks:kill-server-run-id run-id area-dat #!key (tag "default"))
  (let* ((tdbdat  (tasks:open-db area-dat))
	 (sdat    (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
    (if sdat
	(let ((hostname (vector-ref sdat 6))
	      (pid      (vector-ref sdat 5))
	      (server-id (vector-ref sdat 0)))
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed")
	  (debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
509
510
511
512
513
514
515





















516
517
518
519
520
521
522







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







     (lambda (count)
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (db:dbfile-path #f) "/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)
		(tasks:process-queue db mdb last-db-update megatestdb next-touch))
	    ;; WARNING: Possible race conditon here!!
	    ;; should this update be immediately after the task-get-action call above?
	    (if (> (current-seconds) next-touch)
		(begin
		  (tasks:monitors-update mdb)
		  (loop (+ count 1)(+ (current-seconds) 240)))
		(loop (+ count 1) next-touch)))))))
      
;;======================================================================
;; T A S K S   Q U E U E
;;
;;   NOTE:: These operate on task_queue which is in main.db
;;
;;======================================================================
737
738
739
740
741
742
743
744
745


746
747
748
749
750
751
752
716
717
718
719
720
721
722


723
724
725
726
727
728
729
730
731







-
-
+
+







     target run-name state-patt action-patt test-patt)
    res)) ;; )

;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;; 
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests"))
(define (tasks:kill-runner target run-name area-dat)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests" area-dat))
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))