Megatest

Changes On Branch 3f19516a56ce37d1
Login

Changes In Branch mrw_wip_fixes_ww37 Excluding Merge-Ins

This is equivalent to a diff from bf22a32f91 to 3f19516a56

2016-09-07
17:57
added check for DISPLAY settings prior to launching dashboard check-in: f158e57b14 user: bjbarcla tags: v1.61
16:44
needed cleanup; one-run-updater wip bugfix check-in: 062b578b1b user: bjbarcla tags: v1.61_onerun
16:29
db updated check-in: 9a47c20038 user: ritikaag tags: db
15:56
Updated makefile to have less scary output when it can't find postgres and mysql check-in: 9a36d96247 user: jmoon18 tags: v1.61
2016-09-06
17:10
Create new branch named "refactor-db-v1.61-shoeb" check-in: ce9c0bd882 user: srehman tags: refactor-db-v1.61-shoeb
17:08
WIP: read-only area support. Better MT_ variable support. Not working properly yet. Closed-Leaf check-in: 3f19516a56 user: mrwellan tags: mrw_wip_fixes_ww37
2016-09-02
17:46
added colors on the graph check-in: bf22a32f91 user: ritikaag tags: v1.61
2016-08-31
16:48
Partial fixes for dashboard issues. WIP check-in: ee53267d1a user: mrwellan tags: v1.61, v1.6104

Modified common.scm from [8d88deb1b5] to [a3b3963328].

525
526
527
528
529
530
531
532

533
534
535
536
537
538
539
525
526
527
528
529
530
531

532
533
534
535
536
537
538
539







-
+







(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))

(define (common:args-get-target #!key (split #f))
(define (common:args-get-target #!key (split #f)(run-dat #f))
  (let* ((keys    (if (hash-table? *configdat*) (keys:config-get-fields *configdat*) '()))
	 (numkeys (length keys))
	 (target  (or (args:get-arg "-reqtarg")
		      (args:get-arg "-target")
		      (getenv "MT_TARGET")))
	 (tlist   (if target (string-split target "/" #t) '()))
	 (valid   (if target

Modified db.scm from [e4bdea0e8f] to [ce1b0574f9].

10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
10
11
12
13
14
15
16

17
18
19
20
21
22
23
24







-
+







;;======================================================================

;;======================================================================
;; Database access
;;======================================================================

(require-extension (srfi 18) extras tcp)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
73
74
75
76
77
78
79


80
81
82


83
84
85
86
87


88
89
90
91
92
93
94
73
74
75
76
77
78
79
80
81
82


83
84
85
86
87


88
89
90
91
92
93
94
95
96







+
+

-
-
+
+



-
-
+
+







	(let ((dbdat (if (or (not run-id)
			     (eq? run-id 0))
			 (db:open-main dbstruct)
			 (db:open-rundb dbstruct run-id)
			 )))
	  dbdat))))

(defstruct db:dbdat db path writeable)

(define (db:dbdat-get-db dbdat)
  (if (pair? dbdat)
      (car dbdat)
  (if (db:dbdat? dbdat)
      (db:dbdat-db dbdat)
      dbdat))

(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
  (if (db:dbdat? dbdat)
      (db:dbdat-path dbdat)
      #f))

;; mod-read:
;;     'mod   modified data
;;     'read  read data
;;
(define (db:done-with dbstruct run-id mod-read)
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183
184
185
186
187


188
189
190
191
192
193
194
195
196
197
198

















199
200
201
202
203
204
205
169
170
171
172
173
174
175

176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191











192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215







-
+













+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	       
(define (db:set-sync db)
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";"))))

;; open an sql database inside a file lock
;;
;; returns: db existed-prior-to-opening
;; returns: db:dbdat record <db existed-prior-to-opening
;;
(define (db:lock-create-open fname initproc)
  ;; (if (file-exists? fname)
  ;;     (let ((db (sqlite3:open-database fname)))
  ;;       (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
  ;;       (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
  ;;       db)
  (let* ((parent-dir   (pathname-directory fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    (make-db:dbdat
     db:
    (if file-write ;; dir-writable
	(let (;; (lock    (obtain-dot-lock fname 1 5 10))
	      (db      (sqlite3:open-database fname)))
	  (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
	  (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (if (not file-exists)(initproc db))
	  ;; (release-dot-lock fname)
	  db)
	(begin
	  (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
	  (sqlite3:open-database fname))))) ;; )
     (if file-write ;; dir-writable or db writeable
	 (let (;; (lock    (obtain-dot-lock fname 1 5 10))
	       (db      (sqlite3:open-database fname)))
	   (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
	   (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	   (if (and initproc (not file-exists))
	       (initproc db))
	   ;; (release-dot-lock fname)
	   db)
	 (begin
	   (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
	   (sqlite3:open-database 
	    (if file-exists
		fname
		":memory:"))))
     writeable: file-write
     )))

;; This routine creates the db. It is only called if the db is not already opened
;; 
(define (db:open-rundb dbstruct run-id #!key (attemptnum 0)(do-not-open #f)) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
  (let* ((local  (dbr:dbstruct-get-local dbstruct))
	 (rdb    (if local
		     (dbr:dbstruct-get-localdb dbstruct run-id)
238
239
240
241
242
243
244
245

246
247
248
249
250
251
252
248
249
250
251
252
253
254

255
256
257
258
259
260
261
262







-
+







				     (set! *megatest-db* db)
				     db)))
		 (write-access (file-write-access? dbpath))
		 ;; (handler      (make-busy-timeout 136000))
		 )
	    (if (and dbexists (not write-access))
		(set! *db-write-access* #f)) ;; only unset so other db's also can use this control
	    (dbr:dbstruct-set-rundb!  dbstruct (cons db dbpath))
	    (dbr:dbstruct-set-rundb!  dbstruct db) ;; (cons db dbpath))
	    (dbr:dbstruct-set-inuse!  dbstruct #t)
	    (dbr:dbstruct-set-olddb!  dbstruct olddb)
	    ;; (dbr:dbstruct-set-run-id! dbstruct run-id)
	    (mutex-unlock! *rundb-mutex*)
	    (if local
		(begin
		  (dbr:dbstruct-set-localdb! dbstruct run-id db) ;; (dbr:dbstruct-set-inmem! dbstruct db) ;; direct access ...
294
295
296
297
298
299
300
301



302
303

304
305
306
307
308
309
310
304
305
306
307
308
309
310

311
312
313
314

315
316
317
318
319
320
321
322







-
+
+
+

-
+







  (let* ((dbdir    (db:dbfile-path #f)) ;; (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
	 (dbstruct (make-dbr:dbstruct path: dbdir local: local)))
    dbstruct))

;; Open the classic megatest.db file in toppath
;;
(define (db:open-megatest-db)
  (let* ((dbpath       (conc *toppath* "/megatest.db"))
  (let* ((dbfile       (conc *toppath* "/megatest.db"))
	 (dbpath       *toppath*)
	 (dir-writable (file-write-access? dbpath))
	 (dbexists     (file-exists? dbpath))
	 (db           (db:lock-create-open dbpath
	 (db           (db:lock-create-open dbfile
					    (lambda (db)
					      (db:initialize-main-db db)
					      (db:initialize-run-id-db db))))
	 (write-access (file-write-access? dbpath)))
    (if (and dbexists (not write-access))
	(set! *db-write-access* #f))
    (cons db dbpath)))

Modified launch.scm from [3e54d60917] to [bb5e1b31be].

750
751
752
753
754
755
756

757
758
759
760
761
762
763
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764







+







	      (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
	      (set! toppath      *toppath*)
	      (if (not *toppath*)
		  (begin
		    (debug:print-error 0 *default-log-port* "you are not in a megatest area!")
		    (exit 1)))
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))
	      ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
	      (let* ((keys         (rmt:get-keys))
		     (key-vals     (keys:target->keyval keys target))
		     (linktree     (or (getenv "MT_LINKTREE")
				       (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
		     (second-pass  (find-and-read-config
				    mtconfig
795
796
797
798
799
800
801
802

803
804


805
806
807
808
809
810
811
812

813
814
815
816
817
818
819

820

821
822
823
824
825

826


827
828
829
830
831
832
833
796
797
798
799
800
801
802

803
804

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823

824
825
826
827
828
829
830

831
832
833
834
835
836
837
838
839







-
+

-
+
+








+







+
-
+





+
-
+
+







	      (set! *configstatus* 'partial))
	    (begin
	      (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
	      (exit 2))))))
    ;; additional house keeping
    (let* ((linktree (or (getenv "MT_LINKTREE")
			 (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))))
      (if linktree
      (if (string? linktree)
	  (begin
	    (if (not (file-exists? linktree))
	    (if (and (not (file-exists? linktree))
		     (file-write-access? (pathname-directory linktree)))
		(begin
		  (handle-exceptions
		   exn
		   (begin
		     (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
		     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		     (exit 1))
		   (create-directory linktree #t))))
	    ;; now create the lt link in MT_RUN_AREA_HOME
	    (handle-exceptions
	     exn
	     (begin
	       (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
	       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
	     (let ((tlink (conc *toppath* "/lt")))
	       (if (not (file-exists? tlink))
		   (if (file-write-access? *toppath*)
		   (create-symbolic-link linktree tlink)))))
		       (create-symbolic-link linktree tlink))))))
	  (begin
	    (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
	    )))
    (if (and *toppath*
	     (directory-exists? *toppath*))
	(begin
	(setenv "MT_RUN_AREA_HOME" *toppath*)
	  (setenv "MT_RUN_AREA_HOME" *toppath*)
	  (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	(begin
	  (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")))
    *toppath*))

(define launch:setup launch:setup-new)

(define (get-best-disk confdat testconfig)

Modified mt.scm from [7c996fd4dc] to [e82a868788].

127
128
129
130
131
132
133
134



135
136
137
138
139
140
141
142
143
144
145
146
147


148
149
150
151
152
153
154
127
128
129
130
131
132
133

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158







-
+
+
+













+
+







			  (cons testn res)))))))))

;;======================================================================
;;  T R I G G E R S
;;======================================================================

(define (mt:process-triggers run-id test-id newstate newstatus)
  (let* ((test-dat      (rmt:get-test-info-by-id run-id test-id)))
  (let* ((test-dat      (rmt:get-test-info-by-id run-id test-id))
	 (target        (rmt:get-target run-id))
	 (run-name      (rmt:get-run-name-from-id run-id)))
    (if test-dat
	(let* ((test-rundir   ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb*
		(db:test-get-rundir test-dat)) ;; ) ;; )
	       (test-name     (db:test-get-testname test-dat))
	       (tconfig       #f)
	       (state         (if newstate  newstate  (db:test-get-state  test-dat)))
	       (status        (if newstatus newstatus (db:test-get-status test-dat))))
	  (if (and test-name
		   test-rundir   ;; #f means no dir set yet
		   (file-exists? test-rundir)
		   (directory? test-rundir))
	      (call-with-environment-variables
	       (list (cons "MT_TEST_NAME" test-name)
		     (cons "MT_RUNNAME"   run-name)
		     (cons "MT_TARGET"    target)
		     (cons "MT_TEST_RUN_DIR" test-rundir)
		     (cons "MT_ITEMPATH"     (db:test-get-item-path test-dat)))
	       (lambda ()
		 (push-directory test-rundir)
		 (set! tconfig (mt:lazy-read-test-config test-name))
		 (for-each (lambda (trigger)
			     (let ((cmd  (configf:lookup tconfig "triggers" trigger))

Modified runs.scm from [de4f2b1394] to [cab56a9d5e].

84
85
86
87
88
89
90

91
92
93
94
95
96
97
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98







+







    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)
    (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))
    ;; if a testname and itempath are available set the remaining appropriate variables
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))
    (if (and testname link-tree)
	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
					(getenv "MT_TARGET")    "/"
					(getenv "MT_RUNNAME")   "/"

Modified tasks.scm from [7aab5e9e48] to [3106aed1fe].

74
75
76
77
78
79
80


81
82
83
84
85
86
87
88
89
90
91
92
93
94
















95
96
97

98
99
100
101






102
103

104
105
106
107
108
109


110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149

150
151
152
153
154
155
156
157
158
159
160


161
162
163
164
165
166
167
74
75
76
77
78
79
80
81
82














83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

99

100
101



102
103
104
105
106
107


108
109
110
111
112


113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

133
134
135
136
137
138
139

140
141
142
143
144
145
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161
162
163


164
165
166
167
168
169
170
171
172







+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-

-
+

-
-
-
+
+
+
+
+
+
-
-
+




-
-
+
+


















-
+






-
+













-
+









-
-
+
+







;;         ==> open in-mem version
;; If file NOT exists
;;    ==> open in-mem version
;;
(define (tasks:open-db #!key (numretries 4))
  (if *task-db*
      *task-db*
      (let* ((dbpath       (tasks:get-task-db-path))
	     (dbfile       (conc dbpath "/monitor.db")))
      (handle-exceptions
       exn
       (if (> numretries 0)
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath       (tasks:get-task-db-path))
	(handle-exceptions
	 exn
	 ;; (if (> numretries 0)
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 0 *default-log-port* " exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* "ERROR: fatal problem accessing file: " dbfile)
	   (exit 1))
;; 	     (thread-sleep! 1)
;; 	     (tasks:open-db numretries (- numretries 1)))
;; 	   (begin
;; 	     (print-call-chain (current-error-port))
;; 	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;; 	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))))
       (let* (
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (exists       (file-exists? dbfile))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))
	      (mdb          (db:lock-create-open dbfile #f))
	      (db           (db:dbdat-db mdb))
	      ;; (cond 
	      ;;   	     ;; if db exists and readable or directory writeable, open the real db
	      ;;   	     ((or (and dbpath write-access)(and dbpath exists))
	      ;;   	      (sqlite3:open-database dbfile))
			     ((file-read-access? dbpath)    (sqlite3:open-database dbfile))
			     (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
	      ;;   	     (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
	      (handler      (make-busy-timeout 36000)))
	 (if (and exists
		  (not write-access))
	     (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
	 (sqlite3:set-busy-handler! mdb handler)
	 (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
	 (sqlite3:set-busy-handler! db handler)
	 (db:set-sync db) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
	 ;;  (if (or (and (not exists)
	 ;; 	      (file-write-access? *toppath*))
	 ;; 	 (not (file-read-access? dbpath)))
	 ;;      (begin
	 ;; 
	 ;; TASKS QUEUE MOVED TO main.db
	 ;;
	 ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
         ;;                        action TEXT DEFAULT '',
         ;;                        owner TEXT,
         ;;                        state TEXT DEFAULT 'new',
         ;;                        target TEXT DEFAULT '',
         ;;                        name TEXT DEFAULT '',
         ;;                        testpatt TEXT DEFAULT '',
         ;;                        keylock TEXT,
         ;;                        params TEXT,
         ;;                        creation_time TIMESTAMP,
         ;;                        execution_time TIMESTAMP);")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
	 (sqlite3:execute db "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
                                pid INTEGER,
                                start_time TIMESTAMP,
                                last_update TIMESTAMP,
                                hostname TEXT,
                                username TEXT,
                               CONSTRAINT monitors_constraint UNIQUE (pid,hostname));")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY,
	 (sqlite3:execute db "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY,
                                  pid INTEGER,
                                  interface TEXT,
                                  hostname TEXT,
                                  port INTEGER,
                                  pubport INTEGER,
                                  start_time TIMESTAMP,
                                  priority INTEGER,
                                  state TEXT,
                                  mt_version TEXT,
                                  heartbeat TIMESTAMP,
                                  transport TEXT,
                                  run_id INTEGER);")
	 ;;                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
	 (sqlite3:execute db "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
                                  server_id INTEGER,
                                  pid INTEGER,
                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,
                                CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
	       
	       ;))
	 (set! *task-db* (cons mdb dbpath))
	 *task-db*))))
	 (set! *task-db* mdb) ;; (db:dbdat(cons mdb dbpath))
	 *task-db*)))))

;;======================================================================
;; Server and client management
;;======================================================================

;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
(define (tasks:hostinfo-get-id          vec)    (vector-ref  vec 0))
337
338
339
340
341
342
343
344

345
346
347
348
349
350
351
352
353
354
355
356
357

358
359
360
361
362
363
364
365
366

367
368
369
370
371
372
373
342
343
344
345
346
347
348

349
350
351
352
353
354
355
356
357
358
359
360
361

362
363
364
365
366
367
368
369
370

371
372
373
374
375
376
377
378







-
+












-
+








-
+







	     (debug:print 0 *default-log-port* " trying call to tasks:get-server again in 10 seconds")
	     (thread-sleep! 10)
	     (tasks:get-server mdb run-id retries: (- retries 0)))
	   (debug:print 0 *default-log-port* "10 tries of tasks:get-server all crashed and burned. Giving up and returning \"no server found\"")))
     (sqlite3:for-each-row
      (lambda (id interface port pubport transport pid hostname)
	(set! res (vector id interface port pubport transport pid hostname)))
      mdb
      (db:dbdat-db mdb)
      ;; removed:
      ;; strftime('%s','now')-heartbeat < 10 AND mt_version = ?
      "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers
          WHERE run_id=? AND state='running'
          ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id)
     res)))

(define (tasks:server-running-or-starting? mdb run-id)
  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     (db:dbdat-db mdb) ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND  (strftime('%s','now') - start_time) < 60));" run-id)
    res))

(define (tasks:server-running? mdb run-id)
  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     (db:dbdat-db mdb) ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id)
    res))

(define (tasks:need-server run-id)
  (equal? (configf:lookup *configdat* "server" "required") "yes"))

;; 	(maxqry (cdr (rmt:get-max-query-average run-id)))
402
403
404
405
406
407
408
409

410
411
412
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
407
408
409
410
411
412
413

414
415
416
417
418
419
420
421
422
423
424

425
426
427
428
429
430
431
432







-
+










-
+








(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     (db:dbdat-db mdb)
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
    res))

(define (tasks:get-server-records mdb run-id)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     (db:dbdat-db mdb)
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;"
     run-id)
    (reverse res)))

;; no elegance here ...
;;
451
452
453
454
455
456
457

458

459
460
461
462
463
464
465
466
467

468
469
470
471
472
473
474
456
457
458
459
460
461
462
463

464
465
466
467
468
469
470
471
472

473
474
475
476
477
478
479
480







+
-
+








-
+







    ))
    
;;======================================================================
;; M O N I T O R S
;;======================================================================

(define (tasks:remove-monitor-record mdb)
  (sqlite3:execute (db:dbdat-db mdb)
  (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

(define (tasks:get-monitors mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (a . rem)
       (set! res (cons (apply vector a rem) res)))
     mdb
     (db:dbdat-db mdb)
     "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;")
    (reverse res)
    ))

(define (tasks:monitors->text-table monitors)
  (let ((fmtstr "~4a~8a~20a~20a~10a~10a"))
    (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n"
483
484
485
486
487
488
489
490

491
492
493
494
495
496
497
498

499
500

501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516

517
518
519
520
521
522
523
489
490
491
492
493
494
495

496
497
498
499
500
501
502
503

504
505

506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521

522
523
524
525
526
527
528
529







-
+







-
+

-
+















-
+







			  (tasks:monitor-get-username    monitor)))
		monitors)
	   "\n"))))
   
;; update the last_update field with the current time and
;; if any monitors appear dead, remove them
(define (tasks:monitors-update mdb)
  (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;"
  (sqlite3:execute (db:dbdat-db mdb) "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;"
			  (current-process-id)
			  (get-host-name))
  (let ((deadlist '()))
    (sqlite3:for-each-row
     (lambda (id pid host last-update delta)
       (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
       (set! deadlist (cons id deadlist)))
     mdb 
     (db:dbdat-db mdb)
     "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;")
    (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');")))
    (sqlite3:execute (db:dbdat-db mdb) (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');")))
  )
(define (tasks:register-monitor db port)
  (let* ((pid (current-process-id))
	 (hostname (get-host-name))
	 (userinfo (user-information (current-user-id)))
	 (username (car userinfo)))
    (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
    (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
		     pid hostname username)))

(define (tasks:get-num-alive-monitors mdb)
  (let ((res 0))
    (sqlite3:for-each-row 
     (lambda (count)
       (set! res count))
     mdb
     (db:dbdat-db mdb)
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more