Megatest

Check-in [076441b7a6]
Login
Overview
Comment:Conversion to inmem-per-run-db-per-run-server
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | inmem-per-run-db-per-run-server
Files: files | file ages | folders
SHA1: 076441b7a672d6dd96919c908d72425586ada37c
User & Date: matt on 2013-12-01 21:28:23
Other Links: branch diff | manifest | tags
Context
2013-12-01
23:06
Coarse grained migration of accessing server via run-id index into *runremote* check-in: 1aa5496577 user: matt tags: inmem-per-run-db-per-run-server
21:28
Conversion to inmem-per-run-db-per-run-server check-in: 076441b7a6 user: matt tags: inmem-per-run-db-per-run-server
2013-11-30
22:00
More fixes check-in: 6ce4b508b6 user: matt tags: inmem-per-run-db
Changes

Modified common.scm from [a28c1ed00f] to [0ebd6dd938].

57
58
59
60
61
62
63

64
65
66
67
68
69
70
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *db-write-access*   #t)
(define *inmemdb*           #f)


(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id







>







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *db-write-access*   #t)
(define *inmemdb*           #f)
(define *run-id*            #f)

(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id

Modified dashboard-tests.scm from [78c574598f] to [0f4d58ff5b].

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
;;======================================================================
;; Run info panel
;;======================================================================
(define (run-info-panel db keydat testdat runname)
  (let* ((run-id     (db:test-get-run_id testdat))
	 (rundat     (db:get-run-info db run-id))
	 (header     (db:get-header rundat))
	 (event_time (db:get-value-by-header (db:get-row rundat)
					     (db:get-header rundat)
					     "event_time")))
    (iup:frame 
     #:title "Megatest Run Info" ; #:expand "YES"
     (iup:hbox ; #:expand "YES"
      (apply iup:vbox ; #:expand "YES"
	     (append (map (lambda (keyval)







|







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
;;======================================================================
;; Run info panel
;;======================================================================
(define (run-info-panel db keydat testdat runname)
  (let* ((run-id     (db:test-get-run_id testdat))
	 (rundat     (db:get-run-info db run-id))
	 (header     (db:get-header rundat))
	 (event_time (db:get-value-by-header (db:get-rows rundat)
					     (db:get-header rundat)
					     "event_time")))
    (iup:frame 
     #:title "Megatest Run Info" ; #:expand "YES"
     (iup:hbox ; #:expand "YES"
      (apply iup:vbox ; #:expand "YES"
	     (append (map (lambda (keyval)
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
    (if (not testdat)
	(begin
	  (debug:print 2 "ERROR: No test data found for test " test-id ", exiting")
	  (exit 1))
	(let* (;; (run-id        (if testdat (db:test-get-run_id testdat) #f))
	       (keydat        (if testdat (db:get-key-val-pairs dbstruct run-id) #f))
	       (rundat        (if testdat (db:get-run-info dbstruct run-id) #f))
	       (runname       (if testdat (db:get-value-by-header (db:get-row rundat)
								  (db:get-header rundat)
								  "runname") #f))
	       (tdb           (tdb:open-test-db-by-test-id-local dbstruct run-id test-id))
	       ;; These next two are intentional bad values to ensure errors if they should not
	       ;; get filled in properly.
	       (logfile       "/this/dir/better/not/exist")
	       (rundir        logfile)







|







460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
    (if (not testdat)
	(begin
	  (debug:print 2 "ERROR: No test data found for test " test-id ", exiting")
	  (exit 1))
	(let* (;; (run-id        (if testdat (db:test-get-run_id testdat) #f))
	       (keydat        (if testdat (db:get-key-val-pairs dbstruct run-id) #f))
	       (rundat        (if testdat (db:get-run-info dbstruct run-id) #f))
	       (runname       (if testdat (db:get-value-by-header (db:get-rows rundat)
								  (db:get-header rundat)
								  "runname") #f))
	       (tdb           (tdb:open-test-db-by-test-id-local dbstruct run-id test-id))
	       ;; These next two are intentional bad values to ensure errors if they should not
	       ;; get filled in properly.
	       (logfile       "/this/dir/better/not/exist")
	       (rundir        logfile)

Modified db.scm from [2818f0f5b2] to [46374c3bd7].

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
	  (if (not dbexists)
	      (db:initialize-main-db db))
	  (dbr:dbstruct-set-main! dbstruct db)
	  db))))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
(define (db:setup #!key (local #f))
  (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: local)))
    (db:get-db dbstruct #f) ;; force one call to main
    ;; (if (not sdb:qry)
    ;;     (begin
    ;;       (set! sdb:qry (make-sdb:qry (conc *toppath* "/db/strings.db"))) ;; we open the normalization helpers here
    ;;       (sdb:qry 'setup #f)
    ;;       ;; Initialize with some known needed strings, NOTE: set this up to only execute on first db initialization







|







168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
	  (if (not dbexists)
	      (db:initialize-main-db db))
	  (dbr:dbstruct-set-main! dbstruct db)
	  db))))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
(define (db:setup run-id #!key (local #f))
  (let ((dbstruct (make-dbr:dbstruct path: *toppath* local: local)))
    (db:get-db dbstruct #f) ;; force one call to main
    ;; (if (not sdb:qry)
    ;;     (begin
    ;;       (set! sdb:qry (make-sdb:qry (conc *toppath* "/db/strings.db"))) ;; we open the normalization helpers here
    ;;       (sdb:qry 'setup #f)
    ;;       ;; Initialize with some known needed strings, NOTE: set this up to only execute on first db initialization
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787





788
789
790
791
792
793
794
		       (lambda (key)
			 (set! res (cons key res)))
		       (db:get-db dbstruct #f)
		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
	(set! *db-keys* res)
	res)))

;; 
(define (db:get-value-by-header row header field)
  (debug:print-info 4 "db:get-value-by-header row: " row " header: " header " field: " field)
  (if (null? header) #f
      (let loop ((hed (car header))
		 (tal (cdr header))
		 (n   0))
	(if (equal? hed field)
	    (vector-ref row n)
	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))






;;======================================================================
;;  R U N S
;;======================================================================

(define (db:get-run-name-from-id dbstruct run-id)
  (let ((res #f))
    (sqlite3:for-each-row







|

<








>
>
>
>
>







770
771
772
773
774
775
776
777
778

779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
		       (lambda (key)
			 (set! res (cons key res)))
		       (db:get-db dbstruct #f)
		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
	(set! *db-keys* res)
	res)))

;; look up values in a header/data structure
(define (db:get-value-by-header row header field)

  (if (null? header) #f
      (let loop ((hed (car header))
		 (tal (cdr header))
		 (n   0))
	(if (equal? hed field)
	    (vector-ref row n)
	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))

;; Accessors for the header/data structure
;; get rows and header from 
(define (db:get-header vec)(vector-ref vec 0))
(define (db:get-rows   vec)(vector-ref vec 1))

;;======================================================================
;;  R U N S
;;======================================================================

(define (db:get-run-name-from-id dbstruct run-id)
  (let ((res #f))
    (sqlite3:for-each-row
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
     runs-info)
    res))

;; db:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
;;  to extract info from the structure returned
;;
(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit) ;; test-name)
  (let* ((tmp      (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time")))
	 (keystr   (car tmp))
	 (header   (cadr tmp))
	 (res     '())







|







977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
     runs-info)
    res))

;; db:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit) ;; test-name)
  (let* ((tmp      (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time")))
	 (keystr   (car tmp))
	 (header   (cadr tmp))
	 (res     '())
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
		   (lambda (a . r)
		     (set! res (cons (list->vector (cons a r)) res)))
		   (db:get-db dbstruct #f)
		   qry-str
		   runnamepatt)))
    (vector header res)))

;; use (get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (vector #f #f #f #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time"))
	 (header    (append keys remfields))







|







1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
		   (lambda (a . r)
		     (set! res (cons (list->vector (cons a r)) res)))
		   (db:get-db dbstruct #f)
		   qry-str
		   runnamepatt)))
    (vector header res)))

;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (vector #f #f #f #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time"))
	 (header    (append keys remfields))

Modified db_records.scm from [832e173195] to [8182037580].

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(define-inline (db:test-set-diskfree! vec val)(vector-set! vec 8 val))
(define-inline (db:test-set-testname! vec val)(vector-set! vec 2 val))
(define-inline (db:test-set-state!    vec val)(vector-set! vec 3 val))
(define-inline (db:test-set-status!   vec val)(vector-set! vec 4 val))
(define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val))
(define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val))

;; get rows and header from 
(define-inline (db:get-header vec)(vector-ref vec 0))
(define-inline (db:get-rows   vec)(vector-ref vec 1))

;; make-vector-record "" db mintest id run_id testname state status event_time item_path
;;
(define (make-db:mintest)(make-vector 7))
(define-inline (db:mintest-get-id           vec)    (vector-ref  vec 0))
(define-inline (db:mintest-get-run_id       vec)    (vector-ref  vec 1))
(define-inline (db:mintest-get-testname     vec)    (vector-ref  vec 2))
(define-inline (db:mintest-get-state        vec)    (vector-ref  vec 3))







<
<
<
<







107
108
109
110
111
112
113




114
115
116
117
118
119
120
(define-inline (db:test-set-diskfree! vec val)(vector-set! vec 8 val))
(define-inline (db:test-set-testname! vec val)(vector-set! vec 2 val))
(define-inline (db:test-set-state!    vec val)(vector-set! vec 3 val))
(define-inline (db:test-set-status!   vec val)(vector-set! vec 4 val))
(define-inline (db:test-set-run_duration! vec val)(vector-set! vec 12 val))
(define-inline (db:test-set-final_logf! vec val)(vector-set! vec 13 val))





;; make-vector-record "" db mintest id run_id testname state status event_time item_path
;;
(define (make-db:mintest)(make-vector 7))
(define-inline (db:mintest-get-id           vec)    (vector-ref  vec 0))
(define-inline (db:mintest-get-run_id       vec)    (vector-ref  vec 1))
(define-inline (db:mintest-get-testname     vec)    (vector-ref  vec 2))
(define-inline (db:mintest-get-state        vec)    (vector-ref  vec 3))
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
(define-inline (tdb:steps-table-get-runtime    vec)    (vector-ref  vec 4))
(define-inline (tdb:step-stable-set-stepname!  vec val)(vector-set! vec 0 val))
(define-inline (tdb:step-stable-set-start!     vec val)(vector-set! vec 1 val))
(define-inline (tdb:step-stable-set-end!       vec val)(vector-set! vec 2 val))
(define-inline (tdb:step-stable-set-status!    vec val)(vector-set! vec 3 val))
(define-inline (tdb:step-stable-set-runtime!   vec val)(vector-set! vec 4 val))

;; use this one for db-get-run-info
(define-inline (db:get-row    vec)(vector-ref vec 1))

;; The data structure for handing off requests via wire
(define (make-cdb:packet)(make-vector 6))
(define-inline (cdb:packet-get-client-sig   vec)    (vector-ref  vec 0))
(define-inline (cdb:packet-get-qtype        vec)    (vector-ref  vec 1))
(define-inline (cdb:packet-get-immediate    vec)    (vector-ref  vec 2))
(define-inline (cdb:packet-get-query-sig    vec)    (vector-ref  vec 3))
(define-inline (cdb:packet-get-params       vec)    (vector-ref  vec 4))







<
<
<







203
204
205
206
207
208
209



210
211
212
213
214
215
216
(define-inline (tdb:steps-table-get-runtime    vec)    (vector-ref  vec 4))
(define-inline (tdb:step-stable-set-stepname!  vec val)(vector-set! vec 0 val))
(define-inline (tdb:step-stable-set-start!     vec val)(vector-set! vec 1 val))
(define-inline (tdb:step-stable-set-end!       vec val)(vector-set! vec 2 val))
(define-inline (tdb:step-stable-set-status!    vec val)(vector-set! vec 3 val))
(define-inline (tdb:step-stable-set-runtime!   vec val)(vector-set! vec 4 val))




;; The data structure for handing off requests via wire
(define (make-cdb:packet)(make-vector 6))
(define-inline (cdb:packet-get-client-sig   vec)    (vector-ref  vec 0))
(define-inline (cdb:packet-get-qtype        vec)    (vector-ref  vec 1))
(define-inline (cdb:packet-get-immediate    vec)    (vector-ref  vec 2))
(define-inline (cdb:packet-get-query-sig    vec)    (vector-ref  vec 3))
(define-inline (cdb:packet-get-params       vec)    (vector-ref  vec 4))

Modified http-transport.scm from [ac12c8febe] to [2e4f53bbc7].

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
	   (set! res adr)))
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

(define (http-transport:run hostn)
  (debug:print 2 "Attempting to start the server ...")
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
	    (exit))))
  (let* (;; (iface           (if (string=? "-" hostn)







|







56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
	   (set! res adr)))
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

(define (http-transport:run hostn run-id server-id)
  (debug:print 2 "Attempting to start the server ...")
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
	    (exit))))
  (let* (;; (iface           (if (string=? "-" hostn)
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "hey"))
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  (else (continue))))))))
    (http-transport:try-start-server ipaddrstr start-port)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum)
  (handle-exceptions
   exn
   (begin
     (print-error-message exn)
     (if (< portnum 9000)
	 (begin 
	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
	   (thread-sleep! 0.1)
	   ;; (open-run-close tasks:remove-server-records tasks:open-db)
	   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
	   (http-transport:try-start-server ipaddrstr (+ portnum 1)))
	 (print "ERROR: Tried and tried but could not start the server")))
   ;; any error in following steps will result in a retry
   (set! *runremote* (list ipaddrstr portnum))
   ;; (open-run-close tasks:remove-server-records tasks:open-db)
   (open-run-close tasks:server-register 
		   tasks:open-db 
		   (current-process-id)
		   ipaddrstr portnum 0 'startup 'http)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
   ;; This starts the spiffy server
   ;; NEED WAY TO SET IP TO #f TO BIND ALL
   (start-server bind-address: ipaddrstr port: portnum)
   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
   (debug:print 1 "INFO: server has been stopped")))








|



|








<
<
|



<
|

|
|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158


159
160
161
162

163
164
165
166
167
168
169
170
171
172
173
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "hey"))
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  (else (continue))))))))
    (http-transport:try-start-server ipaddrstr start-port server-id)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum server-id)
  (handle-exceptions
   exn
   (begin
     (print-error-message exn)
     (if (< portnum 9000)
	 (begin 
	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
	   (thread-sleep! 0.1)


	   (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))
	 (print "ERROR: Tried and tried but could not start the server")))
   ;; any error in following steps will result in a retry
   (set! *runremote* (list ipaddrstr portnum))

   (open-run-close tasks:server-set-interface-port 
		   tasks:open-db 
		   server-id 
		   ipaddrstr portnum)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
   ;; This starts the spiffy server
   ;; NEED WAY TO SET IP TO #f TO BIND ALL
   (start-server bind-address: ipaddrstr port: portnum)
   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum)
   (debug:print 1 "INFO: server has been stopped")))

395
396
397
398
399
400
401
402
403
404
405
406


407
408
409
410
411


412
413
414


415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
;; 	  (set! *transport-type* 'fs)
;; 	  #f))))


;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()


                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *runremote*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat


			      sdat
                              (begin
                                (sleep 4)


                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdb         (tasks:open-db))
	 (spid        ;;(open-run-close tasks:server-get-server-id tasks:open-db #f iface port #f))
	   (tasks:server-get-server-id tdb #f iface port #f))
	 (server-timeout (let ((tmo (config-lookup  *configdat* "server" "timeout")))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; default to three days
			       (* 3 24 60 60)))))
    (debug:print-info 2 "server-timeout: " server-timeout ", server pid: " spid " on " iface ":" port)
    (let loop ((count 0))
      ;; Use this opportunity to sync the inmemdb to db
      (let ((start-time (current-milliseconds))
	    (sync-time  #f)
	    (rem-time   #f))
	(if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
	(set! sync-time  (- (current-milliseconds) start-time))







|



|
>
>




|
>
>



>
>
|




<
<






|







392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422


423
424
425
426
427
428
429
430
431
432
433
434
435
436
;; 	  (set! *transport-type* 'fs)
;; 	  #f))))


;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *runremote*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      sdat
                              (begin
                                (sleep 4)
                                (loop start-time
				      (equal? sdat last-sdat)
				      sdat))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdb         (tasks:open-db))


	 (server-timeout (let ((tmo (config-lookup  *configdat* "server" "timeout")))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; default to three days
			       (* 3 24 60 60)))))
    (tasks:server-set-state! tdb server-id "running")
    (let loop ((count 0))
      ;; Use this opportunity to sync the inmemdb to db
      (let ((start-time (current-milliseconds))
	    (sync-time  #f)
	    (rem-time   #f))
	(if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
	(set! sync-time  (- (current-milliseconds) start-time))
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501

502
503
504
505

506
507
508
509
510
511
512
513
514
515
516
517

518

519
520
521
522
523


524
525


526
527
528
529
530
531
532
533
534
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *runremote*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (or (not (equal? sdat (list iface port)))
	      (not spid))
	  (begin 
	    (debug:print-info 0 "interface changed, refreshing iface and port info")
	    (set! iface (car sdat))
	    (set! port  (cadr sdat))
	    (set! spid  (tasks:server-get-server-id tdb #f iface port #f))))
      
      ;; NOTE: Get rid of this mechanism! It really is not needed...
      ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid)
      (tasks:server-update-heartbeat tdb spid)
      
      ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access

      ;; Transfer *last-db-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *last-db-access*)
      (mutex-unlock! *heartbeat-mutex*)

      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      (if (and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    (loop 0))
	  (begin
	    (debug:print-info 0 "Starting to shutdown the server.")
	    ;; need to delete only *my* server entry (future use)
	    (set! *time-to-exit* #t)
	    (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
	    (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
	    (thread-sleep! 1)
	    (debug:print-info 0 "Max cached queries was    " *max-cache-size*)
	    (debug:print-info 0 "Number of cached writes   " *number-of-writes*)
	    (debug:print-info 0 "Average cached write time "
			      (if (eq? *number-of-writes* 0)
				  "n/a (no writes)"
				  (/ *writes-total-delay*
				     *number-of-writes*))
			      " ms")
	    (debug:print-info 0 "Number non-cached queries "  *number-non-write-queries*)
	    (debug:print-info 0 "Average non-cached time   "
			      (if (eq? *number-non-write-queries* 0)
				  "n/a (no queries)"
				  (/ *total-non-write-delay* 
				     *number-non-write-queries*))
			      " ms")
	    (debug:print-info 0 "Server shutdown complete. Exiting")

	    (exit))))))

;; all routes though here end in exit ...
(define (http-transport:launch)

  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting the standalone server")
  (if (args:get-arg "-daemonize")
      (daemon:ize))
  (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db)))
    (debug:print 11 "http-transport:launch hostinfo=" hostinfo)
    ;; #(1 "143.182.207.24" 5736 -1 "http" 22771 "hostname")
    (if hostinfo

	(debug:print-info 2 "NOT starting new server, one is already running on " (vector-ref hostinfo 1) ":" (vector-ref hostinfo 2))

	(if *toppath* 
	    (let* ((th2 (make-thread (lambda ()
				       (http-transport:run 
					(if (args:get-arg "-server")
					    (args:get-arg "-server")


					    "-"))) "Server run"))
		   (th3 (make-thread http-transport:keep-running "Keep running")))


	      ;; Database connection
	      (set! *inmemdb*  (db:setup))
	      (thread-start! th2)
	      (thread-start! th3)
	      (set! *didsomething* #t)
	      (thread-join! th2))
	    (debug:print 0 "ERROR: Failed to setup for megatest")))
    ;; (sdb:qry 'finalize)
    (exit)))







|



|
<



|




















|
|
















>



|
>








|
<
<
|
>
|
>





>
>
|
|
>
>

|







448
449
450
451
452
453
454
455
456
457
458
459

460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516


517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *runremote*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (or (not (equal? sdat (list iface port)))
	      (not server-id))
	  (begin 
	    (debug:print-info 0 "interface changed, refreshing iface and port info")
	    (set! iface (car sdat))
	    (set! port  (cadr sdat))))

      
      ;; NOTE: Get rid of this mechanism! It really is not needed...
      ;; (open-run-close tasks:server-update-heartbeat tasks:open-db spid)
      (tasks:server-update-heartbeat tdb server-id)
      
      ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access

      ;; Transfer *last-db-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *last-db-access*)
      (mutex-unlock! *heartbeat-mutex*)

      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      (if (and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    (loop 0))
	  (begin
	    (debug:print-info 0 "Starting to shutdown the server.")
	    ;; need to delete only *my* server entry (future use)
	    (set! *time-to-exit* #t)
	    (if *inmemdb* (db:sync-touched *inmemdb* force-sync: #t))
	    ( tasks:server-set-state! tdb server-id "shutting-down")
	    (thread-sleep! 5)
	    (debug:print-info 0 "Max cached queries was    " *max-cache-size*)
	    (debug:print-info 0 "Number of cached writes   " *number-of-writes*)
	    (debug:print-info 0 "Average cached write time "
			      (if (eq? *number-of-writes* 0)
				  "n/a (no writes)"
				  (/ *writes-total-delay*
				     *number-of-writes*))
			      " ms")
	    (debug:print-info 0 "Number non-cached queries "  *number-non-write-queries*)
	    (debug:print-info 0 "Average non-cached time   "
			      (if (eq? *number-non-write-queries* 0)
				  "n/a (no queries)"
				  (/ *total-non-write-delay* 
				     *number-non-write-queries*))
			      " ms")
	    (debug:print-info 0 "Server shutdown complete. Exiting")
	    (tasks:server-delete-record! tdb server-id)
	    (exit))))))

;; all routes though here end in exit ...
(define (http-transport:launch run-id)
  (set! *run-id*   run-id)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting the standalone server")
  (if (args:get-arg "-daemonize")
      (daemon:ize))
  (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)))


    (if (not server-id)
	(begin
	  (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
	  (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db))
	(if *toppath* 
	    (let* ((th2 (make-thread (lambda ()
				       (http-transport:run 
					(if (args:get-arg "-server")
					    (args:get-arg "-server")
					    "-")
					run-id
					server-id)) "Server run"))
		   (th3 (make-thread (lambda ()
				       (http-transport:keep-running server-id))
				     "Keep running")))
	      ;; Database connection
	      (set! *inmemdb*  (db:setup run-id))
	      (thread-start! th2)
	      (thread-start! th3)
	      (set! *didsomething* #t)
	      (thread-join! th2))
	    (debug:print 0 "ERROR: Failed to setup for megatest")))
    ;; (sdb:qry 'finalize)
    (exit)))

Modified launch.scm from [7c2ca47271] to [01eba87552].

473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-row run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))








|







473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (runname  (db:get-value-by-header (db:get-rows run-info)
					   (db:get-header run-info)
					   "runname"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

Modified megatest.scm from [d71e637572] to [8e86c2252f].

28
29
30
31
32
33
34

35
36
37
38
39
40
41
(declare (uses daemon))
(declare (uses db))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses tdb))
(declare (uses mt))
(declare (uses api))


(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")







>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
(declare (uses daemon))
(declare (uses db))
;; (declare (uses sdb))
;; (declare (uses filedb))
(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.

(define *db* #f) ;; this is only for the repl, do not use in general!!!!

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
199
200
201
202
203
204
205

206
207
208
209
210
211
212
			"-set-state-status"
			"-debug" ;; for *verbosity* > 2
			"-gen-megatest-test"
			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-dumpmode"

			) 
		 (list  "-h"
			"-version"
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-show-keys"







>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
			"-set-state-status"
			"-debug" ;; for *verbosity* > 2
			"-gen-megatest-test"
			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-dumpmode"
			"-run-id"
			) 
		 (list  "-h"
			"-version"
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-show-keys"
341
342
343
344
345
346
347
348


349

350

351
352
353
354
355
356
357

(if (args:get-arg "-server")

    ;; Server? Start up here.
    ;;
    (let ((tl        (setup-for-run))
	  (transport (or (configf:lookup *configdat* "setup" "transport")
			 (args:get-arg "-transport" "http"))))


      (debug:print 2 "Launching server using transport " transport)

      (server:launch (string->symbol transport)))


    ;; Not a server? This section will decide how to communicate
    ;;
    ;;  Setup client for all expect listed here
    (if (null? (lset-intersection 
		     equal?
		     (hash-table-keys args:arg-hash)







|
>
>
|
>
|
>







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363

(if (args:get-arg "-server")

    ;; Server? Start up here.
    ;;
    (let ((tl        (setup-for-run))
	  (transport (or (configf:lookup *configdat* "setup" "transport")
			 (args:get-arg "-transport" "http")))
	  (run-id    (and (args:get-arg "-run-id")
			  (string->number (args:get-arg "-run-id")))))
      (debug:print 2 "Launching server using transport " transport " for run-id=" run-id)
      (if run-id
	  (server:launch (string->symbol transport) run-id)
	  (debug:print 0 "ERROR: server requires run-id be specified with -run-id")))

    ;; Not a server? This section will decide how to communicate
    ;;
    ;;  Setup client for all expect listed here
    (if (null? (lset-intersection 
		     equal?
		     (hash-table-keys args:arg-hash)

Modified mt.scm from [78f8ad9fdf] to [029729b148].

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-row runinfo))
;;  to extract info from the structure returned
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500))
	     (res      '())
	     (offset   0)
	     (limit    500))







|







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500))
	     (res      '())
	     (offset   0)
	     (limit    500))

Modified server.scm from [ddc244d255] to [be1ec84c84].

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
;;

;; all routes though here end in exit ...
(define (server:launch transport)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting server using " transport " transport")
  (set! *transport-type* transport)
  (case transport
    ;; ((fs)   (exit)) ;; there is no "fs" server transport
    ((fs http) (http-transport:launch))
    ((zmq)     (zmq-transport:launch))
    (else
     (debug:print "WARNING: unrecognised transport " transport)
     (exit))))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================







|









|
|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
;;

;; all routes though here end in exit ...
(define (server:launch transport run-id)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting server using " transport " transport")
  (set! *transport-type* transport)
  (case transport
    ;; ((fs)   (exit)) ;; there is no "fs" server transport
    ((fs http) (http-transport:launch run-id))
    ((zmq)     (zmq-transport:launch run-id))
    (else
     (debug:print "WARNING: unrecognised transport " transport)
     (exit))))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

Modified tasks.scm from [c9fadd13bc] to [318318de38].

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
                                  pubport INTEGER,
                                  start_time TIMESTAMP,
                                  priority INTEGER,
                                  state TEXT,
                                  mt_version TEXT,
                                  heartbeat TIMESTAMP,
                                  transport TEXT,
                                  run_id INTEGER,
                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
                                  server_id INTEGER,
                                  pid INTEGER,
                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,







|
|







62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
                                  pubport INTEGER,
                                  start_time TIMESTAMP,
                                  priority INTEGER,
                                  state TEXT,
                                  mt_version TEXT,
                                  heartbeat TIMESTAMP,
                                  transport TEXT,
                                  run_id INTEGER);")
;;                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
	  (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
                                  server_id INTEGER,
                                  pid INTEGER,
                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,
89
90
91
92
93
94
95
96
97






98


99
100
101
102

103




104
105
106
107
108
109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
124

125
126
127

128
129
130
131
132
133
134
















135



136
137


138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
(define (tasks:hostinfo-get-interface   vec)    (vector-ref  vec 1))
(define (tasks:hostinfo-get-port        vec)    (vector-ref  vec 2))
(define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
(define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
(define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))

;; state: 'live, 'shutting-down, 'dead
(define (tasks:server-register mdb pid interface port priority state transport #!key (pubport -1))






  (debug:print-info 11 "tasks:server-register " pid " " interface " " port " " priority " " state)


  (sqlite3:execute 
   mdb 
   "INSERT OR REPLACE INTO servers (pid,hostname,port,pubport,start_time,priority,state,mt_version,heartbeat,interface,transport)
                             VALUES(?,  ?,       ?,   ?,  strftime('%s','now'), ?, ?, ?, strftime('%s','now'),?,?);"

   pid (get-host-name) port pubport priority (conc state) 




   (common:version-signature)
   interface 
   (conc transport))
  (vector 
   (tasks:server-get-server-id mdb (get-host-name) interface port pid)
   interface
   port
   pubport
   transport
   ))

;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete))

  (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
  (if *db-write-access*
      (if pid
	  (case action
	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
	  (if port
	      (case action

	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)))
	      (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))))


(define (tasks:server-deregister-self mdb hostname)
  (tasks:server-deregister mdb hostname pid: (current-process-id)))

;; need a simple call for robustly removing records given host and port
(define (tasks:server-delete mdb hostname port)
  (tasks:server-deregister mdb hostname port: port action: 'delete))




















(define (tasks:server-get-server-id mdb hostname iface port pid)
  (debug:print-info 12 "tasks:server-get-server-id " mdb " " hostname " " iface " " port " " pid)


  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb
     (cond
      ((and hostname  pid)  "SELECT id FROM servers WHERE hostname=?  AND pid=?;")
      ((and iface     port) "SELECT id FROM servers WHERE interface=? AND port=?;")
      ((and hostname  port) "SELECT id FROM servers WHERE hostname=?  AND port=?;")
      (else
       (begin
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 1 "Heart beat update of server id=" server-id)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: probable timeout on monitor.db access")







<
|
>
>
>
>
>
>
|
>
>


|
|
>
|
>
>
>
>
|
|
|
<
<
<
|
<
<


<
|
>
|
|
|
|
<
|
|
<
>
|
|
|
>

|
|

<
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
<
>
>
|

|
|

<
|
<
<
<
|
<
<
<
|







89
90
91
92
93
94
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118



119


120
121

122
123
124
125
126
127

128
129

130
131
132
133
134
135
136
137
138

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

162
163
164
165
166
167
168

169



170



171
172
173
174
175
176
177
178
(define (tasks:hostinfo-get-interface   vec)    (vector-ref  vec 1))
(define (tasks:hostinfo-get-port        vec)    (vector-ref  vec 2))
(define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
(define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
(define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))


(define (tasks:server-lock-slot mdb run-id)
  (let ((res '())
	(best #f))
    (tasks:server-clean-out-old-records-for-run-id mdb run-id)
    (tasks:server-set-available mdb run-id)
    (thread-sleep! 2) ;; Try removing this. It may not be needed.
    (tasks:server-am-i-the-server? mdb run-id)))
	
;; register that this server may come online (first to register goes though with the process)
(define (tasks:server-set-available mdb run-id)
  (sqlite3:execute 
   mdb 
   "INSERT INTO servers (pid,hostname,port,pubport,start_time,      priority,state,mt_version,heartbeat,   interface,transport,run_id)
                   VALUES(?, ?,       ?,   ?, strftime('%s','now'), ?,       ?,    ?, strftime('%s','now'),?,        ?,        ?);"
   (current-process-id)       ;; pid
   (get-host-name)            ;; hostname
   -1                         ;; port
   -1                         ;; pubport
   (random 1000)              ;; priority (used a tiebreaker on get-available)
   "available"                ;; state
   (common:version-signature) ;; mt_version
   -1                         ;; interface
   -1                         ;; transport



   run-id


   ))


(define (tasks:server-clean-out-old-records-for-run-id mdb run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state='available' AND (strftime('%s','now') - start_time) > 30 AND run_id=?;" run-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE state='running'   AND (strftime('%s','now') - heartbeat)  > 10 AND run_id=?;" run-id))
  

(define (tasks:server-set-state! mdb server-id state)

  (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id))


(define (tasks:server-delete-record! mdb server-id)
  (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id))

(define (tasks:server-delete-records-for-this-pid mdb)
  (sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND pid=?;" (get-host-name) (current-process-id)))

(define (tasks:server-set-interface-port mdb server-id interface port)
  (sqlite3:execute mdb "UPDATE servers SET interface=?,port=? WHERE id=?;" interface port server-id))


(define (tasks:server-am-i-the-server? mdb run-id)
  (let* ((all    (tasks:server-get-servers-vying-for-run-id mdb run-id))
	 (first  (if (null? all)
		     (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") 
			    (sqlite3:finalize! mdb)
			    (exit 1))
		     (car (db:get-rows all))))
	 (header   (db:get-header all))
	 (id       (db:get-value-by-header first header "id"))
	 (hostname (db:get-value-by-header first header "hostname"))
	 (pid      (db:get-value-by-header first header "pid"))
	 (priority (db:get-value-by-header first header "priority")))
    (debug:print 0 "INFO: am-i-the-server got record " first)
    ;; for now a basic check. add tiebreaking by priority later
    (if (and (equal? hostname (get-host-name))
	     (equal? pid      (current-process-id)))
	id
	#f)))
	     
;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname")
;;  to extract info from the structure returned
;;
(define (tasks:server-get-servers-vying-for-run-id mdb run-id)

   (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time"))
	  (selstr (string-intersperse header ","))
	  (res    '()))
    (sqlite3:for-each-row
     (lambda (a . b)
       (set! res (cons (apply vector a b) res)))
     mdb

     (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;")



     run-id)



    (vector header res)))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 1 "Heart beat update of server id=" server-id)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: probable timeout on monitor.db access")
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
	 (heartbeat-delta 99e9))
    (sqlite3:for-each-row
     (lambda (delta)
       (set! heartbeat-delta delta))
     mdb "SELECT strftime('%s','now')-heartbeat FROM servers WHERE id=?;" server-id)
    (< heartbeat-delta 10)))

(define (tasks:client-register mdb pid hostname cmdline)
  (sqlite3:execute
   mdb
   "INSERT OR REPLACE INTO clients (server_id,pid,hostname,cmdline,login_time) VALUES(?,?,?,?,strftime('%s','now'));")
  (tasks:server-get-server-id mdb hostname #f #f pid)
  pid hostname cmdline)

(define (tasks:client-logout mdb pid hostname cmdline)
  (sqlite3:execute
   mdb
   "UPDATE clients SET logout_time=strftime('%s','now') WHERE pid=? AND hostname=? AND cmdline=?;"
   pid hostname cmdline))

(define (tasks:get-logged-in-clients mdb server-id)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id server-id pid hostname cmdline login-time logout-time)
       (set! res (cons (vector id server-id pid hostname cmdline login-time lougout-time) res)))
     mdb
     "SELECT id,server_id,pid,hostname,cmdline,login_time,logout_time FROM clients WHERE server_id=?;"
     server-id)))

(define (tasks:have-clients? mdb server-id)
  (null? (tasks:get-logged-in-clients mdb server-id)))

;; ping each server in the db and return first found that responds. 
;; remove any others. will not necessarily remove all!
(define (tasks:get-best-server mdb)
  (let ((res '())
	(best #f)
	(transport (if (and *transport-type*
			    (not (eq? *transport-type* 'fs)))
		       (conc *transport-type*)
		       "%")))
    (sqlite3:for-each-row
     (lambda (id interface port pubport transport pid hostname)
       (set! res (cons (vector id interface port pubport transport pid hostname) res))
       ;;(debug:print-info 2 "Found existing server " hostname ":" port " registered in db"))
       )
     mdb
     
     "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers
          WHERE strftime('%s','now')-heartbeat < 10 
          AND mt_version=? AND transport LIKE ? 
          ORDER BY start_time DESC LIMIT 1;" (common:version-signature) transport)
    ;; for now we are keeping only one server registered in the db, return #f or first server found
    (if (null? res) #f (car res))))

;; BUG: This logic is probably needed unless methodology changes completely...
;;
;;     (if (null? res) #f
;; 	(let loop ((hed (car res))
;; 		   (tal (cdr res)))
;; 	  ;; (print "hed=" hed ", tal=" tal)
;; 	  (let* ((host     (list-ref hed 0))
;; 		 (iface    (list-ref hed 1))
;; 		 (port     (list-ref hed 2))
;; 		 (pid      (list-ref hed 4))
;; 		 (alive    (open-run-close tasks:server-alive? tasks:open-db #f hostname: host port: port)))
;; 	    (if alive
;; 		(begin
;; 		  (debug:print-info 2 "Found an existing, alive, server " host ", " port ".")
;; 		  (list host iface port))
;; 		(begin
;; 		  (debug:print-info 1 "Marking " host ":" port " as dead in server registry.")
;; 		  (if port
;; 		      (open-run-close tasks:server-deregister tasks:open-db host port: port)
;; 		      (open-run-close tasks:server-deregister tasks:open-db host pid:  pid))
;; 		  (if (null? tal)
;; 		      #f
;; 		      (loop (car tal)(cdr tal))))))))))

(define (tasks:remove-server-records mdb)
  (sqlite3:execute mdb "DELETE FROM servers;"))

(define (tasks:mark-server hostname port pid state transport)
  (if port
      (open-run-close tasks:server-deregister tasks:open-db hostname port: port)
      (open-run-close tasks:server-deregister tasks:open-db hostname pid:  pid)))


(define (tasks:kill-server status hostname port pid transport)
  (debug:print-info 1 "Removing defunct server record for " hostname ":" port)
  (if port
      (open-run-close tasks:server-deregister tasks:open-db hostname port: port)
      (open-run-close tasks:server-deregister tasks:open-db hostname pid:  pid))
  (if status ;; #t means alive







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
<
<
<
<


|
<
<

<


|
|
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







188
189
190
191
192
193
194



























195
196
197




198
199
200


201

202
203
204
205







206



























207
208
209
210
211
212
213
	 (heartbeat-delta 99e9))
    (sqlite3:for-each-row
     (lambda (delta)
       (set! heartbeat-delta delta))
     mdb "SELECT strftime('%s','now')-heartbeat FROM servers WHERE id=?;" server-id)
    (< heartbeat-delta 10)))




























(define (tasks:get-server mdb run-id)
  (let ((res  #f)
	(best #f))




    (sqlite3:for-each-row
     (lambda (id interface port pubport transport pid hostname)
       (set! res (vector id interface port pubport transport pid hostname)))


     mdb

     "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers
          WHERE strftime('%s','now')-heartbeat < 10 
          AND mt_version=? AND run_id=? AND state='running'
          ORDER BY start_time DESC LIMIT 1;" (common:version-signature) run-id)







    res))




























(define (tasks:kill-server status hostname port pid transport)
  (debug:print-info 1 "Removing defunct server record for " hostname ":" port)
  (if port
      (open-run-close tasks:server-deregister tasks:open-db hostname port: port)
      (open-run-close tasks:server-deregister tasks:open-db hostname pid:  pid))
  (if status ;; #t means alive
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
		(begin
		  (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
		  (process-signal pid signal/term)  ;; local machine, send sig term
		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill
		  (process-signal pid signal/kill)) 
		(debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname))))))



(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport)
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;")
    res))
       

;;======================================================================
;; Tasks and Task monitors
;;======================================================================


;;======================================================================
;; Tasks







<
<
<
<
<
<
<
<
<
<
<
<







236
237
238
239
240
241
242












243
244
245
246
247
248
249
		(begin
		  (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
		  (process-signal pid signal/term)  ;; local machine, send sig term
		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill
		  (process-signal pid signal/kill)) 
		(debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname))))))













;;======================================================================
;; Tasks and Task monitors
;;======================================================================


;;======================================================================
;; Tasks

Modified tests/Makefile from [7520ced67a] to [f03136dea1].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#
# run some tests

BINPATH=$(shell readlink -m $(PWD)/../bin)
MEGATEST=$(BINPATH)/megatest
DASHBOARD=$(BINPATH)/dashboard
PATH := $(BINPATH):$(PATH)
RUNNAME := $(shell date +w%V.%u.%H.%M)
IPADDR := "-"
# Set SERVER to "-server -"
SERVER  = 
DEBUG   = 1
LOGGING = 

OS  = $(shell grep ID /etc/*-release|cut -d= -f2)
FS  = $(shell df -T .|tail -1|awk '{print $$2}')
VER = $(shell fsl info|grep checkout|awk '{print $$2}'|cut -c 1-5)

# The NEWTARGET causes some tests to fail. Do not use until this is fixed.
NEWTARGET  = "$(OS)/$(FS)/$(VER)"
TARGET     = "ubuntu/nfs/none"

all : test1 test2 test3 test4 test5 test6 test7 test8 test9

server :
	cd ..;make;make install
	cd fullrun;../../bin/megatest -server - -debug 22

stopserver :
	cd ..;make && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make && make install



|
|
|
|
|
|
|
|
|
|













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#
# run some tests

BINPATH   = $(shell readlink -m $(PWD)/../bin)
MEGATEST  = $(BINPATH)/megatest
DASHBOARD = $(BINPATH)/dashboard
PATH     := $(BINPATH):$(PATH)
RUNNAME  := $(shell date +w%V.%u.%H.%M)
IPADDR   := "-"
RUNID    := 1
SERVER    = 
DEBUG     = 1
LOGGING   = 

OS  = $(shell grep ID /etc/*-release|cut -d= -f2)
FS  = $(shell df -T .|tail -1|awk '{print $$2}')
VER = $(shell fsl info|grep checkout|awk '{print $$2}'|cut -c 1-5)

# The NEWTARGET causes some tests to fail. Do not use until this is fixed.
NEWTARGET  = "$(OS)/$(FS)/$(VER)"
TARGET     = "ubuntu/nfs/none"

all : test1 test2 test3 test4 test5 test6 test7 test8 test9

server :
	cd ..;make;make install
	cd fullrun;../../bin/megatest -server - -debug 22 -run-id $(RUNID)

stopserver :
	cd ..;make && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make && make install

Added tests/watch-monitor.sh version [408ccfb929].

















>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
#!/bin/bash

sqlite3 fullrun/db/monitor.db << EOF
.header on
.mode column
select * from servers;
.q
EOF