Megatest

Check-in [6444c1bdfc]
Login
Overview
Comment:Runs through test4 pretty well. No server enabled yet
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-no-rpc
Files: files | file ages | folders
SHA1: 6444c1bdfc166bc0eef081e1803eaad7028405c1
User & Date: matt on 2016-11-25 22:30:07
Other Links: branch diff | manifest | tags
Context
2016-11-26
18:16
Cleaned up globals. Added audit script to report on globals, where defined and where used. check-in: 22df9fe278 user: matt tags: v1.62-no-rpc
2016-11-25
22:30
Runs through test4 pretty well. No server enabled yet check-in: 6444c1bdfc user: matt tags: v1.62-no-rpc
11:50
Consolidated to single global for dbstruct. Removed *megatest-db* Removed *inmemdb* Removed *write-frequency* Removed *client-non-blocking-mode* Consolidated db:open-local-db-handle in with db:setup Fixed calls which used db to instead use dbstruct Change repl to use db:setup for getting a db handle check-in: e335fe582a user: matt tags: v1.62-no-rpc
Changes

Modified client.scm from [b597605018] to [5c6ba40366].

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
 				  ;;              (car (vector-ref logininfo 1))
 				  ;;              #f)))

                                  )))
		(if (and start-res
			 ping-res)
		    (begin
		      (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)







|






|







178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
 				  ;;              (car (vector-ref logininfo 1))
 				  ;;              #f)))

                                  )))
		(if (and start-res
			 ping-res)
		    (begin
		      (set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (set! *runremote* #f)  ;; (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)

Modified common.scm from [d6de275ae9] to [03889704ec].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records)
(require-extension regex posix)

(require-extension (srfi 18) extras tcp rpc)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils)
(require-extension regex posix)

(require-extension (srfi 18) extras tcp rpc)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
(define *dbstruct-db*         #f) ;; used when local access is triggered in rmt.scm

(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
(define *db-sync-mutex*       (make-mutex))
(define *db-multi-sync-mutex* (make-mutex))
(define *db-local-sync*       (make-hash-table)) ;; used to record last touch of db

(define *last-db-access*      (current-seconds))  ;; update when db is accessed via server
(define *db-write-access*     #t)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-cache-path*       #f)

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)







>
|










|







88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
(define *dbstruct-db*         #f) ;; used when local access is triggered in rmt.scm

(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
(define *db-sync-mutex*       (make-mutex))
(define *db-multi-sync-mutex* (make-mutex))
(define *db-local-sync*       (make-hash-table)) ;; used to record last touch of db
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened 
(define *last-db-access*      (current-seconds)) ;; update when db is accessed via server
(define *db-write-access*     #t)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-cache-path*       #f)

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
198
199
200
201
202
203
204
























205
206
207
208
209
210
211
   'dejunk
   ;; 'adj-testids
   ;; 'old2new
   'new2old
   'schema)
  (if (common:version-changed?)
      (common:set-last-run-version)))

























;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;;
(define (common:exit-on-version-changed)
  (if (common:version-changed?)
      (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")))
        (debug:print 0 *default-log-port*







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
   'dejunk
   ;; 'adj-testids
   ;; 'old2new
   'new2old
   'schema)
  (if (common:version-changed?)
      (common:set-last-run-version)))

;; Rotate logs, logic: 
;;                 if > 500k and older than 1 week:
;;                     remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
;;          logs directory you wish to log-rotate.
;;
(define (common:rotate-logs)
  (if (not (directory-exists? "logs"))(create-directory "logs"))
  (directory-fold 
   (lambda (file rem)
     (if (and (string-match "^.*.log" file)
	      (> (file-size (conc "logs/" file)) 200000))
	 (let ((gzfile (conc "logs/" file ".gz")))
	   (if (file-exists? gzfile)
	       (begin
		 (debug:print-info 0 *default-log-port* "removing " gzfile)
		 (delete-file gzfile)))
	   (debug:print-info 0 *default-log-port* "compressing " file)
	   (system (conc "gzip logs/" file)))))
   '()
   "logs"))
    


;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;;
(define (common:exit-on-version-changed)
  (if (common:version-changed?)
      (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config")))
        (debug:print 0 *default-log-port*
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:legacy-sync-recommended)
  (or (and (common:get-homehost)
	   (cdr (common:get-homehost)))
      (args:get-arg "-runtests")
      (args:get-arg "-run")
      (args:get-arg "-server")
      ;; (args:get-arg "-set-run-status")
      (args:get-arg "-remove-runs")
      ;; (args:get-arg "-get-run-status")
      (args:get-arg "-use-db-cache") ;; feels like a bad idea ...
      ))

(define (common:legacy-sync-required)
  (configf:lookup *configdat* "setup" "megatest-db"))

;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags







|
|


|

|







505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:legacy-sync-recommended)
  (or (and (common:get-homehost)
	   (cdr (common:get-homehost)))
      ;;(args:get-arg "-runtests")
      ;;(args:get-arg "-run")
      (args:get-arg "-server")
      ;; (args:get-arg "-set-run-status")
      ;;(args:get-arg "-remove-runs")
      ;; (args:get-arg "-get-run-status")
      ;;(args:get-arg "-use-db-cache") ;; feels like a bad idea ...
      ))

(define (common:legacy-sync-required)
  (configf:lookup *configdat* "setup" "megatest-db"))

;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
	(last-time   (current-seconds)))
    (if (or (common:legacy-sync-recommended)
	    legacy-sync)
	(let loop ()
	  ;; sync for filesystem local db writes
	  ;;
	  (let ((start-time   (current-seconds)))
	    (common:sync-to-megatest.db 'local-sync-flags)
	    (if (and debug-mode
		     (> (- start-time last-time) 60))
		(begin
		  (set! last-time start-time)
		  (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))

	  ;; keep going unless time to exit







|







559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
	(last-time   (current-seconds)))
    (if (or (common:legacy-sync-recommended)
	    legacy-sync)
	(let loop ()
	  ;; sync for filesystem local db writes
	  ;;
	  (let ((start-time   (current-seconds)))
	    ;; (common:sync-to-megatest.db 'local-sync-flags)
	    (if (and debug-mode
		     (> (- start-time last-time) 60))
		(begin
		  (set! last-time start-time)
		  (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))

	  ;; keep going unless time to exit

Modified db.scm from [8de1aa48cd] to [05634fcfcf].

194
195
196
197
198
199
200

201
202
203
204
205
206
207
208
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    (if file-write ;; dir-writable
	(let (;; (lock    (obtain-dot-lock fname 1 5 10))
	      (db      (sqlite3:open-database fname)))
	  (sqlite3:set-busy-handler! db (make-busy-timeout 136000))

	  (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (if (not file-exists)
	      (begin
		(if (string-match "^/tmp/.*" fname) ;; this is a file in /tmp
		    (sqlite3:execute db "PRAGMA journal_mode=WAL;")
		    (print "Creating " fname " in NON-WAL mode."))
		(initproc db)))
	  ;; (release-dot-lock fname)







>
|







194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    (if file-write ;; dir-writable
	(let (;; (lock    (obtain-dot-lock fname 1 5 10))
	      (db      (sqlite3:open-database fname)))
	  (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
	  ;; (db:set-sync db)
	  (sqlite3:execute db "PRAGMA synchronous = NORMAL;")
	  (if (not file-exists)
	      (begin
		(if (string-match "^/tmp/.*" fname) ;; this is a file in /tmp
		    (sqlite3:execute db "PRAGMA journal_mode=WAL;")
		    (print "Creating " fname " in NON-WAL mode."))
		(initproc db)))
	  ;; (release-dot-lock fname)
308
309
310
311
312
313
314

315
316
317
318
319


320
321
322
323
324
325
326
	;; (rundb  (dbr:dbstruct-rundb dbstruct))
	;; (inmem  (dbr:dbstruct-inmem dbstruct))
	;; (maindb (dbr:dbstruct-main  dbstruct))
	;; (refdb  (dbr:dbstruct-refdb dbstruct))
        (tmpdb   (dbr:dbstruct-tmpdb dbstruct))
	(mtdb    (dbr:dbstruct-mtdb dbstruct))
        (refndb  (dbr:dbstruct-refndb dbstruct))

	;; (runid  (dbr:dbstruct-run-id dbstruct))
	)
    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
    ;; (mutex-lock! *http-mutex*)
    (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)))


;;    (if (eq? run-id 0)
;;	;; runid equal to 0 is main.db
;;	(if maindb
;;	    (if (or (not (number? mtime))
;;		    (not (number? stime))
;;		    (> mtime stime)
;;		    force-sync)







>



|
|
>
>







309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
	;; (rundb  (dbr:dbstruct-rundb dbstruct))
	;; (inmem  (dbr:dbstruct-inmem dbstruct))
	;; (maindb (dbr:dbstruct-main  dbstruct))
	;; (refdb  (dbr:dbstruct-refdb dbstruct))
        (tmpdb   (dbr:dbstruct-tmpdb dbstruct))
	(mtdb    (dbr:dbstruct-mtdb dbstruct))
        (refndb  (dbr:dbstruct-refndb dbstruct))
	(start-t (current-seconds))
	;; (runid  (dbr:dbstruct-run-id dbstruct))
	)
    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
    (mutex-lock! *db-sync-mutex*)
    (db:sync-tables (db:sync-all-tables-list dbstruct) (cons *db-last-sync* "last_update") tmpdb refndb mtdb)
    (set! *db-last-sync* start-t)
    (mutex-unlock! *db-sync-mutex*)))
;;    (if (eq? run-id 0)
;;	;; runid equal to 0 is main.db
;;	(if maindb
;;	    (if (or (not (number? mtime))
;;		    (not (number? stime))
;;		    (> mtime stime)
;;		    force-sync)
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
;; 	  (dbr:dbstruct-localdb-set! dbstruct run-id #f)
;; 	  (dbr:dbstruct-inmem-set! dbstruct #f)))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (begin
        (db:sync-touched dbstruct 0 force-sync: #t)
        (let ((tdb (db:dbdat-get-db (dbr:dbstruct-tmpdb  dbstruct)))
              (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
              (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
          (if tdb (sqlite3:finalize! tdb))
          (if mdb (sqlite3:finalize! mdb))
          (if rdb (sqlite3:finalize! rdb))))))
  







|







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
;; 	  (dbr:dbstruct-localdb-set! dbstruct run-id #f)
;; 	  (dbr:dbstruct-inmem-set! dbstruct #f)))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (begin
        ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let ((tdb (db:dbdat-get-db (dbr:dbstruct-tmpdb  dbstruct)))
              (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
              (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
          (if tdb (sqlite3:finalize! tdb))
          (if mdb (sqlite3:finalize! mdb))
          (if rdb (sqlite3:finalize! rdb))))))
  
3153
3154
3155
3156
3157
3158
3159


3160
3161
3162
3163
3164
3165
3166
3167




3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print-error 0 *default-log-port* "reception failed. Received " msg " but cannot translate it.")
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg))) ;; rpc



(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    (if msg
	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
	(db:general-call dbdat 'state-status     (list state status test-id)))




     (mt:process-triggers run-id test-id state status)))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed states
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  (let* ((db           (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct)))
	 (testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name)
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))







>
>





|
|
|
>
>
>
>
|










|







3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print-error 0 *default-log-port* "reception failed. Received " msg " but cannot translate it.")
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg))) ;; rpc

;; This is to be the big daddy call

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    ;; (if msg
    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
    ;; process the test_data table
    (if (and test-id state status (equal? status "AUTO")) 
	(db:test-data-rollup dbstruct run-id test-id status))
    (mt:process-triggers run-id test-id state status)))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed states
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  (let* ((db           (db:dbdat-get-db (dbr:dbstruct-tmpdb dbstruct)))
	 (testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
						   statuses)
					     statuses)))
				      *common:std-statuses* >))
		  (newstate          (if (null? all-curr-states) "NOT_STARTED" (car all-curr-states)))
		  (newstatus         (if (null? all-curr-statuses) "n/a" (car all-curr-statuses))))
	     ;; (print "Setting toplevel to: " newstate "/" newstatus)
	     (db:test-set-state-status-by-id dbstruct run-id tl-test-id newstate newstatus #f)))))))
        
(define db:roll-up-pass-fail-counts db:set-state-status-and-roll-up-items)

;; call with state = #f to roll up with out accounting for state/status of this item
;;
;;    (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status)
;;      (if (not (equal? item-path "")) ;; if asked to do this for a specific item then do an incremental update
;;          (let* ((dbdat         (db:get-db dbstruct run-id))







|







3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
						   statuses)
					     statuses)))
				      *common:std-statuses* >))
		  (newstate          (if (null? all-curr-states) "NOT_STARTED" (car all-curr-states)))
		  (newstatus         (if (null? all-curr-statuses) "n/a" (car all-curr-statuses))))
	     ;; (print "Setting toplevel to: " newstate "/" newstatus)
	     (db:test-set-state-status-by-id dbstruct run-id tl-test-id newstate newstatus #f)))))))

(define db:roll-up-pass-fail-counts db:set-state-status-and-roll-up-items)

;; call with state = #f to roll up with out accounting for state/status of this item
;;
;;    (define (db:roll-up-pass-fail-counts dbstruct run-id test-name item-path state status)
;;      (if (not (equal? item-path "")) ;; if asked to do this for a specific item then do an incremental update
;;          (let* ((dbdat         (db:get-db dbstruct run-id))
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
                                      (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
                                    THEN 'PASS'
                                    ELSE status
                                    END WHERE id=?;") ;; DONE
	'(test-set-log            "UPDATE tests SET final_logf=? WHERE id=?;")      ;; DONE
	;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?")        ;; DONE
	;; '(test-set-rundir         "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE
	'(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=?;")    ;; BROKEN!!! NEEDS run-id
	'(delete-tests-in-state   ;; "DELETE FROM tests WHERE state=?;")                  ;; DONE
	  "UPDATE tests SET state='DELETED' WHERE state=?")
	'(tests:test-set-toplog   "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE
	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")       ;; DONE
	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	;; stuff for roll-up-pass-fail-counts
	'(update-pass-fail-counts "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')),
                 pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
             WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE  ;; BROKEN!!! NEEDS run-id
	'(top-test-set          "UPDATE tests SET state=? WHERE testname=? AND item_path='';") ;; DONE   ;; BROKEN!!! NEEDS run-id
	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='';") ;; DONE   ;; BROKEN!!! NEEDS run-id


	;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this:
	;;
	;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status;
	;;
	'(top-test-set-per-pf-counts "UPDATE tests







|












<
|







3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342

3343
3344
3345
3346
3347
3348
3349
3350
                                      (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
                                    THEN 'PASS'
                                    ELSE status
                                    END WHERE id=?;") ;; DONE
	'(test-set-log            "UPDATE tests SET final_logf=? WHERE id=?;")      ;; DONE
	;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?")        ;; DONE
	;; '(test-set-rundir         "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE
	'(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;")    ;; BROKEN!!! NEEDS run-id
	'(delete-tests-in-state   ;; "DELETE FROM tests WHERE state=?;")                  ;; DONE
	  "UPDATE tests SET state='DELETED' WHERE state=?")
	'(tests:test-set-toplog   "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE
	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")       ;; DONE
	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	;; stuff for roll-up-pass-fail-counts
	'(update-pass-fail-counts "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')),
                 pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
             WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE  ;; BROKEN!!! NEEDS run-id

	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE   ;; BROKEN!!! NEEDS run-id


	;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this:
	;;
	;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status;
	;;
	'(top-test-set-per-pf-counts "UPDATE tests
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
			       immediate
			       flush
			       sync
			       set-verbosity
			       killserver
			       ))

(define (db:login dbstruct calling-path calling-version run-id client-signature)
  (cond 
   ((not (equal? calling-path *toppath*))
    (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
   ((not (equal? *run-id* run-id))
    (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
   ((not (equal? megatest-version calling-version))
    (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version))
   (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbdat stmtname params)







|



|
|







3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
			       immediate
			       flush
			       sync
			       set-verbosity
			       killserver
			       ))

(define (db:login dbstruct calling-path calling-version client-signature)
  (cond 
   ((not (equal? calling-path *toppath*))
    (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
   ;; ((not (equal? *run-id* run-id))
   ;;  (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
   ((not (equal? megatest-version calling-version))
    (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version))
   (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbdat stmtname params)

Modified http-transport.scm from [d70882fd0b] to [9e8b14e328].

109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    (http-transport:try-start-server run-id ipaddrstr start-port server-id)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server run-id ipaddrstr portnum server-id)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
	(tdbdat          (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
	   (begin 
	     (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")







|







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    (http-transport:try-start-server run-id ipaddrstr start-port server-id)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server run-id ipaddrstr portnum server-id)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
	(tdbdat          (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
	   (begin 
	     (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
					     (hash-table-delete! *runremote* run-id)
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))







|







258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
					     (set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id)
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))









|







305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


396
397
398
399
400
401
402


403
404
405
406
407
408
409
410

      ;; Use this opportunity to sync the tmp db to megatest.db
      (if *dbstruct-db* 
	  (let ((start-time (current-milliseconds))
		(sync-time  #f)
		(rem-time   #f))
	    (condition-case


	     (db:sync-touched *dbstruct-db* *run-id* force-sync: #t)
	     ((sync-failed)(cond
			    ((> bad-sync-count 10) ;; time to give up
			     (http-transport:server-shutdown server-id port))
			    (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
			     (thread-sleep! 5)
			     (loop count server-state (+ bad-sync-count 1)))))
	     ((exn)







>
>
|







396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

      ;; Use this opportunity to sync the tmp db to megatest.db
      (if *dbstruct-db* 
	  (let ((start-time (current-milliseconds))
		(sync-time  #f)
		(rem-time   #f))
	    (condition-case
	     ;; (if (and (member (mutex-state *db-sync-mutex*) '(abandoned not-abandoned))
	     ;;	      (> (- (current-seconds) *db-last-sync*) 5)) ;; if not currently being synced nor recently synced
	     (db:sync-touched *dbstruct-db* *run-id* force-sync: #t) ;; usually done in the watchdog, not here.
	     ((sync-failed)(cond
			    ((> bad-sync-count 10) ;; time to give up
			     (http-transport:server-shutdown server-id port))
			    (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
			     (thread-sleep! 5)
			     (loop count server-state (+ bad-sync-count 1)))))
	     ((exn)
426
427
428
429
430
431
432
433

434
435
436
437
438
439
440
	  (if (eq? server-state 'available)
	      (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
		(if (equal? new-server-id server-id)
		    (begin
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
		      (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
		      (set! *dbstruct-db*  (db:setup)) ;;  run-id))
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))

		    (begin ;; gotta exit nicely
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
		      (http-transport:server-shutdown server-id port))))))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count))
      







|
>







428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
	  (if (eq? server-state 'available)
	      (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
		(if (equal? new-server-id server-id)
		    (begin
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
		      (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
		      (set! *dbstruct-db*  (db:setup)) ;;  run-id))
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
		      (server:write-dotserver *toppath* (conc iface ":" port)))
		    (begin ;; gotta exit nicely
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
		      (http-transport:server-shutdown server-id port))))))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count))
      
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
	      (loop 0 server-state bad-sync-count))
	    (http-transport:server-shutdown server-id port))))))
  
(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")
    ;; need to delete only *my* server entry (future use)
    (if *dbstruct-db* (db:sync-touched *dbstruct-db* *run-id* force-sync: #t))
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)







|







486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
	      (loop 0 server-state bad-sync-count))
	    (http-transport:server-shutdown server-id port))))))
  
(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")
    ;; need to delete only *my* server entry (future use)
    ;; (if *dbstruct-db* (db:sync-touched *dbstruct-db* *run-id* force-sync: #t)) ;; handled in the watchdog only
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)
508
509
510
511
512
513
514


515
516
517
518
519
520
521
		      (if (eq? *number-non-write-queries* 0)
			  "n/a (no queries)"
			  (/ *total-non-write-delay* 
			     *number-non-write-queries*))
		      " ms")
    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
    (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete")


    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch run-id)







>
>







511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
		      (if (eq? *number-non-write-queries* 0)
			  "n/a (no queries)"
			  (/ *total-non-write-delay* 
			     *number-non-write-queries*))
		      " ms")
    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
    (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete")
    ;; if the .server file contained :myport then we can remove it
    (server:remove-dotserver-file *toppath* port)
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch run-id)

Modified launch.scm from [2ea5c9f7cf] to [8b28cbf454].

894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
	 (lnkbase   (conc linktree "/" target "/" runname))
	 (lnkpath   (conc lnkbase "/" testname))
	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
	 (lnktarget (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
    ;;                                                 rundir   shortdir
    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path)

    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
    (if (not (file-exists? linktree))
	(begin
	  (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what...







|







894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
	 (lnkbase   (conc linktree "/" target "/" runname))
	 (lnkpath   (conc lnkbase "/" testname))
	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
	 (lnktarget (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
    ;;                                                 rundir   shortdir
    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)

    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
    (if (not (file-exists? linktree))
	(begin
	  (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what...
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  ;; NB// Was this for the test or for the parent in an iterated test?
	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
			    (if (file-exists? lnkpath)
				;; (resolve-pathname lnkpath)
				(common:nice-path lnkpath)
				lnkpath)
			    testname "")
	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
		(handle-exceptions
		 exn







|







967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  ;; NB// Was this for the test or for the parent in an iterated test?
	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
			    (if (file-exists? lnkpath)
				;; (resolve-pathname lnkpath)
				(common:nice-path lnkpath)
				lnkpath)
			    testname "" run-id)
	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
		(handle-exceptions
		 exn

Modified megatest.scm from [bb00f9be7d] to [db7e42e4e9].

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
;; immediately set MT_TARGET if -reqtarg or -target are available
;;
(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog*
  (make-thread 
   (lambda ()
     (thread-sleep! 0.05) ;; delay for startup
     (let ((legacy-sync (common:legacy-sync-required))
	   (debug-mode  (debug:debug-mode 1))
	   (last-time   (current-seconds)))
       (if (common:legacy-sync-recommended)
	   (let loop ()
	     ;; sync for filesystem local db writes
	     ;;
             (let ((start-time   (current-seconds)))
               ;; disabling for now (if legacy-sync (common:sync-to-megatest.db #f))
	       (if (and debug-mode
			(> (- start-time last-time) 60))
		   (begin
		     (set! last-time start-time)
		     (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	     
	     ;; keep going unless time to exit
	     ;;
	     (if (not *time-to-exit*)
		 (let delay-loop ((count 0))
		   (if (and (not *time-to-exit*)
			    (< count 11)) ;; aprox 5-6 seconds
		       (begin
			 (thread-sleep! 1)
			 (delay-loop (+ count 1))))
		   (loop)))
	     (if (common:low-noise-print 30)
		 (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*)))))
     "Watchdog thread")))

(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))







|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







341
342
343
344
345
346
347
348































349
350
351
352
353
354
355
;; immediately set MT_TARGET if -reqtarg or -target are available
;;
(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))
































(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
		(else
		 (pp data))))))
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* ((run-id        (string->number (args:get-arg "-run-id")))
	   (host:port     (args:get-arg "-ping")))
      (server:ping run-id host:port)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup








|

|







640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
		(else
		 (pp data))))))
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* (;; (run-id        (string->number (args:get-arg "-run-id")))
	   (host:port     (args:get-arg "-ping")))
      (server:ping host:port)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup

2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
          (debug:print 0 *default-log-port* "Failed to create HTML output in " toppath "/lt/runs-index.html"))
      (set! *didsomething* #t)))

;;======================================================================
;; Exit and clean up
;;======================================================================

(if *runremote* (close-all-connections!))

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)








|







1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
          (debug:print 0 *default-log-port* "Failed to create HTML output in " toppath "/lt/runs-index.html"))
      (set! *didsomething* #t)))

;;======================================================================
;; Exit and clean up
;;======================================================================

(if *runremote* (close-all-connections!)) ;; for http-client

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)

Modified rmt.scm from [ea574182b1] to [2858b3ea4e].

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58













59
60
61
62
63
64
65
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)













  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)







|













>
>
>
>
>
>
>
>
>
>
>
>
>







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access

  (if (and #f  ;; FORCE NO GO FOR RIGHT NOW
	   (not *runremote*)                         ;; we trust *runremote* to reflect that a server was found previously
	   (not (member cmd api:read-only-queries))) ;; we don't trust so much the list of write queries
      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (server:kind-run *toppath*))))
  
  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (if *dbstruct-db*
			     *dbstruct-db*
			     (let* ((dbstruct (db:setup))) ;; make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* dbstruct)
			       dbstruct)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))







<
<
|
<
<







228
229
230
231
232
233
234


235


236
237
238
239
240
241
242
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))


	 (dbstruct-local (db:setup))  ;; make-dbr:dbstruct path:  dbdir local: #t)))


	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		(hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))








|







250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		(hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (case *transport-type*
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)







|
|
|







311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)

Modified server.scm from [19061b35b0] to [7b40e2c3b9].

102
103
104
105
106
107
108
109
110
111

112
113
114
115
116
117
118
119
120


121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185




186
187
188
189


190














191














192



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
     result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run run-id)
  (let* ((curr-host   (get-host-name))
	 (curr-ip     (server:get-best-guess-address curr-host))

	 (target-host (configf:lookup *configdat* "server" "homehost" ))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc *toppath* "/logs/" run-id ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -m testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &")))))
    (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...")


    (push-directory *toppath*)
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    
    ;; Rotate logs, logic: 
    ;;                 if > 500k and older than 1 week:
    ;;                     remove previous compressed log and compress this log
    ;;
    (directory-fold 
     (lambda (file rem)
       (if (and (string-match "^.*.log" file)
		(> (file-size (conc "logs/" file)) 200000))
	   (let ((gzfile (conc "logs/" file ".gz")))
	     (if (file-exists? gzfile)
		 (begin
		   (debug:print-info 0 *default-log-port* "removing " gzfile)
		   (delete-file gzfile)))
	     (debug:print-info 0 *default-log-port* "compressing " file)
	     (system (conc "gzip logs/" file)))))
     '()
     "logs")
    
    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
    
    (setenv "TARGETHOST_LOGF" logfile)
    (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    ;; (system cmdln)
    (pop-directory)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run run-id)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
	(begin
	  (server:run run-id)
	  (hash-table-set! *server-kind-run* run-id (current-seconds))))))

;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
;; 
(define (server:try-running run-id)
  (if (eq? run-id 0)
      (server:run run-id)
      (rmt:start-server run-id)))

(define (server:check-if-running run-id)
  (let ((tdbdat (tasks:open-db)))
    (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id))




	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.


	;;














	;; client:start returns #t if login was successful.














	;;



	(let ((res (case *transport-type*
		     ((http)(server:ping-server run-id 
						(tasks:hostinfo-get-interface server)
						(tasks:hostinfo-get-port      server)))
		     ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
		     ;;    			 (tasks:hostinfo-get-port      server)
		     ;;    			 timeout: 2))
                     )))
	  ;; if the server didn't respond we must remove the record
	  (if res
	      #t
	      (begin
		(debug:print-info 0 *default-log-port* "server at " server " not responding, removing record")
		(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id 
				" server:check-if-running")
		res)))
	#f))))

;; called in megatest.scm, host-port is string hostname:port
;;



(define (server:ping run-id host:port)
  (let ((tdbdat (tasks:open-db)))
    (let* ((host-port (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
	   (toppath       (launch:setup))
	   (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat) run-id) #f)))
      (if (not run-id)
	  (begin
	    (debug:print-error 0 *default-log-port* "must specify run-id when doing ping, -run-id n")
	    (print "ERROR: No run-id")
	    (exit 1))
	  (if (and (not host-port)
		   (not server-db-dat))
	      (begin
		(print "ERROR: bad host:port")
		(exit 1))
	      (let* ((iface      (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat)))
		     (port       (if host-port (cadr host-port)(tasks:hostinfo-get-port      server-db-dat)))
		     (server-dat (http-transport:client-connect iface port))
		     (login-res  (rmt:login-no-auto-client-setup server-dat run-id)))
		(if (and (list? login-res)
			 (car login-res))
		    (begin
		      (print "LOGIN_OK")
		      (exit 0))
		    (begin
		      (print "LOGIN_FAILED")
		      (exit 1)))))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server run-id iface port)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -run-id " run-id " -ping " (conc iface ":" port))
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

(define (server:login toppath)
  (lambda (toppath)
    (set! *last-db-access* (current-seconds))
    (if (equal? *toppath* toppath)
	(begin
	  ;; (debug:print-info 2 *default-log-port* "login successful")
	  #t)
	(begin
	  ;; (debug:print-info 2 *default-log-port* "login failed")
	  #f))))

(define (server:get-timeout)
  (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (string->number tmo))
	(* 60 60 (string->number tmo))
	;; (* 3 24 60 60) ;; default to three days
	(* 60 1)         ;; default to one minute
	;; (* 60 60 25)      ;; default to 25 hours
	)))








|


>


|

|


|
<
>
>

<
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|















|










|
|



|
|








|
|
|
>
>
>
>
|
|
|
<
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
|
<
<
|
<
<
|
<

<
<
<
<
|
|
|



>
>
>
|





|
<
|

<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|



|

|














<
<
|
<
<
|











102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

121
122
123

124
125
















126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215


216


217

218




219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234

235
236






237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270


271


272
273
274
275
276
277
278
279
280
281
282
283
     result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is ignored for now.
  (let* ((curr-host   (get-host-name))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (target-host (configf:lookup *configdat* "server" "homehost" ))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc *toppath* "/logs/server-" curr-pid ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))

	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory *toppath*)

    (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...")
    (thread-start! log-rotate)

















    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
    
    (setenv "TARGETHOST_LOGF" logfile)
    (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
	(begin
	  (server:run areapath)
	  (hash-table-set! *server-kind-run* areapath (current-seconds))))))

;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
;; 
(define (server:try-running run-id)
  (if (eq? run-id 0)
      (server:run run-id)
      (rmt:start-server run-id)))

(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))
    (if (and (file-exists? dotfile)
	     (file-read-access? dotfile))
	(with-input-from-file
	    dotfile
	  (lambda ()
	    (read-line)))
	#f)))


;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath hostport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))
    (if (common:simple-file-lock lock-file)
	(let ((res (handle-exceptions
		    exn
		    #f ;; failed for some reason, for the moment simply return #f
		    (with-output-to-file server-file
		      (lambda ()
			(print hostport)))
		    #t)))
	  (common:simple-file-release-lock lock-file)
	  res)
	#f)))

(define (server:remove-dotserver-file areapath hostport)
  (let ((dotserver   (server:read-dotserver areapath))
	(server-file (conc areapath "/.server"))
	(lock-file   (conc areapath "/.server.lock")))
    (if (string-match (conc ".*:" hostport "$") dotserver) ;; port matches, good enough info to decide to remove the file
	(if (common:simple-file-lock lock-file)
	    (begin
	      (handle-exceptions
	       exn
	       #f
	       (delete-file* server-file))
	      (common:simple-file-release-lock lock-file))))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
  (let* ((dotserver (server:read-dotserver areapath))) ;; tdbdat (tasks:open-db)))
    (if dotserver
	(let* ((res (case *transport-type*
		      ((http)(server:ping-server dotserver))


		      ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)


		      )))

	  (if res




	      dotserver
	      #f))
	#f)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host:port)
  (let ((tdbdat (tasks:open-db)))
    (let* ((host-port (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
	   (toppath       (launch:setup)))

      (if (not host-port)
	  (begin






	    (print "ERROR: bad host:port")
	    (exit 1))
	  (let* ((iface      (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat)))
		 (port       (if host-port (cadr host-port)(tasks:hostinfo-get-port      server-db-dat)))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  (print "LOGIN_OK")
		  (exit 0))
		(begin
		  (print "LOGIN_FAILED")
		  (exit 1))))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

(define (server:login toppath)
  (lambda (toppath)
    (set! *last-db-access* (current-seconds))
    (if (equal? *toppath* toppath)


	#t


	#f)))

(define (server:get-timeout)
  (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (string->number tmo))
	(* 60 60 (string->number tmo))
	;; (* 3 24 60 60) ;; default to three days
	(* 60 1)         ;; default to one minute
	;; (* 60 60 25)      ;; default to 25 hours
	)))

Modified tests.scm from [84fbf4d5d8] to [8ec0971889].

395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415

    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)

    ;; update the primary record IF state AND status are defined
    (if (and state status)
	(begin
	  (rmt:test-set-status-state run-id test-id real-status state (if waived waived comment))
	  (mt:process-triggers run-id test-id state real-status)
	  ))
    
    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
    ;; run area, it does remote calls under the hood.
    (if (and test-id state status (equal? status "AUTO")) 
	(rmt:test-data-rollup run-id test-id status))

    ;; add metadata (need to do this way to avoid SQL injection issues)

    ;; :first_err
    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))







|




|
|







395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415

    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)

    ;; update the primary record IF state AND status are defined
    (if (and state status)
	(begin
	  (rmt:test-set-status-state run-id test-id real-status state (if waived waived comment))
	  ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-status-state
	  ))
    
    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
    ;; run area, it does remote calls under the hood.
    ;; (if (and test-id state status (equal? status "AUTO")) 
    ;; 	(rmt:test-data-rollup run-id test-id status))

    ;; add metadata (need to do this way to avoid SQL injection issues)

    ;; :first_err
    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
	    force)
	(let ((my-start-time (current-seconds))
	      (lockf         (conc outputfilename ".lock")))
	  (let loop ((have-lock  (common:simple-file-lock lockf)))
	    (if have-lock
		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
		  (print "Obtained lock for " outputfilename)
		  ;; (rmt:top-test-set-per-pf-counts run-id test-name)
		  (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f #f)
		  (rmt:top-test-set-per-pf-counts run-id test-name)
		  (if script
		      (system (conc script " > " outputfilename " & "))
		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
		  (common:simple-file-release-lock lockf)
		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! run-id test-name outputfilename))







<

|







479
480
481
482
483
484
485

486
487
488
489
490
491
492
493
494
	    force)
	(let ((my-start-time (current-seconds))
	      (lockf         (conc outputfilename ".lock")))
	  (let loop ((have-lock  (common:simple-file-lock lockf)))
	    (if have-lock
		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
		  (print "Obtained lock for " outputfilename)

		  (rmt:roll-up-pass-fail-counts run-id test-name "" #f #f #f)
		  ;; (rmt:test-set-status-state run-id test-name #f #f #f) ;; (rmt:top-test-set-per-pf-counts run-id test-name)
		  (if script
		      (system (conc script " > " outputfilename " & "))
		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
		  (common:simple-file-release-lock lockf)
		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! run-id test-name outputfilename))