Megatest

Check-in [8a26c9c9d7]
Login
Overview
Comment:Merged latest from v1.64 into v1.65
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: 8a26c9c9d71007dad93508ff17c5971ff7dc13b3
User & Date: matt on 2017-06-03 09:47:31
Other Links: branch diff | manifest | tags
Context
2017-06-05
10:28
Add missing area to init in tests Makefile. check-in: b0c45c85fe user: mrwellan tags: v1.65
2017-06-03
09:47
Merged latest from v1.64 into v1.65 check-in: 8a26c9c9d7 user: matt tags: v1.65
2017-06-02
09:54
Added little more feedback for sync. Return numbers as numbers from no-sync metadata check-in: 8528e93d3c user: mrwellan tags: v1.64, v1.6417
2017-05-25
23:12
Merging in recent fixed from v1.64 check-in: b44a2afd2a user: matt tags: v1.65
Changes

Modified Makefile from [a5174e32ef] to [a8f9c9938f].

302
303
304
305
306
307
308




	if csi -ne '(use postgresql)';then \
	   echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\
	fi

portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o












>
>
>
>
302
303
304
305
306
307
308
309
310
311
312
	if csi -ne '(use postgresql)';then \
	   echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\
	fi

portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o

# create a pdf dot graphviz diagram from notations in rmt.scm
rmt.pdf : rmt.scm
	grep ';;DOT' rmt.scm | sed -e 's/.*;;DOT //' > rmt.dot;dot -Tpdf rmt.dot -o rmt.pdf

Modified api.scm from [9ab20f89e3] to [656c55c8ea].

54
55
56
57
58
59
60

61
62
63
64
65
66
67
    get-all-run-ids
    get-prev-run-ids
    get-run-ids-matching-target
    get-runs-by-patt
    get-steps-data
    get-steps-for-test
    read-test-data

    login
    tasks-get-last
    testmeta-get-record
    have-incompletes?
    synchash-get
    ))








>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    get-all-run-ids
    get-prev-run-ids
    get-run-ids-matching-target
    get-runs-by-patt
    get-steps-data
    get-steps-for-test
    read-test-data
    read-test-data*
    login
    tasks-get-last
    testmeta-get-record
    have-incompletes?
    synchash-get
    ))

124
125
126
127
128
129
130

131
132
133
134
135
136
137
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    ((> *api-process-request-count* 20) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")

     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))







>







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    ((> *api-process-request-count* 20) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
     (set! *server-overloaded* #t)
     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
255
256
257
258
259
260
261

262
263
264
265
266
267
268

                   ;; STEPS
                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))

                   ;; TEST DATA
                   ((read-test-data)               (apply db:read-test-data dbstruct params))


                   ;; MISC
                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                   ((login)                        (apply db:login dbstruct params))
                   ((general-call)                 (let ((stmtname   (car params))
                                                         (run-id     (cadr params))







>







257
258
259
260
261
262
263
264
265
266
267
268
269
270
271

                   ;; STEPS
                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))

                   ;; TEST DATA
                   ((read-test-data)               (apply db:read-test-data dbstruct params))
                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))

                   ;; MISC
                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                   ((login)                        (apply db:login dbstruct params))
                   ((general-call)                 (let ((stmtname   (car params))
                                                         (run-id     (cadr params))

Modified common.scm from [3f11599eaa] to [954ff50c00].

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))

;; GLOBAL GLETCHES

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
;; (define *contexts* (make-hash-table))
;; (define *context-mutex* (make-mutex))







|







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))

;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
;; (define *contexts* (make-hash-table))
;; (define *context-mutex* (make-mutex))
124
125
126
127
128
129
130

131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150



151
152
153
154
155
156
157
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))
(define *api-process-request-count* 0)
(define *max-api-process-requests* 0)


;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))




(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:get-timeout)) ;; default from server:get-timeout







>




















>
>
>







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))
(define *api-process-request-count* 0)
(define *max-api-process-requests* 0)
(define *server-overloaded*  #f)

;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

;; Miscellaneous
(define *triggers-mutex*     (make-mutex))     ;; block overlapping processing of triggers

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:get-timeout)) ;; default from server:get-timeout
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
    res))




(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)







<
<
<
<
<
<
<
<
<
<
<
<
<







636
637
638
639
640
641
642













643
644
645
646
647
648
649
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
















(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
675
676
677
678
679
680
681
682

683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
              (if (> golden-mtdb-mtime tmp-mtdb-mtime)

                  (let ((res (db:multi-db-sync dbstruct 'old2new)))
                    (debug:print-info 13 *default-log-port* "rosync called, " res " records transferred.")))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))


        
(define (common:writable-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds))
        (this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb     (dbr:dbstruct-mtdb dbstruct))
	       (mtpath   (db:dbdat-get-path mtdb)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (start-time       (current-seconds))
		   (mt-mod-time      (file-modification-time mtpath))
		   (recently-synced  (< (- start-time mt-mod-time) 4))
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync)
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
		  (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
		    (if (> res 0) ;; some records were transferred, keep the db alive
			(begin
			  (mutex-lock! *heartbeat-mutex*)
			  (set! *db-last-access* (current-seconds))
			  (mutex-unlock! *heartbeat-mutex*)
			  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
			(debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))

;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
	      (common:writable-watchdog dbstruct)))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)







|
>
|
|




<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<













|







666
667
668
669
670
671
672
673
674
675
676
677
678
679
680






































































681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))







































































;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
	      (server:writable-watchdog dbstruct)))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
791
792
793
794
795
796
797

798
799
800
801
802
803
804
805
806
807
808
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))

                              (if (and *runremote*
                                       (remote-conndat *runremote*))
                                  (begin
                                    (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin







>
|
|
|
|







713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (http-client#close-all-connections!)
                              ;; (if (and *runremote*
                              ;;          (remote-conndat *runremote*))
                              ;;     (begin
                              ;;       (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin

Modified db.scm from [25f1970af9] to [5231964fde].

400
401
402
403
404
405
406















407
408
409
410
411
412
413
414
415
416
417
418
419
420
421

422
423
424
425
426
427
428
429
430
431
432
433
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))
















;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (handle-exceptions
	  exn
	  (begin
	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn))
	    (print-call-chain *default-log-port*))
	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let ((tdbs (map db:dbdat-get-db 
                         (stack->list (dbr:dbstruct-dbstack dbstruct))))
              (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
              (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
          (map (lambda (db)

		 (if (sqlite3:database? db)
		     (sqlite3:finalize! db)))
	       tdbs)
          (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
          (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))

;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
;; 		  (hash-table-keys locdbs)))))








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>















>
|
|

|
|







400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

(define (db:safely-close-sqlite3-db db #!key (try-num 3))
  (if (<= try-num 0)
      #f
      (handle-exceptions
	  exn
	  (begin
	    (thread-sleep! 3)
	    (sqlite3:interrupt! db)
	    (db:safely-close-sqlite3-db db try-num: (- try-num 1)))
	(if (sqlite3:database? db)
	    (begin
	      (sqlite3:finalize! db)
	      #t)
	    #f))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (handle-exceptions
	  exn
	  (begin
	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn))
	    (print-call-chain *default-log-port*))
	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let ((tdbs (map db:dbdat-get-db 
                         (stack->list (dbr:dbstruct-dbstack dbstruct))))
              (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
              (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
          (map (lambda (db)
		 (db:safely-close-sqlite3-db db))
;; 		 (if (sqlite3:database? db)
;; 		     (sqlite3:finalize! db)))
	       tdbs)
          (db:safely-close-sqlite3-db mdb)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
          (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))

;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
;; 		  (hash-table-keys locdbs)))))

651
652
653
654
655
656
657
658
659

660




661
662
663
664
665







666




667
668
669
670
671
672
673
674
675
676
677
678
679
680
     (let ((stmts       (make-hash-table)) ;; table-field => stmt
	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	   (numrecs     (make-hash-table))
	   (start-time  (current-milliseconds))
	   (tot-count   0))
       (for-each ;; table
	(lambda (tabledat)
	  (let* ((tablename  (car tabledat))
		 (fields     (cdr tabledat))

		 (use-last-update  (if last-update




				       (if (pair? last-update)
					   (member (car last-update)    ;; last-update field name
						   (map car fields))
					   (begin
					     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair, received: " last-update) ;; found in fields







					     #f))




				       #f))
		 (num-fields (length fields))
		 (field->num (make-hash-table))
		 (num->field (apply vector (map car fields)))
		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				   " FROM " tablename (if use-last-update ;; apply last-update criteria
							  (conc " " (car last-update) ">=" (cdr last-update))
							  "")
				   ";"))
		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
		 (fromdat    '())
		 (fromdats   '())
		 (totrecords 0)







|
|
>
|
>
>
>
>
|
|
|
|
|
>
>
>
>
>
>
>
|
>
>
>
>
|





|







667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
     (let ((stmts       (make-hash-table)) ;; table-field => stmt
	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	   (numrecs     (make-hash-table))
	   (start-time  (current-milliseconds))
	   (tot-count   0))
       (for-each ;; table
	(lambda (tabledat)
	  (let* ((tablename        (car tabledat))
		 (fields           (cdr tabledat))
		 (has-last-update  (member "last_update" fields))
		 (use-last-update  (cond
				    ((and has-last-update
					  (member "last_update" fields))
				     #t) ;; if given a number, just use it for all fields
				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
				    ((and (pair? last-update)
					  (member (car last-update)    ;; last-update field name
						  (map car fields))) #t)
				    (last-update
				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields
				     #f)
				    (else
				     #f)))
		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
					(if (number? last-update)
					    last-update
					    (cdr last-update))
					#f))
		 (last-update-field (if use-last-update
					(if (number? last-update)
					    "last_update"
					    (car last-update))
					#f))
		 (num-fields (length fields))
		 (field->num (make-hash-table))
		 (num->field (apply vector (map car fields)))
		 (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				   " FROM " tablename (if use-last-update ;; apply last-update criteria
							  (conc " WHERE " last-update-field " >= " last-update-value)
							  "")
				   ";"))
		 (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				   " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
		 (fromdat    '())
		 (fromdats   '())
		 (totrecords 0)
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
	     (res      '())
	     (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f)))
	(db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db)
	(db:sync-tables db:sync-tests-only last-update source-db cache-db)
	(hash-table-set! *global-db-store* target cache-db)
	cache-db)))

;; call a proc with a cached db
;;
(define (db:call-with-cached-db proc . params)
  ;; first cache the db in /tmp
  (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name)))
	 (fname      (conc  (common:get-area-path-signature) ".db"))
	 (cache-dir  (common:get-create-writeable-dir
		      (list (conc "/tmp/" (current-user-name) "/" cname-part)
			    (conc "/tmp/" (current-user-name) "-" cname-part)
			     (conc "/tmp/" (current-user-name) "_" cname-part))))
	 (megatest-db (conc *toppath* "/megatest.db")))
    ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir)
    (if (not cache-dir)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db")
	  (exit 1))
	(let* ((th1      (make-thread
			  (lambda ()
			    (if (and (file-exists? megatest-db)
				     (file-write-access? megatest-db))
				(begin
				  (common:sync-to-megatest.db 'timestamps) ;; internally mutexes on *db-local-sync*
				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
			  "call-with-cached-db sync-to-megatest.db"))
	       (cache-db (db:cache-for-read-only
			  megatest-db
			  (conc cache-dir "/" fname)
			  use-last-update: #t)))
	  (thread-start! th1)
	  (apply proc cache-db params)
	  ))))

;; options:
;;
;;  'killservers  - kills all servers
;;  'dejunk       - removes junk records
;;  'adj-testids  - move test-ids into correct ranges
;;  'old2new      - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
	     (res      '())
	     (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f)))
	(db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db)
	(db:sync-tables db:sync-tests-only last-update source-db cache-db)
	(hash-table-set! *global-db-store* target cache-db)
	cache-db)))

;; ;; call a proc with a cached db
;; ;;
;; (define (db:call-with-cached-db proc . params)
;;   ;; first cache the db in /tmp
;;   (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name)))
;; 	 (fname      (conc  (common:get-area-path-signature) ".db"))
;; 	 (cache-dir  (common:get-create-writeable-dir
;; 		      (list (conc "/tmp/" (current-user-name) "/" cname-part)
;; 			    (conc "/tmp/" (current-user-name) "-" cname-part)
;; 			     (conc "/tmp/" (current-user-name) "_" cname-part))))
;; 	 (megatest-db (conc *toppath* "/megatest.db")))
;;     ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir)
;;     (if (not cache-dir)
;; 	(begin
;; 	  (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db")
;; 	  (exit 1))
;; 	(let* ((th1      (make-thread
;; 			  (lambda ()
;; 			    (if (and (file-exists? megatest-db)
;; 				     (file-write-access? megatest-db))
;; 				(begin
;; 				  (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync*
;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
;; 			  "call-with-cached-db sync-to-megatest.db"))
;; 	       (cache-db (db:cache-for-read-only
;; 			  megatest-db
;; 			  (conc cache-dir "/" fname)
;; 			  use-last-update: #t)))
;; 	  (thread-start! th1)
;; 	  (apply proc cache-db params)
;; 	  ))))

;; options:
;;
;;  'killservers  - kills all servers
;;  'dejunk       - removes junk records
;;  'adj-testids  - move test-ids into correct ranges
;;  'old2new      - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db
996
997
998
999
1000
1001
1002





























1003
1004
1005
1006
1007
1008
1009
	  (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
       
       (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
     options)
    data-synced))






























;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
  (exit)
  (if (or *db-write-access*







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
	  (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
       
       (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
     options)
    data-synced))

(define (db:tmp->megatest.db-sync dbstruct last-update)
  (let* ((mtdb        (dbr:dbstruct-mtdb dbstruct))
	 (tmpdb       (db:get-db dbstruct))
	 (refndb      (dbr:dbstruct-refndb dbstruct)))
    (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) 
  (let* ((start-time         (current-seconds))
	 (last-update        (if no-sync-db
				 (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
				 0)) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
	 (sync-needed        (> (- start-time last-update) 6))
	 (res                (if sync-needed ;; don't sync if a sync already occurred in the past 6 seconds
				 (begin
				   (if no-sync-db
				       (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))
				   (db:tmp->megatest.db-sync dbstruct last-update))
				 0))
	 (sync-time           (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
          (if sync-needed
              (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
              (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago")))
      res))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
  (exit)
  (if (or *db-write-access*
1766
1767
1768
1769
1770
1771
1772


































1773
1774
1775
1776
1777
1778
1779
	      (lambda (db)
		(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))

(define (db:del-var dbstruct var)
  (db:with-db dbstruct #f #t 
	      (lambda (db)
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))



































;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change

;; why get the keys from the db? why not get from the *configdat*
;; using keys:config-get-fields?







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
	      (lambda (db)
		(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))

(define (db:del-var dbstruct var)
  (db:with-db dbstruct #f #t 
	      (lambda (db)
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (db:dbfile-path))
	 (dbname (conc dbpath "/no-sync.db"))
	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
    db))

(define (db:no-sync-set db var val)
  (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-get/default db var default)
  (let ((res default))
    (sqlite3:for-each-row
     (lambda (val)
       (set! res val))
     db
     "SELECT val FROM no_sync_metadat WHERE var=?;"
     var)
    (if res
        (let ((newres (string->number res)))
          (if newres
              newres
              res))
        res)))

(define (db:no-sync-close-db db)
  (db:safely-close-sqlite3-db db))

;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change

;; why get the keys from the db? why not get from the *configdat*
;; using keys:config-get-fields?
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
			     0                                              ;; 2 ;; Tolerance
			     "n/a"					    ;; 3 ;; Units
			     (configf:lookup dat entry-name "message")      ;; 4 ;; Comment
			     (configf:lookup dat entry-name "exit-status")  ;; 5 ;; Status
			     "logpro"                                       ;; 6 ;; Type
			     ))))
	   (let* ((value     (or (configf:lookup dat entry-name "measured")  "n/a"))
		  (expected  (or (configf:lookup dat entry-name "expected")  "n/a"))
		  (tolerance (or (configf:lookup dat entry-name "tolerance") "n/a"))
		  (comment   (or (configf:lookup dat entry-name "comment")
				 (configf:lookup dat entry-name "desc")      "n/a"))
		  (status    (or (configf:lookup dat entry-name "status")    "n/a"))
		  (type      (or (configf:lookup dat entry-name "expected")  "n/a")))
	     (set! res (append
			res  
			(list (list stepname







|
|







3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
			     0                                              ;; 2 ;; Tolerance
			     "n/a"					    ;; 3 ;; Units
			     (configf:lookup dat entry-name "message")      ;; 4 ;; Comment
			     (configf:lookup dat entry-name "exit-status")  ;; 5 ;; Status
			     "logpro"                                       ;; 6 ;; Type
			     ))))
	   (let* ((value     (or (configf:lookup dat entry-name "measured")  "n/a"))
		  (expected  (or (configf:lookup dat entry-name "expected")  0.0))
		  (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0))
		  (comment   (or (configf:lookup dat entry-name "comment")
				 (configf:lookup dat entry-name "desc")      "n/a"))
		  (status    (or (configf:lookup dat entry-name "status")    "n/a"))
		  (type      (or (configf:lookup dat entry-name "expected")  "n/a")))
	     (set! res (append
			res  
			(list (list stepname
3127
3128
3129
3130
3131
3132
3133















3134
3135
3136
3137
3138
3139
3140
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id test_id category variable value expected tol units comment status type)
	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
	db
	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
       (reverse res)))))
















;;======================================================================
;; Misc. test related queries
;;======================================================================

(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
  (db:with-db







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id test_id category variable value expected tol units comment status type)
	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
	db
	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
       (reverse res)))))

;; This routine moved from tdb.scm, tdb:read-test-data
;;
(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt)
  (let* ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id test_id category variable value expected tol units comment status type)
	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
	db
	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt)
       (reverse res)))))


;;======================================================================
;; Misc. test related queries
;;======================================================================

(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
  (db:with-db

Modified docs/manual/megatest_manual.html from [7412bce352] to [393543b9c4].

1
2
3
4
5
6
7
8
9
10
11
12
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="generator" content="AsciiDoc 8.6.7">
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;




|







1
2
3
4
5
6
7
8
9
10
11
12
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="generator" content="AsciiDoc 8.6.9">
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;
82
83
84
85
86
87
88
89



90
91
92



93
94
95
96
97
98
99

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

pre {



  padding: 0;
  margin: 0;
}




#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {







|
>
>
>



>
>
>







82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

.monospaced, code, pre {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
  padding: 0;
  margin: 0;
}
pre {
  white-space: pre-wrap;
}

#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {







|







220
221
222
223
224
225
226
227
228
229
230
231
232
233
234

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429


/*
 * xhtml11 specific
 *
 * */

tt {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}







<
<
<
<
<
<







416
417
418
419
420
421
422






423
424
425
426
427
428
429


/*
 * xhtml11 specific
 *
 * */







div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468


/*
 * html5 specific
 *
 * */

.monospaced {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;







<
<
<
<
<
<







449
450
451
452
453
454
455






456
457
458
459
460
461
462


/*
 * html5 specific
 *
 * */







table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;
534
535
536
537
538
539
540


541
542
543
544
545
546
547
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


@media screen {
  body {
    max-width: 50em; /* approximately 80 characters wide */
    margin-left: 16em;
  }

  #toc {







>
>







528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


@media screen {
  body {
    max-width: 50em; /* approximately 80 characters wide */
    margin-left: 16em;
  }

  #toc {
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
<div class="listingblock">
<div class="content monospaced">
<pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig triggers can be specified</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]
COMPLETED/ xterm -e bash -s --</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
<div class="paragraph"><p>There are a number of environment variables available to the trigger script
but since triggers can be called in various contexts not all variables are
available at all times. The trigger script should check for the variable and







|













|










|







1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
<div class="listingblock">
<div class="content monospaced">
<pre>$MT_MEGATEST -env2file .ezsteps/${stepname}</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_triggers">Triggers</h3>
<div class="paragraph"><p>In your testconfig or megatest.config triggers can be specified</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh</pre>
</div></div>
<div class="paragraph"><p>Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.</p></div>
<div class="paragraph"><p>HINT</p></div>
<div class="paragraph"><p>To start an xterm (useful for debugging), use a command line like the following:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[triggers]
COMPLETED/ xterm -e bash -s --</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/usr/images/icons/note.png" alt="Note">
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
<div class="paragraph"><p>There are a number of environment variables available to the trigger script
but since triggers can be called in various contexts not all variables are
available at all times. The trigger script should check for the variable and
2149
2150
2151
2152
2153
2154
2155
2156

2157
2158
2159
2160
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br>
Last updated 2017-05-10 08:25:49 PDT

</div>
</div>
</body>
</html>







|
>




2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br>
Last updated
 2017-05-21 21:58:43 MST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [c2be003328] to [6da1e5377a].

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
----------------------------
$MT_MEGATEST -env2file .ezsteps/${stepname}	  
----------------------------

Triggers
~~~~~~~~

In your testconfig triggers can be specified 

-----------------
[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh
-----------------

Scripts called will have; test-id test-rundir trigger, added to the commandline.

HINT

To start an xterm (useful for debugging), use a command line like the following:

-----------------
[triggers]







|














|







468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
----------------------------
$MT_MEGATEST -env2file .ezsteps/${stepname}	  
----------------------------

Triggers
~~~~~~~~

In your testconfig or megatest.config triggers can be specified 

-----------------
[triggers]

# Call script running.sh when test goes to state=RUNNING, status=PASS
RUNNING/PASS running.sh

# Call script running.sh any time state goes to RUNNING
RUNNING/ running.sh

# Call script onpass.sh any time status goes to PASS
PASS/ onpass.sh
-----------------

Scripts called will have; test-id test-rundir trigger test-name item-path state status event-time, added to the commandline.

HINT

To start an xterm (useful for debugging), use a command line like the following:

-----------------
[triggers]

Modified docs/manual/server.png from [ae7d7ee58e] to [d192675d70].

cannot compute difference between binary files

Modified http-transport.scm from [c98c92ea3b] to [2868f6cbd2].

292
293
294
295
296
297
298





299
300
301
302
303
304
305
306
307
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))





	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))







>
>
>
>
>
|
|







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
	    (close-connection! api-dat)
	    #t))
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
440
441
442
443
444
445
446

447
448
449
450
451
452
453
454
	       (< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour.
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")

		  (change-file-times server-log-file curr-time curr-time))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
  (begin







>
|







445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
	       (< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour.
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
		  (if (not *server-overloaded*)
		      (change-file-times server-log-file curr-time curr-time)))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
  (begin
483
484
485
486
487
488
489


490
491
492
493
494
495
496
497
498
499
500
501
502
503
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)


  ;; (if (args:get-arg "-daemonize")
  ;;     (begin
  ;; 	(daemon:ize)
  ;; 	(if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
  ;; 	    (begin
  ;; 	      (current-error-port *alt-log-file*)
  ;; 	      (current-output-port *alt-log-file*)))))
  (let* ((th2 (make-thread (lambda ()
			     (debug:print-info 0 *default-log-port* "Server run thread started")
			     (http-transport:run 
			      (if (args:get-arg "-server")
				  (args:get-arg "-server")
				  "-")
			      )) "Server run"))







>
>
|
|
<
<
|
<
|







489
490
491
492
493
494
495
496
497
498
499


500

501
502
503
504
505
506
507
508
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; lets not even bother to start if there are already three or more server files ready to go
  (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
    (if (> num-alive 3)
	(begin


	  (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")

	  (exit))))
  (let* ((th2 (make-thread (lambda ()
			     (debug:print-info 0 *default-log-port* "Server run thread started")
			     (http-transport:run 
			      (if (args:get-arg "-server")
				  (args:get-arg "-server")
				  "-")
			      )) "Server run"))

Modified launch.scm from [68a0486978] to [35a6e601a3].

547
548
549
550
551
552
553

554



555
556
557
558
559
560
561
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))

		 (test-host (db:test-get-host        test-info))



		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running







>
|
>
>
>







547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
1221
1222
1223
1224
1225
1226
1227

1228
1229
1230
1231
1232
1233
1234
1235
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ((item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin

	    (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)







>
|







1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ((item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)

Modified megatest.config from [7cee0eda7e] to [524db789d0].

27
28
29
30
31
32
33

# quick areas=fullrun,ext-tests;   selector=QUICKPATT/quick
# full  areas=fullrun,ext-tests; selector=MAXPATT/
# short areas=fullrun,ext-tests; selector=MAXPATT/
# all   areas=fullrun,ext-tests
# snazy selector=QUICKPATT/

[nopurpose]








>
27
28
29
30
31
32
33
34
# quick areas=fullrun,ext-tests;   selector=QUICKPATT/quick
# full  areas=fullrun,ext-tests; selector=MAXPATT/
# short areas=fullrun,ext-tests; selector=MAXPATT/
# all   areas=fullrun,ext-tests
# snazy selector=QUICKPATT/

[nopurpose]

Modified megatest.scm from [5793c013b1] to [59b442eee0].

95
96
97
98
99
100
101

102
103
104
105
106
107
108
  -reqtarg key1/key2/...  : run for key1, key2, etc. but key1/key2 must be in runconfigs
  -testpatt patt1/patt2,patt3/...  : % is wildcard
  -runname                : required, name for this particular test run
  -state                  : Applies to runs, tests or steps depending on context
  -status                 : Applies to runs, tests or steps depending on context
  --modepatt key          : load testpatt from <key> in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified
  -tagexpr tag1,tag2%,..  : select tests with tags matching expression


Test helpers (for use inside tests)
  -step stepname
  -test-status            : set the state and status of a test (use :state and :status)
  -setlog logfname        : set the path/filename to the final log relative to the test
                            directory. may be used with -test-status
  -set-toplog logfname    : set the overall log for a suite of sub-tests







>







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  -reqtarg key1/key2/...  : run for key1, key2, etc. but key1/key2 must be in runconfigs
  -testpatt patt1/patt2,patt3/...  : % is wildcard
  -runname                : required, name for this particular test run
  -state                  : Applies to runs, tests or steps depending on context
  -status                 : Applies to runs, tests or steps depending on context
  --modepatt key          : load testpatt from <key> in runconfigs instead of default TESTPATT if -testpatt and -tagexpr are not specified
  -tagexpr tag1,tag2%,..  : select tests with tags matching expression
  

Test helpers (for use inside tests)
  -step stepname
  -test-status            : set the state and status of a test (use :state and :status)
  -setlog logfname        : set the path/filename to the final log relative to the test
                            directory. may be used with -test-status
  -set-toplog logfname    : set the overall log for a suite of sub-tests
136
137
138
139
140
141
142

143
144
145
146
147
148
149
  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
  -show-cmdinfo           : dump the command info for a test (run in test environment)
  -section sectionName
  -var varName            : for config and runconfig lookup value for sectionName varName
  -since N                : get list of runs changed since time N (Unix seconds)
  -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
  -sort fieldname         : in -list-runs sort tests by this field


Misc 
  -start-dir path         : switch to this directory before running megatest
  -contour cname          : add a level of hierarcy to the linktree and run paths
  -rebuild-db             : bring the database schema up to date
  -cleanup-db             : remove any orphan records, vacuum the db
  -import-megatest.db     : push data from megatest.db to cache db files in /tmp/$USER







>







137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
  -show-cmdinfo           : dump the command info for a test (run in test environment)
  -section sectionName
  -var varName            : for config and runconfig lookup value for sectionName varName
  -since N                : get list of runs changed since time N (Unix seconds)
  -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
  -sort fieldname         : in -list-runs sort tests by this field
  -testdata-csv [categorypatt/]varpatt  : dump testdata for given category

Misc 
  -start-dir path         : switch to this directory before running megatest
  -contour cname          : add a level of hierarcy to the linktree and run paths
  -rebuild-db             : bring the database schema up to date
  -cleanup-db             : remove any orphan records, vacuum the db
  -import-megatest.db     : push data from megatest.db to cache db files in /tmp/$USER
224
225
226
227
228
229
230

231
232
233
234
235
236
237
			":runname"
			"-runname"
			":state"  
			"-state"
			":status"
			"-status"
			"-list-runs"

			"-testpatt"
                        "--modepatt"
                        "-tagexpr"
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"







>







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
			":runname"
			"-runname"
			":state"  
			"-state"
			":status"
			"-status"
			"-list-runs"
                        "-testdata-csv"
			"-testpatt"
                        "--modepatt"
                        "-tagexpr"
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
399
400
401
402
403
404
405

406
407
408
409
410
411
412
			(common:watchdog)))
		    "Watchdog thread"))

;;(if (not (args:get-arg "-server"))
;;    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
(let* ((no-watchdog-args
       '("-list-runs"

         "-list-servers"
         "-server"
         "-list-disks"
         "-list-targets"
         "-show-runconfig"
         ;;"-list-db-targets"
         "-show-runconfig"







>







402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
			(common:watchdog)))
		    "Watchdog thread"))

;;(if (not (args:get-arg "-server"))
;;    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
(let* ((no-watchdog-args
       '("-list-runs"
         "-testdata-csv"
         "-list-servers"
         "-server"
         "-list-disks"
         "-list-targets"
         "-show-runconfig"
         ;;"-list-db-targets"
         "-show-runconfig"
1045
1046
1047
1048
1049
1050
1051








































































































1052
1053
1054
1055
1056
1057
1058
(define (get-value-by-fieldname datavec test-field-index fieldname)
  (let ((indx (hash-table-ref/default test-field-index fieldname #f)))
    (if indx
	(if (>= indx (vector-length datavec))
	    #f ;; index too high, should raise an error I suppose
	    (vector-ref datavec indx))
	#f)))









































































































;; NOTE: list-runs and list-db-targets operate on local db!!!
;;
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
	(args:get-arg "-list-db-targets"))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
(define (get-value-by-fieldname datavec test-field-index fieldname)
  (let ((indx (hash-table-ref/default test-field-index fieldname #f)))
    (if indx
	(if (>= indx (vector-length datavec))
	    #f ;; index too high, should raise an error I suppose
	    (vector-ref datavec indx))
	#f)))





(when (args:get-arg "-testdata-csv")
  (if (launch:setup)
      (let* ((keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
             (runpatt     (or (args:get-arg "-runname") "%"))
             (testpatt    (common:args-get-testpatt #f))
             (datapatt    (args:get-arg "-testdata-csv"))
             (match-data  (string-match "^([^/]+)/(.*)" (args:get-arg "-testdata-csv")))
             (categorypatt (if match-data (list-ref match-data 1) "%"))
             (setvarpatt  (if match-data
                              (list-ref match-data 2)
                              (args:get-arg "-testdata-csv")))
             (runsdat     (rmt:get-runs-by-patt keys (or runpatt "%") 
                                                (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0))
             (header      (db:get-header runsdat))
             (access-mode (db:get-access-mode))
             (testpatt    (common:args-get-testpatt #f))
             (fields-spec (if (args:get-arg "-fields")
                              (extract-fields-constraints (args:get-arg "-fields"))
                              (list (cons "runs" (append keys (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count")))
                                    (cons "tests"  db:test-record-fields) ;; "id" "testname" "test_path")
                                    (list "steps" "id" "stepname"))))
             (tests-spec  (let ((t (alist-ref "tests" fields-spec equal?)))
                            (if (and t (null? t)) ;; all fields
                                db:test-record-fields
                                t)))
             (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) 
             (test-field-index (make-hash-table))
             (runs (db:get-rows runsdat))
             )
        (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
            (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
              (if (null? invalid-tests-spec)
                  ;; generate the lookup map test-field-name => index-number
                  (let loop ((hed (car adj-tests-spec))
                             (tal (cdr adj-tests-spec))
                             (idx 0))
                    (hash-table-set! test-field-index hed idx)
                    (if (not (null? tal))(loop (car tal)(cdr tal)(+ idx 1))))
                  (begin
                    (debug:print-error 0 *default-log-port* "Invalid test fields specified: " (string-intersperse invalid-tests-spec ", "))
                    (exit)))))
        (let* ((table-header (string-split "target,run,test,itempath,category,var,value,comment" ","))
               (table-rows
                (apply append (map  
                               (lambda (run)
                                 (let* ((target (string-intersperse (map (lambda (x)
							 (db:get-value-by-header run header x))
						       keys) "/"))
                                        (statuses (string-split (or (args:get-arg "-status") "") ","))
                                        (run-id  (db:get-value-by-header run header "id"))
                                        (runname (db:get-value-by-header run header "runname")) 
                                        (states  (string-split (or (args:get-arg "-state") "") ","))
                                        (tests   (if tests-spec
                                                     (db:dispatch-query access-mode rmt:get-tests-for-run db:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc 
                                                                        ;; use qryvals if test-spec provided
                                                                        (if tests-spec
                                                                            (string-intersperse adj-tests-spec ",")
                                                                            ;; db:test-record-fields
                                                                            #f)
                                                                        #f
                                                                        'normal)
                                                     '())))
                                   (apply append
                                          (map
                                           (lambda (test)
                                             (let* (
                                                    (test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))
                                                    (testname     (if (member "testname"     tests-spec)(get-value-by-fieldname test test-field-index "testname"    ) #f)) ;; (db:test-get-testname   test))
                                                    (itempath     (if (member "item_path"    tests-spec)(get-value-by-fieldname test test-field-index "item_path"   ) #f)) ;; (db:test-get-item-path  test))
                                                    (fullname     (conc testname
                                                                        (if (equal? itempath "")
                                                                            "" 
                                                                            (conc "/" itempath ))))
                                                    (testdat-raw (map vector->list (rmt:read-test-data* run-id test-id categorypatt setvarpatt)))
                                                    (testdat (filter
                                                              (lambda (x)
                                                                (not (equal? "logpro"
                                                                             (list-ref x 10))))
                                                              testdat-raw)))
                                               (map 
                                                (lambda (item)
                                                  (receive (id test_id category
                                                               variable value expected
                                                               tol units comment status type)
                                                      (apply values item)
                                                    (list target runname testname itempath category variable value comment)))
                                                testdat)))
                                           tests))))
                               runs))))
          (print (string-join table-header ","))
          (for-each (lambda(table-row)
                      (print (string-join (map ->string table-row) ",")))

                    
                            table-rows))))
  (set! *didsomething* #t)
  (set! *time-to-exit* #t))



;; NOTE: list-runs and list-db-targets operate on local db!!!
;;
;; IDEA: megatest list -runname blah% ...
;;
(if (or (args:get-arg "-list-runs")
	(args:get-arg "-list-db-targets"))

Modified mt.scm from [410c526eee] to [db2ac226d1].

125
126
127
128
129
130
131



































132
133

134
135
136
137
138




139
140
141

142
143
144
145
146
147
148
149
150

151

152
153



154

155

156
157
158
159
160
161
162
163
164
165
166
167


168
169
170
171
172
173
174
175
			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
			    res)
			  (cons testn res)))))))))

;;======================================================================
;;  T R I G G E R S
;;======================================================================




































(define (mt:process-triggers dbstruct run-id test-id newstate newstatus)

  (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
    (if test-dat
	(let* ((test-rundir   ;; (rmt:sdb-qry 'getstr ;; (filedb:get-path *fdb*
		(db:test-get-rundir test-dat)) ;; ) ;; )
	       (test-name     (db:test-get-testname test-dat))




	       (tconfig       #f)
	       (state         (if newstate  newstate  (db:test-get-state  test-dat)))
	       (status        (if newstatus newstatus (db:test-get-status test-dat))))

	  (if (and test-name
		   test-rundir   ;; #f means no dir set yet
		   (file-exists? test-rundir)
		   (directory? test-rundir))
	      (call-with-environment-variables
	       (list (cons "MT_TEST_NAME" test-name)
		     (cons "MT_TEST_RUN_DIR" test-rundir)
		     (cons "MT_ITEMPATH"     (db:test-get-item-path test-dat)))
	       (lambda ()

		 (push-directory test-rundir)

		 (set! tconfig (mt:lazy-read-test-config test-name))
		 (for-each (lambda (trigger)



			     (let ((cmd  (configf:lookup tconfig "triggers" trigger))

				   (logf (conc  test-rundir "/last-trigger.log")))

			       (if cmd
				   ;; Putting the commandline into ( )'s means no control over the shell. 
				   ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
				   ;; or equivalent. No need to do this. Just run it?
				   (let ((fullcmd (conc cmd " " test-id " " test-rundir " " trigger "&")))
				     (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd)
				     (process-run fullcmd)))))
			   (list
			    (conc state "/" status)
			    (conc state "/")
			    (conc "/" status)))
		 (pop-directory))


	       ))))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
|
|
<
|
|
>
>
>
>
|
|
|
>
|
|
|
|
|
|
|
|
|
>
|
>
|
|
>
>
>
|
>
|
>
|
<
<
<
<
<
<
|
|
|
|
|
>
>
|







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203






204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
			    res)
			  (cons testn res)))))))))

;;======================================================================
;;  T R I G G E R S
;;======================================================================

(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
  ;; Putting the commandline into ( )'s means no control over the shell. 
  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
  ;; or equivalent. No need to do this. Just run it?
  (let* ((fullcmd (conc "nbfake "
			cmd           " "
			test-id       " "
			test-rundir   " "
			trigger       " "
			test-name     " "
			item-path     " " ;; has / prepended to deal with toplevel tests
			actual-state  " "
			actual-status " "
			event-time
			))
	 (prev-nbfake-log (get-environment-variable "NBFAKE_LOG")))
    (setenv "NBFAKE_LOG" (conc (cond
				((and (directory-exists? test-rundir)
				      (file-write-access? test-rundir))
				 test-rundir)
				((and (directory-exists? *toppath*)
				      (file-write-access? *toppath*))
				 *toppath*)
				(else (conc "/tmp/" (current-user-name))))
			       "/" logname))
    (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG"))
    ;; (call-with-environment-variables
    ;;  `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname)))
    ;;  (lambda ()
    (process-run fullcmd)
    (if prev-nbfake-log
	(setenv "NBFAKE_LOG" prev-nbfake-log)
	(unsetenv "NBFAKE_LOG"))
    )) ;; ))

(define (mt:process-triggers dbstruct run-id test-id newstate newstatus)
  (if test-id 
      (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
	(if test-dat

	    (let* ((test-rundir   (db:test-get-rundir       test-dat)) ;; ) ;; )
		   (test-name     (db:test-get-testname     test-dat))
		   (item-path     (db:test-get-item-path    test-dat))
		   (duration      (db:test-get-run_duration test-dat))
		   (comment       (db:test-get-comment      test-dat))
		   (event-time    (db:test-get-event_time   test-dat))
		   (tconfig       #f)
		   (state         (if newstate  newstate  (db:test-get-state  test-dat)))
		   (status        (if newstatus newstatus (db:test-get-status test-dat))))
	      ;; (mutex-lock! *triggers-mutex*)
	      (if (and test-name
		       test-rundir)   ;; #f means no dir set yet
		       ;; (file-exists? test-rundir)
		       ;; (directory? test-rundir))
		  (call-with-environment-variables
		   (list (cons "MT_TEST_NAME"    (or test-name "no such test"))
			 (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet"))
			 (cons "MT_ITEMPATH"     (or item-path "")))
		   (lambda ()
		     (if (directory-exists? test-rundir)
			 (push-directory test-rundir)
			 (push-directory *toppath*))
		     (set! tconfig (mt:lazy-read-test-config test-name))
		     (for-each (lambda (trigger)
				 (let* ((munged-trigger (string-translate trigger "/ " "--"))
					(logname        (conc "last-trigger-" munged-trigger ".log")))
				   ;; first any triggers from the testconfig
				   (let ((cmd  (configf:lookup tconfig "triggers" trigger)))
				     (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status)))
				   ;; next any triggers from megatest.config
				   (let ((cmd  (configf:lookup *configdat* "triggers" trigger)))
				     (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status)))))






			       (list
				(conc state "/" status)
				(conc state "/")
				(conc "/" status)))
		     (pop-directory))
		   ))
	      ;; (mutex-unlock! *triggers-mutex*)
	      )))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)

Modified rmt.scm from [30d8789a16] to [c3ed4a3bb8].

44
45
46
47
48
49
50






51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73



74
75
76
77
78




79
80
81
82
83
84
85


86
87
88
89
90



91
92
93
94
95
96
97
98



99
100
101
102
103
104
105
106






107
108
109
110
111
112

113
114
115




116
117
118
119
120
121
122
123



124
125
126
127
128
129

130
131
132
133



134
135
136
137
138
139
140
141
142



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159



160
161
162
163
164
165
166
167
168
169
170




171
172
173
174
175
176
177




178
179
180
181
182
183
184

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected







  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
	 (readonly-mode (if (and runremote
				 (remote-ro-mode-checked runremote))
			    (remote-ro-mode runremote)
			    (let* ((dbfile  (conc *toppath* "/megatest.db"))
				   (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
			      (if runremote
				  (begin
				    (remote-ro-mode-set! runremote ro-mode)
				    (remote-ro-mode-checked-set! runremote #t)
				    ro-mode)
				  ro-mode)))))




     ;; ensure we have a record for our connection for given area
     (if (not runremote)                   ;; can remove this one. should never get here.         
	 (begin
	   (set! *runremote* (make-remote))
	   (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration




     ;; ensure we have a homehost record
     (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	 (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
	 (remote-hh-dat-set! runremote (common:get-homehost)))

     ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond


     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))




     ;; readonly mode, read request-  handle it - case 20
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 20")
      (rmt:open-qry-close-locally cmd 0 params)
      )




     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 21")
      (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
      #f
      )







     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
	     (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")

      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))




     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote))      ;; honor forced use of server
	   (cdr (remote-hh-dat runremote))     ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))




     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))

      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))




     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))




     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote)) ;; honor forced use of server
	   (cdr (remote-hh-dat runremote))       ;; new
           (not (remote-server-url runremote))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5.1")
      (rmt:open-qry-close-locally cmd 0 params))




     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6  hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! *rmt-mutex*)
      (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	  (server:start-and-wait *toppath*))
      (remote-force-server-set! runremote (common:force-server?))
      (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as




     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  7")
      (rmt:open-qry-close-locally cmd (if rid rid 0) params))





     ;; not on homehost, do server query
     (else
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      (mutex-lock! *rmt-mutex*)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)







>
>
>
>
>
>


|




















>
>
>
|
|
|
|
|
>
>
>
>
|
|
|
|
|
|

>
>





>
>
>
|



|



>
>
>



|

|
|
|
>
>
>
>
>
>
|
|
|
|
|
|
>
|
|
|
>
>
>
>

|
|
|

|


>
>
>






>

|


>
>
>

|




|


>
>
>

|
|
|
|
|








|


>
>
>




|



<


>
>
>
>




|


>
>
>
>







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  ;;DOT digraph megatest_state_status {
  ;;DOT   ranksep=0;
  ;;DOT   // rankdir=LR;
  ;;DOT   node [shape="box"];
  ;;DOT "rmt:send-receive" -> MUTEXLOCK;
  ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; }
  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
         (areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (runremote     (or area-dat
			    *runremote*))
	 (readonly-mode (if (and runremote
				 (remote-ro-mode-checked runremote))
			    (remote-ro-mode runremote)
			    (let* ((dbfile  (conc *toppath* "/megatest.db"))
				   (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
			      (if runremote
				  (begin
				    (remote-ro-mode-set! runremote ro-mode)
				    (remote-ro-mode-checked-set! runremote #t)
				    ro-mode)
				  ro-mode)))))

    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    
    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
	(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
	(remote-hh-dat-set! runremote (common:get-homehost)))
    
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     ;;DOT EXIT;
     ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" }
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))

     ;;DOT CASE2 [label="local\nreadonly\nquery"];
     ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2}
     ;;DOT CASE2 -> "rmt:open-qry-close-locally";
     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally cmd 0 params)
      )

     ;;DOT CASE3 [label="write in\nread-only mode"];
     ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3}
     ;;DOT CASE3 -> "#f";
     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
      (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
      #f)

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; ;;DOT CASE4 [label="reset\nconnection"];
     ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;; ;;DOT CASE4 -> "rmt:send-receive";
     ;; ;; reset the connection if it has been unused too long
     ;; ((and runremote
     ;;       (remote-conndat runremote)
     ;; 	   (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
     ;; 	     (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
     ;;  (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
     ;;  (http-transport:close-connections area-dat: runremote)
     ;;  (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
     ;;DOT CASE5 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
     ;;DOT CASE7 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a write, we already have a server
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote))            ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE8 [label="force\nserver"];
     ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8};
     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
	       (not (remote-conndat runremote))))           ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
      (mutex-unlock! *rmt-mutex*)
      (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
	  (server:start-and-wait *toppath*))

      (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;;DOT CASE10 [label="on homehost"];
     ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10};
     ;;DOT CASE10 -> "rmt:open-qry-close-locally";
     ;; all set up if get this far, dispatch the query
     ((and (not (remote-force-server runremote))
	   (cdr (remote-hh-dat runremote))) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally cmd (if rid rid 0) params))

     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      (mutex-lock! *rmt-mutex*)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)
198
199
200
201
202
203
204

205
206
207
208
209
210
211
212

213
214
215
216
217
218


219
220
221
222
223
224
225
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)

		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (remote-conndat-set!    runremote #f)

	      (remote-server-url-set! runremote #f)
	      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      (if (not (server:check-if-running *toppath*))
		  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))



;; (define (rmt:update-db-stats run-id rawcmd params duration)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions
;;    exn
;;    (begin
;;      (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
;;      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))







>








>


|
|


>
>







250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)
	      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions
;;    exn
;;    (begin
;;      (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
;;      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
751
752
753
754
755
756
757



758
759
760
761
762
763
764

;;======================================================================
;;  T E S T   D A T A 
;;======================================================================

(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
  (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))




(define (rmt:testmeta-add-record testname)
  (rmt:send-receive 'testmeta-add-record #f (list testname)))

(define (rmt:testmeta-get-record testname)
  (rmt:send-receive 'testmeta-get-record #f (list testname)))








>
>
>







807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823

;;======================================================================
;;  T E S T   D A T A 
;;======================================================================

(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
  (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) 
  (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))


(define (rmt:testmeta-add-record testname)
  (rmt:send-receive 'testmeta-add-record #f (list testname)))

(define (rmt:testmeta-get-record testname)
  (rmt:send-receive 'testmeta-get-record #f (list testname)))

Modified runs.scm from [cca5799598] to [2c1274be3a].

1266
1267
1268
1269
1270
1271
1272



1273
1274
1275
1276
1277
1278
1279
1280
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	;; every 15 minutes verify the server is there for this run
	(if (and (common:low-noise-print 240 "try start server"  run-id)



		 (not (server:check-if-running *toppath*)))
	    (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))







>
>
>
|







1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	;; every 15 minutes verify the server is there for this run
	(if (and (common:low-noise-print 240 "try start server"  run-id)
		 (not (or (and *runremote*
			       (remote-server-url *runremote*)
			       (server:ping (remote-server-url *runremote*)))
			  (server:check-if-running *toppath*))))
	    (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))

Modified server.scm from [a906848985] to [da462fcbd3].

208
209
210
211
212
213
214
215














216
217
218
219
220
221
222
				     res
				     (cons (append serv-rec (list pid)) res))))
		(if (null? tal)
		    (if (and limit
			     (> (length new-res) limit))
			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			new-res)
		      (loop (car tal)(cdr tal) new-res)))))))))















;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>







208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
				     res
				     (cons (append serv-rec (list pid)) res))))
		(if (null? tal)
		    (if (and limit
			     (> (length new-res) limit))
			new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			new-res)
		    (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
       (match-let (((mod-time host port start-time pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)
    num-alive))

;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
450
451
452
453
454
455
456









































































     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

















































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
     ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
     (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
    (string-intersperse 
     (map number->string
	  (u8vector->list
	   (if res res (hostname->ip hostname)))) ".")))

;; moving this here as it needs access to db and cannot be in common.
;;
(define (server:writable-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb     (dbr:dbstruct-mtdb dbstruct))
	       (mtpath   (db:dbdat-get-path mtdb)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (start-time       (current-seconds))
		   (mt-mod-time      (file-modification-time mtpath))
		   (recently-synced  (< (- start-time mt-mod-time) 4))
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync)
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
		  (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
		    (if (> res 0) ;; some records were transferred, keep the db alive
			(begin
			  (mutex-lock! *heartbeat-mutex*)
			  (set! *db-last-access* (current-seconds))
			  (mutex-unlock! *heartbeat-mutex*)
			  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
			(debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 6)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))

Modified tasks.scm from [7b85a80157] to [5e421a88af].

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))

    (system (conc "nbfake kill "kill-switch" "pid))

    (when logfile
      (thread-sleep! 0.5)
      (if (file-exists? gzfile) (delete-file gzfile))
      (system (conc "gzip "logfile))
      
      (unsetenv "TARGETHOST_LOGF")
      (unsetenv "TARGETHOST"))))
    
 
;;======================================================================
;; M O N I T O R S







|







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))

    (system (conc "nbfake kill "kill-switch" "pid))

    (when logfile
      (thread-sleep! 0.5)
      (if (file-exists? gzfile) (delete-file gzfile))
      (system (conc "gzip " logfile))
      
      (unsetenv "TARGETHOST_LOGF")
      (unsetenv "TARGETHOST"))))
    
 
;;======================================================================
;; M O N I T O R S

Modified tests.scm from [9a02d23357] to [9579ceb1ed].

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444


445
446
447
448
449
450
451
    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))

    (let ((category (hash-table-ref/default otherdat ":category" ""))
	  (variable (hash-table-ref/default otherdat ":variable" ""))
	  (value    (hash-table-ref/default otherdat ":value"    #f))
	  (expected (hash-table-ref/default otherdat ":expected" #f))
	  (tol      (hash-table-ref/default otherdat ":tol"      #f))
	  (units    (hash-table-ref/default otherdat ":units"    ""))
	  (type     (hash-table-ref/default otherdat ":type"     ""))
	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
      (debug:print 4 *default-log-port* 
		   "category: " category ", variable: " variable ", value: " value
		   ", expected: " expected ", tol: " tol ", units: " units)
      (if (and value expected tol) ;; all three required
	  (let ((dat (conc category ","
			   variable ","
			   value    ","
			   expected ","
			   tol      ","
			   units    ","
			   dcomment ",," ;; extra comma for status
			   type     )))
	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
	    (rmt:csv->test-data run-id test-id
				dat))))


      
    ;; need to update the top test record if PASS or FAIL and this is a subtest
    ;;;;;; (if (not (equal? item-path ""))
    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)

    (if (or (and (string? comment)
		 (string-match (regexp "\\S+") comment))







|
|






|










|
>
>







418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))

    (let ((category (hash-table-ref/default otherdat ":category" ""))
	  (variable (hash-table-ref/default otherdat ":variable" ""))
	  (value    (hash-table-ref/default otherdat ":value"    #f))
	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
	  (units    (hash-table-ref/default otherdat ":units"    ""))
	  (type     (hash-table-ref/default otherdat ":type"     ""))
	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
      (debug:print 4 *default-log-port* 
		   "category: " category ", variable: " variable ", value: " value
		   ", expected: " expected ", tol: " tol ", units: " units)
      (if (and value) ;; require only value; BB was- all three required
	  (let ((dat (conc category ","
			   variable ","
			   value    ","
			   expected ","
			   tol      ","
			   units    ","
			   dcomment ",," ;; extra comma for status
			   type     )))
	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
	    (rmt:csv->test-data run-id test-id
				dat)
            (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
            )))
      
    ;; need to update the top test record if PASS or FAIL and this is a subtest
    ;;;;;; (if (not (equal? item-path ""))
    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)

    (if (or (and (string? comment)
		 (string-match (regexp "\\S+") comment))

Modified tests/fdktestqa/testqa/megatest.config from [d32541500d] to [96a4d22c9d].

1
2
3
4
5
6
7


8
9
10
11
12
13
14
[setup]
testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
# launchwait no
launch-delay 0

[server]
runtime 180



# All these are overridden in ../fdk.config
# [jobtools]
# launcher nbfake
# launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log 

[include ../fdk.config]



|


|
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
[setup]
testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log
# launchwait no
launch-delay 0.1

[server]
# runtime 180
# timeout is in hours, this is how long the server will stay alive when not being used.
timeout 0.1

# All these are overridden in ../fdk.config
# [jobtools]
# launcher nbfake
# launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log 

[include ../fdk.config]

Modified tests/fdktestqa/testqa/tests/bigrun2/testconfig from [ccc63b9335] to [37225a43b3].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Add additional steps here. Format is "stepname script"
[ezsteps]
step1 step1.sh

# Test requirements are specified here
[requirements]
waiton bigrun
priority 0
mode itemwait
itemmap .*/

# Iteration for your tests are controlled by the items section
[items]
NUMBER #{scheme (string-intersperse (map (lambda (x)(conc "blah/" x)) \
                                         (map number->string (sort (let loop ((a 0)(res '())) \
                                                                        (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \
                                                                            (loop (+ a 1)(cons a res)) res)) <))) " ")}


# test_meta is a section for storing additional data on your test
[test_meta]













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Add additional steps here. Format is "stepname script"
[ezsteps]
step1 step1.sh

# Test requirements are specified here
[requirements]
waiton bigrun
priority 0
mode itemwait
itemmap .*/

# Iteration for your tests are controlled by the items section
[items]
NUMBER #{scheme (string-intersperse (map (lambda (x)(conc (if (getenv "USEBLAH") "blah/" "") x)) \
                                         (map number->string (sort (let loop ((a 0)(res '())) \
                                                                        (if (<= a (or (any->number (get-environment-variable "NUMTESTS")) 2500)) \
                                                                            (loop (+ a 1)(cons a res)) res)) <))) " ")}


# test_meta is a section for storing additional data on your test
[test_meta]