Megatest

Check-in [7e69485459]
Login
Overview
Comment:merged with latest v1.63
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.63-configdbsync
Files: files | file ages | folders
SHA1: 7e694854593a05cefa2d52deaaca276d82b9b1d8
User & Date: srehman on 2017-01-03 14:03:13
Other Links: branch diff | manifest | tags
Context
2017-01-09
12:04
updated to latest v1.63, WIP check-in: 8e63364af0 user: srehman tags: v1.63-configdbsync
2017-01-03
14:03
merged with latest v1.63 check-in: 7e69485459 user: srehman tags: v1.63-configdbsync
10:50
Commented out a call to BB> that seemed to have trouble ... check-in: e26b0892dd user: mrwellan tags: v1.63
2016-12-23
12:28
added ability to sync delta changes with configdb check-in: 57bb8e5d33 user: srehman tags: v1.63-configdbsync
Changes

Modified api.scm from [19e6c44e9d] to [1debddd502].

60
61
62
63
64
65
66


67
68
69
70
71
72
73
    testmeta-get-record
    have-incompletes?
    synchash-get
    ))

(define api:write-queries
  '(


    ;; SERVERS
    start-server
    kill-server

    ;; TESTS
    test-set-state-status-by-id
    delete-test-records







>
>







60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
    testmeta-get-record
    have-incompletes?
    synchash-get
    ))

(define api:write-queries
  '(
    get-keys-write ;; dummy "write" query to force server start

    ;; SERVERS
    start-server
    kill-server

    ;; TESTS
    test-set-state-status-by-id
    delete-test-records
187
188
189
190
191
192
193

194
195
196
197
198
199
200
	    ;;======================================================================
	    ;; READ ONLY QUERIES
	    ;;======================================================================

	    ;; KEYS
	    ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
	    ((get-keys)                        (db:get-keys dbstruct))

	    ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
	    ((get-target)                      (apply db:get-target dbstruct params))
	    ((get-targets)                     (db:get-targets dbstruct))

	    ;; ARCHIVES
	    ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
	    







>







189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
	    ;;======================================================================
	    ;; READ ONLY QUERIES
	    ;;======================================================================

	    ;; KEYS
	    ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
	    ((get-keys)                        (db:get-keys dbstruct))
            ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server
	    ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
	    ((get-target)                      (apply db:get-target dbstruct params))
	    ((get-targets)                     (db:get-targets dbstruct))

	    ;; ARCHIVES
	    ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
	    

Modified common.scm from [7213eed61f] to [d2df0eaa3f].

551
552
553
554
555
556
557
558
559
560
561
562





563
564
565
566

567
568
569
570


571
572
573
574
575

576
577
578
579
580
581

582
583
584
585
586
587
588
589
;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")))
    res))






;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)

  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))


    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync)
    (if legacy-sync
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()

	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* (
                   (need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)

		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync







|

|


>
>
>
>
>




>



|
>
>
|
|



>



<
|

>
|







551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587

588
589
590
591
592
593
594
595
596
597
598
;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
    res))




(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds))
        (this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
            (BB> "watchdog loop.  pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)

	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
607
608
609
610
611
612
613


614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))


		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (loop)))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*)))))))

(define (std-exit-procedure)
  (on-exit (lambda () 0))
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)







>
>





|

|







616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  (BB> "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))

(define (std-exit-procedure)
  (on-exit (lambda () 0))
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (if (and *runremote*
                                       (remote-conndat *runremote*))
                                  (begin
                                    (close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin







|







650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (if (and *runremote*
                                       (remote-conndat *runremote*))
                                  (begin
                                    (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin

Modified http-transport.scm from [ec36961174] to [285317a3d3].

484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
;; (if (and (<= rem-time 4)
;; 	     (> rem-time 0))
;; 	(thread-sleep! rem-time)
;; 	(thread-sleep! 4))) ;; fallback for if the math is changed ...

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)







|







484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
;; (if (and (<= rem-time 4)
;; 	     (> rem-time 0))
;; 	(thread-sleep! rem-time)
;; 	(thread-sleep! 4))) ;; fallback for if the math is changed ...

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)

Modified megatest.scm from [ea009aa7f2] to [e7342b90ae].

348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
	(args:get-arg "-help")







|







348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(thread-start! *watchdog*)
;; (BB> "thread-start! watchdog")
(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
	(args:get-arg "-help")
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008

;;======================================================================
;; Exit and clean up
;;======================================================================

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))

(thread-join! *watchdog*)
(set! *time-to-exit* #t)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))







|













1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008

;;======================================================================
;; Exit and clean up
;;======================================================================

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))
(BB> "thread-join! watchdog")
(thread-join! *watchdog*)
(set! *time-to-exit* #t)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))

Modified rmt.scm from [7e5abf90aa] to [c70f311cf0].

84
85
86
87
88
89
90











91
92
93
94
95
96
97


98
99
100
101
102
103


104
105
106
107
108
109
110
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; on homehost and this is a read
     ((and (cdr (remote-hh-dat *runremote*))   ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))











     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))


     ;; on homehost and this is a write, we have a server (we know because case 4 checked)
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
	   (not (member cmd api:read-only-queries)))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))


     ;; no server contact made and this is a write, passively start a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed







>
>
>
>
>
>
>
>
>
>
>







>
>
|
|
|
|
|
|
>
>







84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; on homehost and this is a read
     ((and (cdr (remote-hh-dat *runremote*))   ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*)           ;; have a server
           (not (server:read-dotserver *toppath*)))  ;; server has died.
      (set! *runremote* #f)
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; commented by bb; this was blocking server passive start on write on homehost (case 5)
     ;; ;; on homehost and this is a write, we have a server (we know because case 4 checked)
     ;; ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
     ;;       (not (member cmd api:read-only-queries)))
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
     ;;  (rmt:open-qry-close-locally cmd 0 params))

     
     ;; no server contact made and this is a write, passively start a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
347
348
349
350
351
352
353





354
355
356
357
358
359
360
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))

(define (rmt:get-keys)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))






;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
;; to cache the resuls in a hash
;;
(define (rmt:get-key-vals run-id)
  (or (hash-table-ref/default *keyvals* run-id #f)
      (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))







>
>
>
>
>







362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))

(define (rmt:get-keys)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))

(define (rmt:get-keys-write) ;; dummy query to force server start
  (let ((res (rmt:send-receive 'get-keys-write #f '())))
    (set! *db-keys* res)
    res))

;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
;; to cache the resuls in a hash
;;
(define (rmt:get-key-vals run-id)
  (or (hash-table-ref/default *keyvals* run-id #f)
      (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))