Megatest

Check-in [f3be772e6c]
Login
Overview
Comment:Merged in choosesync branch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: f3be772e6ca62c38a11429c4c833cb038efe88e4
User & Date: mrwellan on 2019-06-18 17:09:13
Other Links: branch diff | manifest | tags
Context
2019-06-19
10:49
Changed the word failed to unable in message to aleviate need to add logpro rule check-in: b129805ac8 user: mrwellan tags: v1.65
2019-06-18
17:09
Merged in choosesync branch check-in: f3be772e6c user: mrwellan tags: v1.65
2019-06-14
15:16
removed some debug console prints Closed-Leaf check-in: 234608b6ff user: bjbarcla tags: v1.65-choosesync
2019-06-13
15:48
Updated megatest version check-in: 2cf2b7b144 user: jmoon18 tags: v1.65
Changes

Modified common.scm from [424526ac90] to [4de50f0bf6].

926
927
928
929
930
931
932





933





934
935
936
937
938
939
940
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")





	      (server:writable-watchdog dbstruct)))





	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))







>
>
>
>
>
|
>
>
>
>
>







926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))

Modified db.scm from [7a0a4d3aa1] to [8e016daa09].

226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251

252
253
254
255
256
257
258
259
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             (if (not file-exists)
                 (begin
                   (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                      (begin
                       (print "DEBUG: Setting tmp_mode for " fname) 
                       (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
                      )
                   )  
                   (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                       (begin
                        (print "DEBUG: Setting nfs_mode for " fname)
                        (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
                       )
                   )  
                   (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))  
                            (configf:lookup *configdat* "setup" "use-wal")
                            (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                       (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                       (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))

                   (initproc db)))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)
                   (with-output-to-file
                       readyfname
                     (lambda ()
                       (print "Ready at " 







<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|







226
227
228
229
230
231
232


233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")


             (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                 (begin
                   ;;(print "DEBUG: Setting tmp_mode for " fname) 
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
                   )
                 )  
             (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                 (begin
                   ;;(print "DEBUG: Setting nfs_mode for " fname)
                   (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
                   )
                 )  
             (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))  
                      (configf:lookup *configdat* "setup" "use-wal")
                      (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                 (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                 (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
             (if (not file-exists)
                 (initproc db))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)
                   (with-output-to-file
                       readyfname
                     (lambda ()
                       (print "Ready at " 

Modified server.scm from [868645b72c] to [78810e8804].

591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615









616


























































































































                    ))
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)
            ) ;; end lambda
          ))
    do-a-sync))

(define (server:writable-watchdog dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))












































































































































|

















>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
                    ))
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)
            ) ;; end lambda
          ))
    do-a-sync))

(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))

(define (server:writable-watchdog-deltasync dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
		   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
		   (mt-mod-time      (file-modification-time mtpath))
		   (last-sync-start  (if (common:file-exists? start-file)
					 (file-modification-time start-file)
					 0))
		   (last-sync-end    (if (common:file-exists? end-file)
					 (file-modification-time end-file)
					 10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
					  (< mt-mod-time last-sync-start)))
		   (sync-done        (<= last-sync-start last-sync-end))
                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
					  (or sync-done sync-stale)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
				" sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
                        (sync-start         (current-milliseconds)))
		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
		    
		    ;; put lock here
		    
                    ;; (if (or (not max-sync-duration)
                    ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;;                         ;; TODO: factor this next routine out into a function
;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;;                          (lambda ()
;;                            (let loop ((inl (read-line))
;;                                       (res #f))
;;                              (if (eof-object? inl)
;;                                  (begin
;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;;                                    (cond
;;                                     ((not res)
;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;;                                     ((> res 0)
;;                                      (mutex-lock! *heartbeat-mutex*)
;;                                      (set! *db-last-access* (current-seconds))
;;                                      (mutex-unlock! *heartbeat-mutex*))))
;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;;                                                      (if matches
;;                                                          (string->number (cadr matches))
;;                                                          #f))))
;;                                    (loop (read-line)
;;                                          (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (with-output-to-file end-file (lambda ()(print (current-process-id))))

		    ;; release lock here

		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 6)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))