Megatest

Check-in [f6bba6dbbb]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.7001-multi-db-02
Files: files | file ages | folders
SHA1: f6bba6dbbb7f1e53db7734d57c3ea2121c1381d6
User & Date: matt on 2022-03-12 20:28:07
Other Links: branch diff | manifest | tags
Context
2022-03-13
20:27
wip check-in: cd75b861ad user: matt tags: v1.7001-multi-db-02
2022-03-12
20:28
wip check-in: f6bba6dbbb user: matt tags: v1.7001-multi-db-02
15:33
Got enough in place to run db:get-keys check-in: dc9845bf12 user: matt tags: v1.7001-multi-db-02
Changes

Modified Makefile from [ea4c347c2d] to [d5e6715cc1].

26
27
28
29
30
31
32
33




34
35
36
37
38
39
40
           process.scm runs.scm tasks.scm tests.scm genexample.scm	\
           http-transport.scm filedb.scm tdb.scm client.scm mt.scm	\
           ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm		\
           subrun.scm portlogger.scm archive.scm env.scm		\
           diff-report.scm cgisetup/models/pgdb.scm

# module source files
MSRCFILES = dbmod.scm dbfile.scm




# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
#             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
#             rmtmod.scm apimod.scm

GUISRCF = dashboard-context-menu.scm dashboard-tests.scm		\
          dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm	\







|
>
>
>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
           process.scm runs.scm tasks.scm tests.scm genexample.scm	\
           http-transport.scm filedb.scm tdb.scm client.scm mt.scm	\
           ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm		\
           subrun.scm portlogger.scm archive.scm env.scm		\
           diff-report.scm cgisetup/models/pgdb.scm

# module source files
MSRCFILES = dbmod.scm dbfile.scm # debugprint.scm mtargs.scm

# mofiles/dbfile.o     : mofiles/debugprint.o
# mofiles/debugprint.o : mofiles/mtargs.o

# ftail.scm rmtmod.scm commonmod.scm removed
# MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
#             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
#             rmtmod.scm apimod.scm

GUISRCF = dashboard-context-menu.scm dashboard-tests.scm		\
          dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm	\
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
tests.o db.o launch.o runs.o dashboard-tests.o				\
dashboard-context-menu.o dashboard-guimonitor.o dashboard-main.o	\
monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm

tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o  : run_records.scm

db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
db.o api.o : mofiles/dbmod.o mofiles/dbfile.o

tests.o tasks.o dashboard-tasks.o : task_records.scm

runs.o : test_records.scm

megatest.o : megatest-fossil-hash.scm megatest-version.scm








|







164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
tests.o db.o launch.o runs.o dashboard-tests.o				\
dashboard-context-menu.o dashboard-guimonitor.o dashboard-main.o	\
monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm

tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o  : run_records.scm

db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o

tests.o tasks.o dashboard-tasks.o : task_records.scm

runs.o : test_records.scm

megatest.o : megatest-fossil-hash.scm megatest-version.scm

Modified archive.scm from [a7e538df23] to [9231707c41].

395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
   (let* ((bup-exe               (or (configf:lookup *configdat* "archive" "bup") "bup"))
         (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" ))
         (bup-restore-params  (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path)))
		 (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path)
		 (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:"))
      (sleep 2)
      (db:multi-db-sync 
       (db:setup-db *dbstruct-dbs* *toppath* #f)
       'killservers
       ;'dejunk
       ;'adj-testids
       'old2new
       )
      (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") 
      (rmt:drop-all-triggers)







|







395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
   (let* ((bup-exe               (or (configf:lookup *configdat* "archive" "bup") "bup"))
         (archive-internal-path (conc (common:get-testsuite-name) "-megatest-db/" ts "/megatest.db" ))
         (bup-restore-params  (list "-d" archive-path "restore" "-C" *toppath* archive-internal-path)))
		 (debug:print-info 2 *default-log-port* "Restoring archived data to " *toppath* " from archive in " archive-path " ... " archive-internal-path)
		 (run-n-wait bup-exe params: bup-restore-params print-cmd: "Running:"))
      (sleep 2)
      (db:multi-db-sync 
       (db:setup #t) ;; (db:setup-db *dbstruct-dbs* *toppath* #f)
       'killservers
       ;'dejunk
       ;'adj-testids
       'old2new
       )
      (debug:print-info 1 *default-log-port* "dropping triggers to update linktree") 
      (rmt:drop-all-triggers)

Modified common.scm from [85af3678ab] to [2307821bb2].

591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
;;
(define (common:exit-on-version-changed)
  (if (common:on-homehost?)
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)







|







591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
;;
(define (common:exit-on-version-changed)
  (if (common:on-homehost?)
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
(define (common:run-sync?)
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))

;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
  ;; sync megatest.db to /tmp/.../megatst.db
  (let* ((sync-cool-off-duration   3)
        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
    (let loop ((last-sync-time 0))
      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))

;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1005
1006
1007
1008
1009
1010
1011




























































1012
1013
1014
1015
1016
1017
1018
(define (common:run-sync?)
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

(define (common:human-time)
  (time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))






























































(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f

Modified db.scm from [91359509c9] to [0f451ec65c].

44
45
46
47
48
49
50

51
52
53
54
55
56
57
     z3
     typed-records
     matchable)

(declare (unit db))
(declare (uses common))
(declare (uses dbmod))

(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))

(include "common_records.scm")







>







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
     z3
     typed-records
     matchable)

(declare (unit db))
(declare (uses common))
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))

(include "common_records.scm")
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:subdb-dbstack subdb) tmpdb)))

(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
  (if (<= try-num 0)
      #f
      (handle-exceptions
	  exn
	(begin
	  (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
	  (thread-sleep! 3)
	  (sqlite3:interrupt! db)
	  (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
	(if (sqlite3:database? db)
	    (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
	      (if stmts (map sqlite3:finalize! (hash-table-values stmts)))
	      (sqlite3:finalize! db)
	      #t)
	    #f))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (handle-exceptions
	  exn
	  (begin
	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
	    (print-call-chain *default-log-port*))
	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let* ((subdbs     (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
	  (for-each
	   (lambda (subdb)
	     (let* ((tdbs       (stack->list (dbr:subdb-dbstack subdb)))
		    (mdb        (dbr:dbdat-dbh (dbr:subdb-mtdb subdb)))
		    (rdb        (dbr:dbdat-dbh (dbr:subdb-refndb subdb))))
		    
	       (map (lambda (dbdat)
		      (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat))
			     (dbh        (dbr:dbdat-dbh        dbdat)))
			(db:safely-close-sqlite3-db dbh stmt-cache)))
		    tdbs)))
	   subdbs)
          (db:safely-close-sqlite3-db mdb stmt-cache)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
          (db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))

;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
;; 		  (hash-table-keys locdbs)))))








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<







428
429
430
431
432
433
434




































435




436
437
438
439
440
441
442
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:subdb-dbstack subdb) tmpdb)))





































;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile





;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
;; 		  (hash-table-keys locdbs)))))

4998
4999
5000
5001
5002
5003
5004



5005


































































































































































































































     results)
    ;; brutal clean up
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    (system "rm -rf tempdir")))

;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")














































































































































































































































>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
     results)
    ;; brutal clean up
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    (system "rm -rf tempdir")))

;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")

;;======================================================================
;; moving watch dogs here due to dependencies
;;======================================================================

;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
  ;; sync megatest.db to /tmp/.../megatst.db
  (let* ((sync-cool-off-duration   3)
        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
    (let loop ((last-sync-time 0))
      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))

;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))

(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))

(define (server:writable-watchdog-deltasync dbstruct)
  ;; This is awful complex and convoluted. Plan to redo?
  ;; for now ... skip it.
;; ==> 
;; ==> (thread-sleep! 0.05) ;; delay for startup
;; ==> (let ((legacy-sync  (common:run-sync?))
;; ==>       (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
;; ==> 	(debug-mode   (debug:debug-mode 1))
;; ==> 	(last-time    (current-seconds))
;; ==> 	(no-sync-db   (db:open-no-sync-db))
;; ==> 	(stmt-cache   #f) ;; (dbr:dbstruct-stmt-cache dbstruct))
;; ==> 	(sync-duration 0) ;; run time of the sync in milliseconds
;; ==>       (subdbs       (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
;; ==>   (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
;; ==>   (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
;; ==>   (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
;; ==>   
;; ==>   (if (and legacy-sync (not *time-to-exit*))
;; ==> 	(begin
;; ==> 	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
;; ==> 	    ;; sync for filesystem local db writes
;; ==> 	    ;;
;; ==> 	    (mutex-lock! *db-multi-sync-mutex*)
;; ==> 	       (let* ((start-file (conc tmp-area "/.start-sync"))
;; ==> 		      (end-file   (conc tmp-area "/.end-sync"))
;; ==> 			      
;; ==> 		      (need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
;; ==> 		      (sync-in-progress *db-sync-in-progress*)
;; ==> 		      (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
;; ==> 		      (should-sync      (and (not *time-to-exit*)
;; ==> 					     (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
;; ==> 		      (start-time       (current-seconds))
;; ==> 		      (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
;; ==> 		      (mt-mod-time      (file-modification-time mtpath))
;; ==> 		      (last-sync-start  (if (common:file-exists? start-file)
;; ==> 					    (file-modification-time start-file)
;; ==> 					    0))
;; ==> 		      (last-sync-end    (if (common:file-exists? end-file)
;; ==> 					    (file-modification-time end-file)
;; ==> 					    10))
;; ==> 		      (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
;; ==> 		      (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
;; ==> 					     (< mt-mod-time last-sync-start)))
;; ==> 		      (sync-done        (<= last-sync-start last-sync-end))
;; ==> 		      (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
;; ==> 		      (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
;; ==> 					     (or need-sync should-sync)
;; ==> 					     (or sync-done sync-stale)
;; ==> 					     (not sync-in-progress)
;; ==> 					     (not recently-synced))))
;; ==> 		 (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
;; ==> 				   " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
;; ==> 				   " sync-done=" sync-done " sync-period=" sync-period)
;; ==> 		 (if (and (> sync-period 5)
;; ==> 			  (common:low-noise-print 30 "sync-period"))
;; ==> 		     (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
;; ==> 		 ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; ==> 		 ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
;; ==> 		 (if will-sync (set! *db-sync-in-progress* #t))
;; ==> 		 (mutex-unlock! *db-multi-sync-mutex*)
;; ==> 		 (if will-sync
;; ==> 		     (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
;; ==> 			   (sync-start         (current-milliseconds)))
;; ==> 		       (with-output-to-file start-file (lambda ()(print (current-process-id))))
;; ==> 		       
;; ==> 		       ;; put lock here
;; ==> 		       
;; ==> 		       ;; (if (or (not max-sync-duration)
;; ==> 		       ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
;; ==> 
;; ==> 		      ;;
;; ==> 		     
;; ==> 		       (for-each
;; ==> 			(lambda (subdb)
;; ==> 			  (let* (;;(dbstruct (db:setup))
;; ==> 				 (mtdb       (dbr:subdb-mtdb subdb))
;; ==> 				 (mtpath     (db:dbdat-get-path mtdb))
;; ==> 				 (tmp-area   (common:get-db-tmp-area))
;; ==> 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
;; ==> 			    (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> 			    (if (> res 0) ;; some records were transferred, keep the db alive
;; ==> 				(begin
;; ==> 				  (mutex-lock! *heartbeat-mutex*)
;; ==> 				  (set! *db-last-access* (current-seconds))
;; ==> 				  (mutex-unlock! *heartbeat-mutex*)
;; ==> 				  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
;; ==> 				(debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
;; ==> 			  )
;; ==> 			subdbs)))
;; ==> 		 ;;                         ;; TODO: factor this next routine out into a function
;; ==> 		 ;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;; ==> 		 ;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;; ==> 		 ;;                          (lambda ()
;; ==> 		 ;;                            (let loop ((inl (read-line))
;; ==> 		 ;;                                       (res #f))
;; ==> 		 ;;                              (if (eof-object? inl)
;; ==> 		 ;;                                  (begin
;; ==> 		 ;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> 		 ;;                                    (cond
;; ==> 		 ;;                                     ((not res)
;; ==> 		 ;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;; ==> 		 ;;                                     ((> res 0)
;; ==> 		 ;;                                      (mutex-lock! *heartbeat-mutex*)
;; ==> 		 ;;                                      (set! *db-last-access* (current-seconds))
;; ==> 		 ;;                                      (mutex-unlock! *heartbeat-mutex*))))
;; ==> 		 ;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;; ==> 		 ;;                                                      (if matches
;; ==> 		 ;;                                                          (string->number (cadr matches))
;; ==> 		 ;;                                                          #f))))
;; ==> 		 ;;                                    (loop (read-line)
;; ==> 		 ;;                                          (or num-synced res))))))))))
;; ==> 		 
;; ==> 		 (if will-sync
;; ==> 		     (begin
;; ==> 		       (mutex-lock! *db-multi-sync-mutex*)
;; ==> 		       (set! *db-sync-in-progress* #f)
;; ==> 		       (set! *db-last-sync* start-time)
;; ==> 		       (with-output-to-file end-file (lambda ()(print (current-process-id))))
;; ==> 		       
;; ==> 		       ;; release lock here
;; ==> 		       
;; ==> 		       (mutex-unlock! *db-multi-sync-mutex*)))
;; ==> 		 (if (and debug-mode
;; ==> 			  (> (- start-time last-time) 60))
;; ==> 		     (begin
;; ==> 		       (set! last-time start-time)
;; ==> 		       (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
;; ==> 	       
	       ;; keep going unless time to exit
	       ;;
	       (if (not *time-to-exit*)
		   (let delay-loop ((count 0))
		     ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
		     
		     (if (and (not *time-to-exit*)
			      (< count 6)) ;; was 11, changing to 4. 
			 (begin
			   (thread-sleep! 1)
			   (delay-loop (+ count 1))))
		     (if (not *time-to-exit*) (loop))))
	       
;; ==> 	       ;; time to exit, close the no-sync db here
;; ==> 	       (db:no-sync-close-db no-sync-db stmt-cache)
	       (if (common:low-noise-print 30)
		   (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) ;; ))) ;;" this-wd-num="this-wd-num)))))))


Modified dbfile.scm from [8830f3b4fa] to [6f9211e5b8].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
29




30
31
32
33
34
35
36
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbfile))


(module dbfile
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18
	srfi-69)





;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct







>







|
>
>
>
>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbfile))
;; (declare (uses debugprint))

(module dbfile
	*
	
(import scheme chicken data-structures extras)
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18
	srfi-69
	stack
	)

;; (import debugprint)

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
63
64
65
66
67
68
69











































70
71
72
73
74
75
76
  (dbfile      #f)
  (dbh         #f)    
  (stmt-cache  (make-hash-table))
  (read-only   #f))

(define (dbfile:run-id->key run-id)
  (or run-id 'main))












































;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
;; ;;
;; (define (db:setup-db dbstruct areapath run-id)
;;   (let* ((dbname   (db:run-id->dbname run-id))
;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
;;     (if dbstruct







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  (dbfile      #f)
  (dbh         #f)    
  (stmt-cache  (make-hash-table))
  (read-only   #f))

(define (dbfile:run-id->key run-id)
  (or run-id 'main))

(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
  (if (<= try-num 0)
      #f
      (handle-exceptions
	  exn
	(begin
	  (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
	  (thread-sleep! 3)
	  (sqlite3:interrupt! db)
	  (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
	(if (sqlite3:database? db)
	    (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
	      (if stmts (map sqlite3:finalize! (hash-table-values stmts)))
	      (sqlite3:finalize! db)
	      #t)
	    #f))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
;; (handle-exceptions
;; 	  exn
;; 	  (begin
;; 	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
;; 	    (print-call-chain *default-log-port*))
	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let* ((subdbs     (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
	  (for-each
	   (lambda (subdb)
	     (let* ((tdbs       (stack->list (dbr:subdb-dbstack subdb)))
		    (mdb        (dbr:dbdat-dbh (dbr:subdb-mtdb subdb)))
		    (rdb        (dbr:dbdat-dbh (dbr:subdb-refndb subdb))))
		    
	       (map (lambda (dbdat)
		      (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat))
			     (dbh        (dbr:dbdat-dbh        dbdat)))
			(db:safely-close-sqlite3-db dbh stmt-cache)))
		    tdbs)
	       (db:safely-close-sqlite3-db mdb #f) ;; stmt-cache)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
	       (db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
	   subdbs))))
;; )

;; ;; set up a single db (e.g. main.db, 1.db ... etc.)
;; ;;
;; (define (db:setup-db dbstruct areapath run-id)
;;   (let* ((dbname   (db:run-id->dbname run-id))
;; 	 (dbstruct (hash-table-ref/default dbstructs dbname #f)))
;;     (if dbstruct

Added debugprint.scm version [54f7083883].































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

(declare (unit debugprint))
(declare (uses mtargs))

(module debugprint
	*
	
;;(import scheme chicken data-structures extras files ports)
  (import
    scheme
    chicken
    data-structures
    posix
    ports
    extras
    
    ;; scheme
    ;; chicken.base
    ;; chicken.string
    ;; chicken.time
    ;; chicken.time.posix
    ;; chicken.port
    ;; chicken.process-context
    ;; chicken.process-context.posix
    
    (prefix mtargs args:)
    srfi-1
    ;; system-information
    )
  
;;======================================================================
;; debug stuff
;;======================================================================

(define verbosity (make-parameter '()))
(define *default-log-port*  (current-error-port))
(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print
	 
(define (debug:setup)
  (let ((debugstr (or (args:get-arg "-debug")
      		      (args:get-arg "-debug-noprop")
      		      (get-environment-variable "MT_DEBUG_MODE"))))
    (verbosity (debug:calc-verbosity debugstr 'q))
    (debug:check-verbosity (verbosity) debugstr)
    ;; if we were handed a bad verbosity rule then we will override it with 1 and continue
    (if (not (verbosity))(verbosity 1))
    (if (and (not (args:get-arg "-debug-noprop"))
      	     (or (args:get-arg "-debug")
      		 (not (get-environment-variable "MT_DEBUG_MODE"))))
      	(setenv #;set-environment-variable! "MT_DEBUG_MODE" (if (list? (verbosity))
      				    (string-intersperse (map conc (verbosity)) ",")
      				    (conc (verbosity)))))))

;; check verbosity, #t is ok
(define (debug:check-verbosity verbosity vstr)
  (if (not (or (number? verbosity)
     	       (list?   verbosity)))
      (begin
     	(print "ERROR: Invalid debug value \"" vstr "\"")
     	#f)
      #t))

;;======================================================================
;; (define (debug:print . params) #f)
;; (define (debug:print-info . params) #f)
;; 
;; (define (set-functions dbgp dbgpinfo)
;;   (set! debug:print dbgp)
;;   (set! debug:print-info dbgpinfo))

;;======================================================================
;; this was cached based on results from profiling but it turned out the profiling
;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching
;; in for now but can probably take it out later.
;;
(define (debug:calc-verbosity vstr arg) ;; arg is 'v (verbose) or 'q (quiet)
  (let* ((res (cond
	       ((number? vstr) vstr)
	       ((not (string?  vstr))   1)
	       ;; ((string-match  "^\\s*$" vstr) 1)
	       (vstr           (let ((debugvals  (filter number? (map string->number (string-split vstr ",")))))
				 (cond
				  ((> (length debugvals) 1) debugvals)
				  ((> (length debugvals) 0)(car debugvals))
				  (else 1))))
	       ((eq? arg 'v)   2) ;; verbose
	       ((eq? arg 'q)   0) ;; quiet
	       (else                   1))))
    (verbosity res)
    res))

;;======================================================================
;; check verbosity, #t is ok
#;(define (debug-check-verbosity verbosity vstr)
  (if (not (or (number? verbosity)
	       (list?   verbosity)))
      (begin
	(print "ERROR: Invalid debug value \"" vstr "\"")
	#f)
      #t))

(define (debug:debug-mode n)
  (let* ((vb (verbosity)))
    (cond
     ((and (number? vb)   ;; number number
	   (number? n))
      (<= n vb))
     ((and (list? vb)     ;; list   number
	   (number? n))
      (member n vb))
     ((and (list? vb)     ;; list   list
	   (list? n))
      (not (null? (lset-intersection! eq? vb n))))
     ((and (number? vb)
	   (list? n))
      (member vb n))
     (else #f))))

(define (debug:handle-remote-logging params)
  (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now
      ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") "
				 (string-intersperse (map conc params) " ") "; "
				 (string-intersperse (command-line-arguments) " ")))))

(define debug:enable-timestamp (make-parameter #t))

(define (debug:timestamp)
  (if (debug:enable-timestamp)
      (conc (time->string 
	     (seconds->local-time (current-seconds)) "%H:%M:%S") " ")
      ""))

  (define (debug:print n e . params)
  (if (debug:debug-mode n)
      (with-output-to-port (or e (current-error-port))
	(lambda ()
	  ;; (if *logging*
	      ;; (db:log-event (apply conc params))
	  (apply print (debug:timestamp) params)
	  ;; (debug:handle-remote-logging params)
	  )))
  #t ;; only here to make remote stuff happy. It'd be nice to fix that ...
  )

(define (debug:print-error n e . params)
  ;; normal print
  (if (debug:debug-mode n)
      (with-output-to-port (if (port? e) e (current-error-port))
	(lambda ()
	  (apply print "ERROR: " (debug:timestamp) params)
	  ;; (debug:handle-remote-logging (cons "ERROR: " params))
	  )))
  ;; pass important messages to stderr
  (if (and (eq? n 0)(not (eq? e (current-error-port)))) 
      (with-output-to-port (current-error-port)
	(lambda ()
	  (apply print "ERROR: " (debug:timestamp) params)
	  ))))

(define (debug:print-info n e . params)
  (if (debug:debug-mode n)
      (with-output-to-port (if (port? e) e (current-error-port))
	(lambda ()
	  (apply print "INFO: (" n ") "(debug:timestamp) params) ;; res)
	  ;; (debug:handle-remote-logging (cons "INFO: " params))
	  ))))

(define (debug:print-warn n e . params)
  (if (debug:debug-mode n)
      (with-output-to-port (if (port? e) e (current-error-port))
	(lambda ()
	  (apply print "WARN: (" n ") " (debug:timestamp) params) ;; res)
	  ;; (debug:handle-remote-logging (cons "WARN: " params))
	  ))))
)

Modified megatest.scm from [78b560ce68] to [de7df75e2a].

28
29
30
31
32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47




48
49
50
51
52
53
54
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

(declare (uses db))
;; (declare (uses dcommon))

(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))





;; (declare (uses ftail))
;; (import ftail)

(import dbmod
	dbfile)








>













>
>
>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
;; (declare (uses daemon))

(declare (uses db))
;; (declare (uses dcommon))

(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.
(declare (uses env))
(declare (uses diff-report))
(declare (uses dbmod))
(declare (uses dbmod.import))
(declare (uses dbfile))
(declare (uses dbfile.import))
;; (declare (uses debugprint))
;; (declare (uses debugprint.import))
;; (declare (uses mtargs))
;; (declare (uses mtargs.import))

;; (declare (uses ftail))
;; (import ftail)

(import dbmod
	dbfile)

Modified mtargs/mtargs.scm from [e2f1c247b7] to [147e7c2628].

54
55
56
57
58
59
60










61
62
63
64
65
66
67
  (if (> (length args) 0)
      (apply print "ERROR: " args))
  (if (string? help)
      (print help)
      (print "Usage: " (car (argv)) " ... "))
  (exit 0))











(define (get-args args params switches arg-hash num-needed)
  (let* ((numtargs (length args))
	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
    (if (< numtargs (if adj-num-needed adj-num-needed 2))
	(if (>= num-needed 1)
	    (usage "No arguments provided")
	    '())







>
>
>
>
>
>
>
>
>
>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  (if (> (length args) 0)
      (apply print "ERROR: " args))
  (if (string? help)
      (print help)
      (print "Usage: " (car (argv)) " ... "))
  (exit 0))

 ;; one-of args defined
(define (args:any-defined? . param)
  (let ((res #f))
    (for-each 
     (lambda (arg)
       (if (get-arg arg)(set! res #t)))
     param)
    res))

;; args: 
(define (get-args args params switches arg-hash num-needed)
  (let* ((numtargs (length args))
	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
    (if (< numtargs (if adj-num-needed adj-num-needed 2))
	(if (>= num-needed 1)
	    (usage "No arguments provided")
	    '())

Modified server.scm from [99d72bd3eb] to [df77c3b3af].

701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
                    ))
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)
            ) ;; end lambda
          ))
    do-a-sync))

(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))

(define (server:writable-watchdog-deltasync dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
	(stmt-cache   (dbr:dbstruct-stmt-cache dbstruct))
        (sync-duration 0) ;; run time of the sync in milliseconds
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
                   (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
		   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
		   (mt-mod-time      (file-modification-time mtpath))
		   (last-sync-start  (if (common:file-exists? start-file)
					 (file-modification-time start-file)
					 0))
		   (last-sync-end    (if (common:file-exists? end-file)
					 (file-modification-time end-file)
					 10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
					  (< mt-mod-time last-sync-start)))
		   (sync-done        (<= last-sync-start last-sync-end))
                   (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
					  (or sync-done sync-stale)
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
				" sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
                        (sync-start         (current-milliseconds)))
		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
		    
		    ;; put lock here
		    
                    ;; (if (or (not max-sync-duration)
                    ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;;                         ;; TODO: factor this next routine out into a function
;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;;                          (lambda ()
;;                            (let loop ((inl (read-line))
;;                                       (res #f))
;;                              (if (eof-object? inl)
;;                                  (begin
;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;;                                    (cond
;;                                     ((not res)
;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;;                                     ((> res 0)
;;                                      (mutex-lock! *heartbeat-mutex*)
;;                                      (set! *db-last-access* (current-seconds))
;;                                      (mutex-unlock! *heartbeat-mutex*))))
;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;;                                                      (if matches
;;                                                          (string->number (cadr matches))
;;                                                          #f))))
;;                                    (loop (read-line)
;;                                          (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (with-output-to-file end-file (lambda ()(print (current-process-id))))

		    ;; release lock here

		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 6)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db stmt-cache)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
701
702
703
704
705
706
707






















































































































































                    ))
              (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
              finalres)
            ) ;; end lambda
          ))
    do-a-sync))