Overview
Context
Changes
Modified common.scm
from [ddadf02a13]
to [b6c40dc319].
︙ | | |
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
-
+
-
+
|
(ok-flag
(let ((res (system cmd)))
(cond
((eq? 0 res)
#t)
(else
(set! ok-flag #f)
(debug:print 0 *default-log-port* "ERROR: Command failed with exit code "
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Command failed with exit code "
(if (< res 0)
res
(/ res 8)) " ["cmd"]" )
#f))))
(else
(debug:print 0 *default-log-port* "ERROR: Nor runnining command due to prior error. ["cmd"]")
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Not runnining command due to prior error. ["cmd"]")
#f))))
(copy (lambda (src dest) (docmd (conc "/bin/cp '"src"' '"dest"'"))))
(copy+zip (lambda (src dest) (docmd (conc "gzip -c - < '"src"' > '"dest"'"))))
(fullpath (realpath filepath))
(basedir (pathname-directory fullpath))
(basefile (pathname-strip-directory fullpath))
;;(prevfile (conc filepath ".prev.gz"))
|
︙ | | |
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
|
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
|
+
+
+
+
+
+
-
-
|
;; (and ohh srv)))
;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
(define (common:human-time)
(time->string (seconds->local-time (current-seconds)) "%Y-%m-%d %H:%M:%S"))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
(thread-sleep! 0.05) ;; delay for startup
(debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
;; sync megatest.db to /tmp/.../megatst.db
(let* ((sync-cool-off-duration 3)
(golden-mtdb (dbr:dbstruct-mtdb dbstruct))
(golden-mtpath (db:dbdat-get-path golden-mtdb))
|
︙ | | |
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
|
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
|
-
+
-
+
|
;; first look in config, then look in file .homehost, create it if not found
(homehost (or (configf:lookup *configdat* "server" "homehost" )
(handle-exceptions
exn
(if (> trynum 0)
(let ((delay-time (* (- 5 trynum) 5)))
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: " ((condition-property-accessor 'exn 'message) exn))
(thread-sleep! delay-time)
(common:get-homehost trynum: (- trynum 1)))
(begin
(mutex-unlock! *homehost-mutex*)
(debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: " ((condition-property-accessor 'exn 'message) exn))
(exit 1)))
(let ((hhf (conc *toppath* "/.homehost")))
(if (common:file-exists? hhf)
(with-input-from-file hhf read-line)
(if (file-write-access? *toppath*)
(begin
(with-output-to-file hhf
|
︙ | | |
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
|
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
|
-
+
|
(if all-good
(let ((cmddat (make-qitem
command: command
host-port: host-port
params: params)))
(queue-push cmddat) ;; put request into the queue
(nn-send soc "queued")) ;; reply with "queued"
(print "ERROR: BAD request " dat))
(print "ERROR: ["(common:human-time)"] BAD request " dat))
(loop (nn-recv soc)))))
(nn-close soc)))
;;======================================================================
;; D A S H B O A R D U S E R V I E W S
|
︙ | | |
Modified server.scm
from [4b55838c84]
to [0dc738d239].
︙ | | |
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
|
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
|
-
+
+
-
+
+
-
-
+
+
+
+
+
+
+
|
(lockfile (conc tmp-db ".lock"))
(sync-cmd (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
(min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30)))
(if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
(args:get-arg "-server"))
(let loop ()
(thread-sleep! min-intersync-delay)
(if (not (common:file-exists? lockfile))
(if (common:simple-file-lock lockfile)
(begin
(if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
(common:snapshot-file mtdbfile subdir: ".db-snapshot"))
(delete-file* staging-file)
(let ((start-time (current-milliseconds))
(res (system sync-cmd)))
(cond
((eq? 0 res)
(delete-file* (conc mtdbfile ".backup"))
(system (conc "/bin/mv " staging-file " " mtdbfile))
(debug:print 1 *default-log-port* "INFO: SYNC took "(/ (- (current-milliseconds) start-time))" sec")
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] SYNC took "(/ (- (current-milliseconds) start-time))" sec")
#t)
(else
(system (conc "/bin/cp "sync-log" "sync-log".fail"))
(debug:print 0 *default-log-port* "ERROR: Sync failed. See log at "sync-log)
(system (conc "mv "mtdbfile ".backup" mtdbfile)))))))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
(if (file-exists? (conc mtdbfile ".backup"))
(system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))))
(common:simple-file-release-lock lockfile)))
;; else
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] other sync in progres; not syncing.")
) ;; end if got lockfile
;; keep going unless time to exit
;;
(if (not *time-to-exit*)
(let delay-loop ((count 0))
;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
|
︙ | | |