Overview
Comment: | ok, new syncer works, but -run-wait is not waiting. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.65-tweak-bfsync |
Files: | files | file ages | folders |
SHA1: |
1b9511af2613357b5ebbe3d52977a131 |
User & Date: | bjbarcla on 2019-03-15 18:38:41 |
Other Links: | branch diff | manifest | tags |
Context
2019-03-15
| ||
20:32 | fix fork to background for syncer check-in: b90b2aeb10 user: bjbarcla tags: v1.65-tweak-bfsync | |
18:38 | ok, new syncer works, but -run-wait is not waiting. check-in: 1b9511af26 user: bjbarcla tags: v1.65-tweak-bfsync | |
2019-03-14
| ||
18:28 | wip check-in: 89d013c708 user: bjbarcla tags: v1.65-tweak-bfsync | |
Changes
Modified common.scm from [8e98e8852f] to [b1d85b703a].
︙ | ︙ | |||
316 317 318 319 320 321 322 | (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) | | > > > > > > | 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) (define (common:get-sync-lock-filepath) (let* ((tmp-area (common:get-db-tmp-area)) (lockfile (conc tmp-area "/megatest.db.sync-lock"))) lockfile)) ;; Move me elsewhere ... ;; RADT => Why do we meed the version check here, this is called only if version misma ;; (define (common:cleanup-db dbstruct #!key (full #f)) (apply db:multi-db-sync dbstruct 'schema |
︙ | ︙ | |||
597 598 599 600 601 602 603 | (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) | | | | | > > | 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 | (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) ; (handle-exceptions ; exn ; #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail. (if (common:file-exists? fname) (if (> (- (current-seconds)(file-modification-time fname)) expire-time) (begin (delete-file* fname) (common:simple-file-lock fname expire-time: expire-time)) #f) (let ((key-string (conc (get-host-name) "-" (current-process-id)))) (with-output-to-file fname (lambda () (print key-string))) (thread-sleep! 0.25) (if (common:file-exists? fname) (with-input-from-file fname (lambda () (equal? key-string (read-line)))) #f))) ; ) ) (define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) (let ((end-time (+ expire-time (current-seconds)))) (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) (if got-lock #t (if (> end-time (current-seconds)) |
︙ | ︙ |
Modified db.scm from [b2f54e4e77] to [597e7ddad8].
︙ | ︙ | |||
1047 1048 1049 1050 1051 1052 1053 | ;; kill servers ((killservers) (for-each (lambda (server) (match-let (((mod-time host port start-time pid) server)) (if (and host pid) (tasks:kill-server host pid)))) | | > > > > | 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 | ;; kill servers ((killservers) (for-each (lambda (server) (match-let (((mod-time host port start-time pid) server)) (if (and host pid) (tasks:kill-server host pid)))) servers) ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock (delete-file* (common:get-sync-lock-filepath)) ) ;; clear out junk records ;; ((dejunk) (db:delay-if-busy mtdb) ;; ok to delay on mtdb (db:clean-up mtdb) (db:clean-up tmpdb) |
︙ | ︙ |
Modified megatest.scm from [8e2abc3809] to [8964e71ae0].
︙ | ︙ | |||
424 425 426 427 428 429 430 431 432 433 434 435 436 437 | "-create-megatest-area" "-mark-incompletes" "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only "-diff-rep" ) args:arg-hash | > | 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | "-create-megatest-area" "-mark-incompletes" "-convert-to-norm" "-convert-to-old" "-import-megatest.db" "-sync-to-megatest.db" "-sync-brute-force" "-logging" "-v" ;; verbose 2, more than normal (normal is 1) "-q" ;; quiet 0, errors/warnings only "-diff-rep" ) args:arg-hash |
︙ | ︙ | |||
2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 | 'killservers 'dejunk 'adj-testids 'old2new ;; 'new2old ) (set! *didsomething* #t))) (if (args:get-arg "-sync-to-megatest.db") (let* ((dbstruct (db:setup #f)) (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) (lockfile (conc tmpdbpth ".lock")) (locked (common:simple-file-lock lockfile)) (res (if locked | > > > > | 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 | 'killservers 'dejunk 'adj-testids 'old2new ;; 'new2old ) (set! *didsomething* #t))) (when (args:get-arg "-sync-brute-force") ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t)) (set! *didsomething* #t)) (if (args:get-arg "-sync-to-megatest.db") (let* ((dbstruct (db:setup #f)) (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct))) (lockfile (conc tmpdbpth ".lock")) (locked (common:simple-file-lock lockfile)) (res (if locked |
︙ | ︙ |
Modified server.scm from [98cdb6b75b] to [219dea7dfd].
︙ | ︙ | |||
499 500 501 502 503 504 505 | ;; (server:release-sync-lock) ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; | | | | > | | > > > | | > | > > | > > > | | | | | | | | | | | | | | | | | | | | | | | | > | | | | | | | | > > > | > | > > > > > > | | | > > > > | > | | | < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < | < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | > | | | > > | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 | ;; (server:release-sync-lock) ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) (lockfile (common:get-sync-lock-filepath)) (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) (sync-cmd (if fork-to-background (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core"\"") sync-cmd-core)) (default-min-intersync-delay 2) (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) (default-duty-cycle 0.1) (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) (calculate-off-time (lambda (work-duration duty-cycle) (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) (off-time min-intersync-delay) ;; adjusted in closure below. (do-a-sync (lambda () (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) (let* ((finalres (let retry-loop ((num-tries 0)) (if (common:simple-file-lock lockfile) (begin (cond ((not (or fork-to-background persist-until-sync)) (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay " , off-time="off-time" seconds ]") (thread-sleep! (max off-time min-intersync-delay))) (else (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) (common:snapshot-file mtdbfile subdir: ".db-snapshot")) (delete-file* staging-file) (let* ((start-time (current-milliseconds)) (res (system sync-cmd)) (res2 (cond ((eq? 0 res) (delete-file* (conc mtdbfile ".backup")) (if (eq? 0 (file-size sync-log)) (delete-file sync-log)) (system (conc "/bin/mv " staging-file " " mtdbfile)) (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) (set! off-time (calculate-off-time last-sync-seconds (cond ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) duty-cycle) (else (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) default-duty-cycle)))) (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) 'sync-completed) (else (system (conc "/bin/cp "sync-log" "sync-log".fail")) (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") (if (file-exists? (conc mtdbfile ".backup")) (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) #f)))) (common:simple-file-release-lock lockfile) (BB> "released lockfile: " lockfile) (when (common:file-exists? lockfile) (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) res2) ;; end let );; end begin ;; else (cond (persist-until-sync (thread-sleep! 1) (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") (retry-loop (add1 num-tries))) (else (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") 'parallel-sync-in-progress)) ) ;; end if got lockfile ) )) (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) finalres) ) ;; end lambda )) do-a-sync)) (define (server:writable-watchdog dbstruct) (thread-sleep! 10) ;; delay for startup (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct)) (final-sync (server:get-bruteforce-syncer dbstruct persist-until-sync: #t))) (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (do-a-sync) (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit ;; time to exit, close the no-sync db here (final-sync) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))) |