Megatest

Check-in [f0223f5b3a]
Login
Overview
Comment:Keep server alive for sync to megatest.db now working
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-no-rpc
Files: files | file ages | folders
SHA1: f0223f5b3a3c6e152e086c0d74e2e622ea674caf
User & Date: matt on 2016-11-26 22:21:20
Other Links: branch diff | manifest | tags
Context
2016-11-27
07:35
Added mutex to prevent overlapping runs of launch:setup check-in: 39f9bda2f2 user: matt tags: v1.62-no-rpc
2016-11-26
22:21
Keep server alive for sync to megatest.db now working check-in: f0223f5b3a user: matt tags: v1.62-no-rpc
21:09
Version transitions can only be addressed on the homehost. check-in: 279aab9f23 user: matt tags: v1.62-no-rpc
Changes

Modified common.scm from [6e0336f419] to [4ce144b0a8].

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
	       (begin
		 (debug:print-info 0 *default-log-port* "removing " gzfile)
		 (delete-file gzfile)))
	   (debug:print-info 0 *default-log-port* "compressing " file)
	   (system (conc "gzip logs/" file)))))
   '()
   "logs"))
    


;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;;
(define (common:exit-on-version-changed)
  (if (common:version-changed?)
      (if (common:on-homehost?)
	  (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))







<
<







225
226
227
228
229
230
231


232
233
234
235
236
237
238
	       (begin
		 (debug:print-info 0 *default-log-port* "removing " gzfile)
		 (delete-file gzfile)))
	   (debug:print-info 0 *default-log-port* "compressing " file)
	   (system (conc "gzip logs/" file)))))
   '()
   "logs"))



;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;;
(define (common:exit-on-version-changed)
  (if (common:version-changed?)
      (if (common:on-homehost?)
	  (let ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
512
513
514
515
516
517
518



519
520
521
522
523
524
525
526
527
528
529
530
531

532
533
534
535
536
537
538
539
540

541
542

543
544
545
546
547
548
549
550
551
552

553
554
555








556
557
558
559
560
561
562
563
564
565
566
  (message-digest-string (md5-primitive) *toppath*))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)



  (and (common:on-homehost?)
       (args:get-arg "-server")))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds)))
    (db:multi-db-sync dbstruct 'new2old)
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")))))


;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))

    (if legacy-sync
	(let ((dbstruct (db:setup)))

	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-write* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))

	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync (common:sync-to-megatest.db dbstruct)) 








	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))







>
>
>
|
|





|
|



|
>









>


>










>


|
>
>
>
>
>
>
>
>



|







510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
  (message-digest-string (md5-primitive) *toppath*))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (let ((ohh (common:on-homehost?))
	(srv (args:get-arg "-server")))
    (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
    (and (common:on-homehost?)
	 (args:get-arg "-server"))))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")))
    res))

;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync)
    (if legacy-sync
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-write* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
		  (let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
		    (if (> res 0) ;; some records were transferred, keep the db alive
			(begin
			  (mutex-lock! *heartbeat-mutex*)
			  (set! *db-last-access* (current-seconds))
			  (mutex-unlock! *heartbeat-mutex*)
			  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
			(debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
790
791
792
793
794
795
796


797
798
799
800
801





802
803
804
805
806
807
808
	(if target
	    (begin
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      #f)
	    #f))))

;; logic for getting homehost. Returns (host . at-home)


;;
(define (common:get-homehost)
  (cond
   (*home-host*     *home-host*)
   ((not *toppath*) #f)            ;; don't know toppath yet? return #f





   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (let ((hhf (conc *toppath* "/.homehost")))
			   (if (file-exists? hhf)







>
>

|


|
>
>
>
>
>







803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
	(if target
	    (begin
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      #f)
	    #f))))

;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
(define (common:get-homehost #!key (trynum 5))
  (cond
   (*home-host*     *home-host*)
   ((not *toppath*)
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (common:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (let ((hhf (conc *toppath* "/.homehost")))
			   (if (file-exists? hhf)
845
846
847
848
849
850
851

852
853
854
855
856
857
858
		     (talb (cdr listb)))
	    (if (equal? heda hedb)
		(if (null? tala) ;; we are done
		    talb
		    (loop (car tala)
			  (cdr tala)
			  (car talb)

			  (cdr talb)))
		#f)))))

;; Needed for long lists to be sorted where (apply max ... ) dies
;;
(define (common:max inlst)
  (let loop ((max-val (car inlst))







>







865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
		     (talb (cdr listb)))
	    (if (equal? heda hedb)
		(if (null? tala) ;; we are done
		    talb
		    (loop (car tala)
			  (cdr tala)
			  (car talb)
			  
			  (cdr talb)))
		#f)))))

;; Needed for long lists to be sorted where (apply max ... ) dies
;;
(define (common:max inlst)
  (let loop ((max-val (car inlst))

Modified db.scm from [78f8818c84] to [778d4cf187].

796
797
798
799
800
801
802
803

804
805
806
807
808
809
810
  (if (not (launch:setup))
      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	     (tmpdb    (dbr:dbstruct-tmpdb dbstruct))
             (refndb   (dbr:dbstruct-refndb dbstruct))
	     (allow-cleanup #t) ;; (if run-ids #f #t))
	     (tdbdat  (tasks:open-db))
	     (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))))

    
	;; kill servers
	(if (member 'killservers options)
	    (for-each
	     (lambda (server)
	       (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration")
	       (tasks:kill-server (vector-ref server 2)(vector-ref server 1)))







|
>







796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
  (if (not (launch:setup))
      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	     (tmpdb    (dbr:dbstruct-tmpdb dbstruct))
             (refndb   (dbr:dbstruct-refndb dbstruct))
	     (allow-cleanup #t) ;; (if run-ids #f #t))
	     (tdbdat  (tasks:open-db))
	     (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	     (data-synced 0)) ;; count of changed records (I hope)
    
	;; kill servers
	(if (member 'killservers options)
	    (for-each
	     (lambda (server)
	       (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration")
	       (tasks:kill-server (vector-ref server 2)(vector-ref server 1)))
842
843
844
845
846
847
848

849


850
851
852
853
854
855
856
;; 		   (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct)))))
;; 	       run-ids)))

	;; now ensure all newdb data are synced to megatest.db
	;; do not use the run-ids list passed in to the function
	;;
	(if (member 'new2old options)

	    (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb))



        (if (member 'fixschema options)
            (begin
              (db:patch-schema-maindb (db:dbdat-get-db mtdb))
              (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
              (db:patch-schema-maindb (db:dbdat-get-db refndb))
              (db:patch-schema-rundb  (db:dbdat-get-db mtdb))







>
|
>
>







843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
;; 		   (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-rundb dbstruct)))))
;; 	       run-ids)))

	;; now ensure all newdb data are synced to megatest.db
	;; do not use the run-ids list passed in to the function
	;;
	(if (member 'new2old options)
	    (set! data-synced
		  (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
		      data-synced)))


        (if (member 'fixschema options)
            (begin
              (db:patch-schema-maindb (db:dbdat-get-db mtdb))
              (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
              (db:patch-schema-maindb (db:dbdat-get-db refndb))
              (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
;; 		    (begin
;; 		      (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname)
;; 		      (delete-file fullname)))))
;; 	    dead-runs))))
;; 
	;; (db:close-all dbstruct)
	;; (sqlite3:finalize! mdb)
	)))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
  (exit)
  (if (or *db-write-access*







|







921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
;; 		    (begin
;; 		      (debug:print 0 *default-log-port* "Removing database file for deleted run " fullname)
;; 		      (delete-file fullname)))))
;; 	    dead-runs))))
;; 
	;; (db:close-all dbstruct)
	;; (sqlite3:finalize! mdb)
	data-synced)))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
  (exit)
  (if (or *db-write-access*

Modified server.scm from [0ee3e1c9e6] to [185590173a].

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
	(let ((res (handle-exceptions
		    exn
		    #f ;; failed for some reason, for the moment simply return #f
		    (with-output-to-file server-file
		      (lambda ()
			(print hostport)))
		    #t)))
	  (debug:print-info 0 *default-log-port* "server file " serverfile " for " hostport " created")
	  (common:simple-file-release-lock lock-file)
	  res)
	#f)))

(define (server:remove-dotserver-file areapath hostport)
  (let ((dotserver   (server:read-dotserver areapath))
	(server-file (conc areapath "/.server"))







|







184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
	(let ((res (handle-exceptions
		    exn
		    #f ;; failed for some reason, for the moment simply return #f
		    (with-output-to-file server-file
		      (lambda ()
			(print hostport)))
		    #t)))
	  (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " created")
	  (common:simple-file-release-lock lock-file)
	  res)
	#f)))

(define (server:remove-dotserver-file areapath hostport)
  (let ((dotserver   (server:read-dotserver areapath))
	(server-file (conc areapath "/.server"))