Megatest

Check-in [15a030a849]
Login
Overview
Comment:Disabled run/remove lock using no-sync. It slows down things way too much and it seems the locking didn't help anything
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64 | v1.6425
Files: files | file ages | folders
SHA1: 15a030a8499701fd82c3bbb7df3d7f14dd626d61
User & Date: matt on 2017-07-13 07:38:01
Other Links: branch diff | manifest | tags
Context
2017-07-13
23:14
Increase threshold to 1 sec before running sync as separate process. Every hour do a full sync to ensure no data gets dropped. Added simple usage logger check-in: 79d2c0eca7 user: matt tags: v1.64
07:38
Disabled run/remove lock using no-sync. It slows down things way too much and it seems the locking didn't help anything check-in: 15a030a849 user: matt tags: v1.64, v1.6425
07:04
Use WAL mode on no-sync db check-in: 18c376fe83 user: matt tags: v1.64
Changes

Modified launch.scm from [efe1a9c8e9] to [8ce078ac53].

1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255













1256
1257
1258
1259
1260
1261
1262
1236
1237
1238
1239
1240
1241
1242













1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262







-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+







;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ((lock-key        (conc "test-" test-id))
	 (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
				     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
			    (if (car lock)
				#t
				(if (> (current-seconds) expire-time)
				    (begin
				      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
				      (rmt:no-sync-del! lock-key) ;; destroy the lock
				      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
				    (begin
				      (thread-sleep! 1)
				      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
1425
1426
1427
1428
1429
1430
1431
1432

1433
1434
1435
1436
1437
1438
1439
1425
1426
1427
1428
1429
1430
1431

1432
1433
1434
1435
1436
1437
1438
1439







-
+







					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	(rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)

Modified megatest-version.scm from [a6d6e89240] to [98eab1891e].

1
2
3
4
5
6

7
1
2
3
4
5

6
7





-
+

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6424)
(define megatest-version 1.6425)

Modified runs.scm from [642d238014] to [a45c56992f].

1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980














1981
1982
1983
1984
1985
1986
1987
1961
1962
1963
1964
1965
1966
1967













1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988







-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all))
         (test-id       (db:test-get-id test))
         (lock-key      (conc "test-" test-id))
         (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
				     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
			    (if (car lock)
				#t
				(if (> (current-seconds) expire-time)
				    (begin
				      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
				      (rmt:no-sync-del! lock-key) ;; destroy the lock
				      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
				    (begin
				      (thread-sleep! 1)
				      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
        ;; (lock-key      (conc "test-" test-id))
        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
	 )
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
2014
2015
2016
2017
2018
2019
2020
2021


2022
2023
2024
2025
2026
2027
2028
2015
2016
2017
2018
2019
2020
2021

2022
2023
2024
2025
2026
2027
2028
2029
2030







-
+
+







		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
    (rmt:no-sync-del! lock-key)))
    ;; (rmt:no-sync-del! lock-key)
    ))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code