Megatest

Check-in [c8ec459e79]
Login
Overview
Comment:Reduce zombie cleanup, not clear it helps
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80-dbperformance
Files: files | file ages | folders
SHA1: c8ec459e7977f96daed577850f94d7beec3bc76d
User & Date: matt on 2023-02-03 21:51:17
Other Links: branch diff | manifest | tags
Context
2023-02-04
20:00
Change default number of db's to 10, couple minor fixes. Closed-Leaf check-in: 7b86032d25 user: matt tags: v1.80-dbperformance
2023-02-03
21:51
Reduce zombie cleanup, not clear it helps check-in: c8ec459e79 user: matt tags: v1.80-dbperformance
21:50
Cleanup droop curve a bit check-in: 07b36fe461 user: matt tags: v1.80-dbperformance
Changes

Modified launch.scm from [b249e00a3b] to [3243ef2278].

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
                        (file-close status-file)
                        )

                        ))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
			    (- 
			     (current-seconds) 
			     start-seconds)))))







|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))
                        (file-close status-file)
                        )

                        ))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "60")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
			    (- 
			     (current-seconds) 
			     start-seconds)))))
257
258
259
260
261
262
263

264
265
266
267
268
269
270
271
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)

        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
	  )







>
|







257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (if (common:low-noise-print 600 "run zombie") ;; every five minutes is plenty
	    (launch:handle-zombie-tests run-id))
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
	  )