Megatest

Check-in [c0c5c1cbc3]
Login
Overview
Comment:removed server-start from run
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: c0c5c1cbc3035611fba45e1f52314479fcfb0a37
User & Date: bjbarcla on 2017-07-14 17:29:46
Other Links: branch diff | manifest | tags
Context
2017-07-17
08:50
Added simple script for observing close_wait check-in: 2d5edb0a67 user: mrwellan tags: v1.64
2017-07-16
20:39
First steps towards better archive handling check-in: 7323061194 user: matt tags: v1.64-better-archives
2017-07-14
17:29
removed server-start from run check-in: c0c5c1cbc3 user: bjbarcla tags: v1.64
17:21
Added get-file-descriptor-count function to common.scm check-in: 09e3c49fcb user: jmoon18 tags: v1.64
Changes

cgisetup/cgi-bin/models became a symlink with target [39c07627cc].

cgisetup/cgi-bin/pages became a symlink with target [e2b5ed002d].

Modified http-transport.scm from [e4a4e03b22] to [0a52055d0d].

304
305
306
307
308
309
310

311
312
313
314
315
316
317
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318







+







	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
	    (close-connection! api-dat)
            (close-idle-connections!)
	    #t))
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))

Modified rmt.scm from [6f288ac3e2] to [677a774188].

228
229
230
231
232
233
234
235

236
237

238
239
240
241
242
243
244
228
229
230
231
232
233
234

235
236

237
238
239
240
241
242
243
244







-
+

-
+








     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else
      (mutex-unlock! *rmt-mutex*)
      ;; (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      (mutex-lock! *rmt-mutex*)
      ;; (mutex-lock! *rmt-mutex*)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else

Modified runs.scm from [a45c56992f] to [df9cc9bbed].

324
325
326
327
328
329
330
331
332
333



334
335
336
337
338
339
340
324
325
326
327
328
329
330



331
332
333
334
335
336
337
338
339
340







-
-
-
+
+
+







					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server
    (debug:print 0 *default-log-port* "waiting on server...")
    (server:start-and-wait *toppath*)
    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
    ;;(debug:print 0 *default-log-port* "waiting on server...")
    ;;(server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))
594
595
596
597
598
599
600

601
602
603
604
605
606
607
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608







+







      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.  
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
1269
1270
1271
1272
1273
1274
1275

1276
1277
1278
1279
1280
1281
1282






1283
1284
1285
1286
1287
1288
1289
1270
1271
1272
1273
1274
1275
1276
1277
1278






1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291







+

-
-
-
-
-
-
+
+
+
+
+
+







			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

        ;; -- removed BB 17ww28 - no longer needed.
	;; every 15 minutes verify the server is there for this run
	(if (and (common:low-noise-print 240 "try start server"  run-id)
		 (not (or (and *runremote*
			       (remote-server-url *runremote*)
			       (server:ping (remote-server-url *runremote*)))
			  (server:check-if-running *toppath*))))
	    (server:kind-run *toppath*))
	;; (if (and (common:low-noise-print 240 "try start server"  run-id)
	;; 	 (not (or (and *runremote*
	;; 		       (remote-server-url *runremote*)
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))
1407
1408
1409
1410
1411
1412
1413
1414

1415
1416
1417
1418
1419
1420
1421
1409
1410
1411
1412
1413
1414
1415

1416
1417
1418
1419
1420
1421
1422
1423







-
+







	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test