Megatest

Check-in [6bbd0fa9a2]
Login
Overview
Comment:Merged fork
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.0001 | ulex-smoketest
Files: files | file ages | folders
SHA1: 6bbd0fa9a2a8dafe12efd3894193813f5716fd16
User & Date: matt on 2022-01-13 18:50:14
Other Links: branch diff | manifest | tags
Context
2022-01-14
08:15
Merged changes from v2.0001 check-in: 3253e6faaa user: matt tags: v2.0001-ulex-one-shot
00:34
Removed need for old hostinfo egg check-in: c3e3d94576 user: matt tags: v2.0001
2022-01-13
18:50
Merged fork check-in: 6bbd0fa9a2 user: matt tags: v2.0001, ulex-smoketest
16:38
Added work and notification mailboxes to tcp-server demo stuff check-in: 167b804135 user: jmoon18 tags: v2.0001
2022-01-12
16:40
wip, misc cleanup and reduce some messages. check-in: 20b4054f76 user: matt tags: v2.0001
Changes

Modified dashboard.scm from [cc72246a09] to [4505f63ba6].

3640
3641
3642
3643
3644
3645
3646
3647

3648
3649
3650
3651
3652
3653
3654
3640
3641
3642
3643
3644
3645
3646

3647
3648
3649
3650
3651
3652
3653
3654







-
+







(define (dashboard:do-update-rundat tabdat)
  (dboard:update-rundat
   tabdat
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")
   (dboard:tabdat-numruns tabdat)
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
   ;; generate key patterns from the target stored in tabdat
   (let* ((dbkeys (dboard:tabdat-dbkeys tabdat)))
   (let* ((dbkeys  (dboard:tabdat-dbkeys tabdat)))
     (let ((fres   (if (dboard:tabdat-target tabdat)
                       (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%"))))
                         (map (lambda (k v)(list k v)) dbkeys ptparts))
                       (let ((res '()))
                         (for-each (lambda (key)
                                     (if (not (equal? key "runname"))
                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))

Modified dbmod.scm from [4d2069b432] to [ac637164a6].

691
692
693
694
695
696
697


698

699
700
701
702
703
704
705
691
692
693
694
695
696
697
698
699

700
701
702
703
704
705
706
707







+
+
-
+







;; ;;     (mutex-unlock! *db-multi-sync-mutex*)
;; ;;     (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

;; NOTE: touched logic is disabled/not done
;; sync run to disk if touched
;;
(define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f))
  (if #f
      (debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbfile" at "(current-seconds))
  #f) ;; disabled
      #f)) ;; disabled
;;   (let* ((dbdat       (db:get-dbdat dbstruct apath dbfile))
;; 	 (dbfullname  (conc apath "/" dbfile))
;; 	 (db          (db:open-run-db dbfullname db:initialize-db)) ;; (dbr:dbdat-db dbdat))
;; 	 (inmem       (dbr:dbdat-inmem dbdat))
;; 	 (start-t     (current-seconds))
;; 	 (last-update (dbr:dbdat-last-write dbdat))
;; 	 (last-sync   (dbr:dbdat-last-sync dbdat)))

Modified rmtmod.scm from [a8f42f4480] to [e895913787].

2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2146
2147
2148
2149
2150
2151
2152

2153
2154
2155
2156
2157
2158
2159







-







			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
			(thread-start! watchdog))
		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
	      #;(loop (+ count 1) bad-sync-count start-time)
	      ))
	
	(debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbname" at "(current-seconds))
	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
	
	(mutex-unlock! *heartbeat-mutex*)
	
	;; when things go wrong we don't want to be doing the various
	;; queries too often so we strive to run this stuff only every
	;; four seconds or so.

Modified ulex/ulex.scm from [70c15d4319] to [81b8992868].

217
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244



















































245
246
247
248
249

250
251
252
253
254
255
256
257
258









259
260

261
262

263
264
265
266

267
268
269
270
271
272
273
274
275
276
277
278



279
280
281
282
283


284
285
286
287
288
289
290
291

292
293
294
295
296
297


298
299
300

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321

322
323
324
325
326
327
328
217
218
219
220
221
222
223

224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301









302
303
304
305
306
307
308
309
310


311


312




313












314
315
316





317
318








319






320
321



322








323
324
325
326
327
328
329
330
331
332
333
334

335
336
337
338
339
340
341
342







-
+




















+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
+
-
-
+
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
+
+
-
-
-
+
-
-
-
-
-
-
-
-












-
+







;;        - I believe (without substantial evidence) that re-using connections will
;;          be beneficial ...
;;
(define (send udata host-port qrykey cmd params)
  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
	 ;; dat is a self-contained work block that can be sent or handled locally
	 (dat          (list my-host-port qrykey cmd params)))
	 (dat          (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
    (cond
     (isme (ulex-handler udata dat)) ;; no transmission needed
     (else
      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
	  exn
	  #f
	(begin
	  ;; (mutex-lock! *send-mutex*)
	  (let-values (((inp oup)(tcp-connect host-port)))
	    (let ((res (if (and inp oup)
			   (begin
			     (serialize dat oup)
			     (deserialize inp))
			   (begin
			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			     #f))))
	      (close-input-port inp)
	      (close-output-port oup)
	      ;; (mutex-unlock! *send-mutex*)
	      res)))))))) ;; res will always be 'ack unless return-method is direct

(define (send-via-polling uconn host-port cmd data)
  (let* ((qrykey (make-cookie uconn))
	 (sres   (send uconn host-port qrykey cmd data)))
    (case sres
      ((ack)
       (let loop ((start-time (current-milliseconds)))
	 (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
	     (begin
	       (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
	       #f)
	     (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
	       (if result ;; result is '(status . result-data) or #f for nothing yet
		   (begin
		     (hash-table-delete! (udat-mboxes uconn) qrykey)
		     (cdr result))
		   (begin
		     (thread-sleep! 0.01)
		     (loop start-time)))))))
      (else
       (print "ULEX ERROR: Communication failed? sres="sres)
       #f))))

(define (send-via-mailbox uconn host-port cmd data)
  (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
	 (qrykey    (car cmbox))
	 (mbox      (cdr cmbox))
	 (mbox-time (current-milliseconds))
	 (sres      (send uconn host-port qrykey cmd data))) ;; short res
    (if (eq? sres 'ack)
	(let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
				     #f
				     120)) ;; timeout)
	       (mbox-timeout-result 'MBOX_TIMEOUT)
	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
	       (mbox-receive-time    (current-milliseconds)))
	  ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
	  (hash-table-delete! (udat-mboxes uconn) qrykey)
	  (if (eq? res 'MBOX_TIMEOUT)
	      (begin
		(print "WARNING: mbox timed out for query "cmd", with data "data
		       ", waiting for response from "host-port".")

		;; here it might make sense to clean up connection records and force clean start?
		;; NO. The progam using ulex needs to do the reset. Right thing here is exception
		
		#f)  ;; convert to raising exception?
	      res))
	(begin
	  (print "ERROR: Communication failed? Got "sres)
	  #f))))
  
;; send a request to the given host-port and register a mailbox in udata
;; wait for the mailbox data and return it
;;
(define (send-receive uconn host-port cmd data)
  (let* ((start-time (current-milliseconds))
  (cond
   ((member cmd '(ping goodbye)) ;; these are immediate
    (send uconn host-port 'ping cmd data))
   ((eq? (work-method) 'direct)
    ;; the result from send will be the actual result, not an 'ack
    (send uconn host-port 'direct cmd data))
   (else
    (case (return-method)
      ((polling)
	 (result     (cond
		      ((member cmd '(ping goodbye)) ;; these are immediate
		       (send uconn host-port 'ping cmd data))
		      ((eq? (work-method) 'direct)
		       ;; the result from send will be the actual result, not an 'ack
		       (send uconn host-port 'direct cmd data))
		      (else
		       (case (return-method)
			 ((polling)
       (let* ((qrykey (make-cookie uconn))
	      (sres   (send uconn host-port qrykey cmd data)))
			  (send-via-polling uconn host-port cmd data))
	 (case sres
	   ((ack)
			 ((mailbox) 
	    (let loop ((start-time (current-milliseconds)))
	      (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
		  (begin
		    (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
			  (send-via-mailbox uconn host-port cmd data))
		    #f)
		  (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
		    (if result ;; result is '(status . result-data) or #f for nothing yet
			(begin
			  (hash-table-delete! (udat-mboxes uconn) qrykey)
			  (cdr result))
			(begin
			  (thread-sleep! 0.01)
			  (loop start-time)))))))
	   (else
	    (print "ULEX ERROR: Communication failed? sres="sres)
	    #f))))
			 (else
			  (print "ULEX ERROR: unrecognised return-method "(return-method)".")
			  #f))))))
      ((mailbox) 
       (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
	      (qrykey    (car cmbox))
	      (mbox      (cdr cmbox))
	      (mbox-time (current-milliseconds))
    ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
    (if (< 5000 (- (current-milliseconds) start-time))
	      (sres      (send uconn host-port qrykey cmd data))) ;; short res
	 (if (eq? sres 'ack)
	     (let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
					  #f
					  120)) ;; timeout)
		    (mbox-timeout-result 'MBOX_TIMEOUT)
		    (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
		    (mbox-receive-time    (current-milliseconds)))
	(print "ULEX WARNING: round-trip took over 5 seconds; "
	       ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
	       (hash-table-delete! (udat-mboxes uconn) qrykey)
	       (if (eq? res 'MBOX_TIMEOUT)
		   (begin
		     (print "WARNING: mbox timed out for query "cmd", with data "data", waiting for response from "host-port".")

	       cmd", host-port="host-port", data="data))
    result))
		     ;; here it might make sense to clean up connection records and force clean start?
		     ;; NO. The progam using ulex needs to do the reset. Right thing here is exception
		     
    
		     #f)  ;; convert to raising exception?
		   res))
	     (begin
	       (print "ERROR: Communication failed? Got "sres)
	       #f))))
      (else
       (print "ULEX ERROR: unrecognised return-method "(return-method)".")
       #f)))))

;;======================================================================
;; responder side
;;======================================================================

;; take a request, rdat, and if not immediate put it in the work queue
;;
;; Reserved cmds; ack ping goodbye response
;;
(define (ulex-handler uconn rdat)
  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
  (match rdat ;;  (string-split controldat)
    ((rem-host-port qrykey cmd params)
    ((rem-host-port qrykey cmd params);; timedata)
     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
     (case cmd
       ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
       ((ping)
	;; (print "Got Ping!")
	;; (add-to-work-queue uconn rdat)
	'ack)