Megatest

Diff
Login

Differences From Artifact [cc561d90e9]:

To Artifact [44aa462a83]:


230
231
232
233
234
235
236

237
238
239
240
241
242
243
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244







+







;; client side handler
;;
;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest")
;;
(define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe)
  (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum)
  ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now.
  ;; connect-to-server will start a server if needed.
  (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f)))
    (if conn
	;; have connection, call the server
	(let* ((res (tt:send-receive ttdat conn cmd run-id params)))
	  ;; res is (status errmsg result meta)
         ; (debug:print 0 *default-log-port* "conn:" conn " res: " res)
	  (match res
258
259
260
261
262
263
264
265

266
267
268
269
270
271
272
259
260
261
262
263
264
265

266
267
268
269
270
271
272
273







-
+







	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.")
		(tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn))
		result) ;; (tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else ;; did not receive properly formated result
	     (if (not res) ;; tt:handler is telling us that communication failed
	     (if (not res) ;; tt:send-receive telling us that communication failed
		 (let* ((host    (tt-conn-host conn))
			(port    (tt-conn-port conn))
			;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db
			(pid     (tt-conn-pid  conn))
                        ;;(servinf (tt-conn-servinf-file conn))) 
			(servinf (tt-servinf-file ttdat))) ;; (conc areapath"/.servinfo/"host":"port"-"pid":"dbfname))) ;; TODO, use (server:get-servinfo-dir areapath)
		   (hash-table-set! (tt-conns ttdat) dbfname #f)
291
292
293
294
295
296
297
298

299
300
301
302
303
304
305
292
293
294
295
296
297
298

299
300
301
302
303
304
305
306







-
+







				     ;; start server - addressed in client-connect-to-server
				     ;; delay        - addressed in client-connect-to-server
				     ;; try again
				     (thread-sleep! 0.25) ;; dunno, I think this needs to be here
				     (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
				   ))))
		       (begin ;; no server file, delay and try again
			 (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf)
			 (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", no servinf file. Server exited? ")
			 (thread-sleep! 0.5)
			 (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))))
		 (begin ;; this case is where res is malformed. Probably should abort
		   (assert #f "FATAL: tt:handler received bad data "res)
		   ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.")
		   ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)
		   )))))
474
475
476
477
478
479
480

481

482
483
484
485
486
487
488
475
476
477
478
479
480
481
482

483
484
485
486
487
488
489
490







+
-
+







(define (tt:start-server areapath run-id dbfname-in handler keys)
  (assert areapath "FATAL: areapath not provided for tt:start-server")
  ;; is there already a server for this dbfile? Then exit.
  (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in)
  (let* ((ttdat   (make-tt areapath: areapath))
	 (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))
	 (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead
         (debug:print 0 *default-log-port* "Found " (length servers) " already running for " dbfname)
    (if (> (length servers) 4)
    (if (> (length servers) 0)
	(begin
	  (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.")
	  (exit))
	(let* ((dbstruct   (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys)))
	  (tt-handler-set! ttdat (handler dbstruct))
	  (let* ((tcp-thread (make-thread
			      (lambda ()
535
536
537
538
539
540
541
542
543

544
545
546
547
548
549
550
537
538
539
540
541
542
543


544
545
546
547
548
549
550
551







-
-
+







		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat)))
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)
					      (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct)))
						(debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname)
						(db:no-sync-del! db dbfname)
						#;(if dbtmpname
						    (delete-file dbtmpname))))))))
                                                ))))))
    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
583
584
585
586
587
588
589

590
591
592
593
594
595
596
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598







+







						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if (and res (common:low-noise-print 120 "top server message"))
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
                        ;; wrong servinfo file
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (let* ((result  (tt:timed-ping host port server-id))
				    (res     (car result))
				    (ping    (cdr result)))