Megatest

Check-in [f69329ec9a]
Login
Overview
Comment:More cleanup of cleanup, removed all use of cleanup-proc
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80-revolution
Files: files | file ages | folders
SHA1: f69329ec9ab9dd7c730e0805eb80e2d1db6bff2d
User & Date: matt on 2023-11-22 04:21:08
Other Links: branch diff | manifest | tags
Context
2023-11-22
15:45
beginnings of new lock-in for main.db check-in: a29849711f user: mrwellan tags: v1.80-revolution
04:21
More cleanup of cleanup, removed all use of cleanup-proc check-in: f69329ec9a user: matt tags: v1.80-revolution
2023-11-21
20:50
removed another mutex but this seems to be worse than the last commit check-in: a1c90b4301 user: matt tags: v1.80-revolution
Changes

Modified db.scm from [a04781df0a] to [d6b6de146b].

2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res   (cons #f #f))
	   (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=? AND run_id=?;")))
       ;; (db:with-mutex-for-stmth
	;; (lambda()
	  (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	   (lambda (state status)
	     (cons state status))
	   ;; db
	   stmth ;;"SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue
	   test-id run-id)))
       res)) ;; ))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct







<
<





|
|







2633
2634
2635
2636
2637
2638
2639


2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res   (cons #f #f))
	   (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=? AND run_id=?;")))


	  (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	   (lambda (state status)
	     (cons state status))
	   ;; db
	   stmth ;;"SELECT state,status FROM tests WHERE id=? AND run_id=?;" ;; stmth try not compiling this one - yes, this fixed the bind issue
	   test-id run-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct

Modified tcp-transportmod.scm from [46ac0a4bbf] to [2239dfb672].

510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
		    (begin
		      (procinf-port-set! *procinf* (tt-port ttdat))
		      (procinf-dbname-set! *procinf* dbfname)
		      (dbfile:with-no-sync-db
		       nosyncdbpath
		       (lambda (nsdb)
			 (dbfile:insert-or-update-process nsdb *procinf*))))
		    (if (< count 5)
			(begin
			  (thread-sleep! 0.5)
			  (loop (+ count 1)))
			(debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set!"))))
	    
	      (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
	      ;; replace with call to (dbfile:set-process-done nsdb host pid reason)
	      (procinf-status-set! *procinf* "done")
	      (procinf-end-set! *procinf* (current-seconds))

	      (dbfile:with-no-sync-db
	       nosyncdbpath
	       (lambda (nsdb)
		 (dbfile:insert-or-update-process nsdb *procinf*)))
              (debug:print 0 *default-log-port* "Exiting now.")
	      (exit)))))))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;
  (let* ((db-locked-in #f)
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb"))
	 (cleanup (lambda ()
		    (if (tt-cleanup-proc ttdat)
			((tt-cleanup-proc ttdat))) ;; removes .servinfo file
		    (dbfile:with-no-sync-db nosyncdbpath
					    (lambda (db)
					      (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct)))
						;; (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname)
						(db:no-sync-del! db dbfname)
                                                ))))))
    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet







|

|







>














|
<
<
<
<
<
<
<
<
<







510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542









543
544
545
546
547
548
549
		    (begin
		      (procinf-port-set! *procinf* (tt-port ttdat))
		      (procinf-dbname-set! *procinf* dbfname)
		      (dbfile:with-no-sync-db
		       nosyncdbpath
		       (lambda (nsdb)
			 (dbfile:insert-or-update-process nsdb *procinf*))))
		    (if (< count 10)
			(begin
			  (thread-sleep! 0.25)
			  (loop (+ count 1)))
			(debug:print 0 *default-log-port* "ERROR: (tt-port ttdat) no port set!"))))
	    
	      (thread-join! run-thread) ;; run thread will exit on timeout or other conditions
	      ;; replace with call to (dbfile:set-process-done nsdb host pid reason)
	      (procinf-status-set! *procinf* "done")
	      (procinf-end-set! *procinf* (current-seconds))
	      ;; either convert this to use set-process-done or get rid of set-process-done
	      (dbfile:with-no-sync-db
	       nosyncdbpath
	       (lambda (nsdb)
		 (dbfile:insert-or-update-process nsdb *procinf*)))
              (debug:print 0 *default-log-port* "Exiting now.")
	      (exit)))))))

(define (tt:keep-running ttdat dbfname dbstruct)
  ;; verfiy conn for ready
  ;; listener socket has been started by this stage
  ;; wait for a port before creating the registration file
  ;;
  (let* ((db-locked-in #f)
	 (areapath     (tt-areapath ttdat))
	 (nosyncdbpath (conc areapath"/.mtdb")))









    (set! *server-info* ttdat)
    (let loop ((count 0))
      (if (> count 240)
	  (begin
	    (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.")
	    (exit 1))
	  (if (not (tt-port ttdat)) ;; no connection yet
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691

692
693
694

695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (and (eq? (tt-state ttdat) 'running)
		   (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db?
	      (let* ((sinfo-file (tt-servinf-file ttdat)))
		;; (debug:print 0 *default-log-port* "INFO: touching "sinfo-file)
		(set! (file-modification-time sinfo-file) (current-seconds))
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) (tt-server-timeout-param))
	    (begin
	      (thread-sleep! 5)
	      (loop)))))
    (cleanup)
    (debug:print 0 *default-log-port* "INFO: Server timed out, exiting from tt:keep-running.")))

  
;; ;; given an already set up uconn start the cmd-loop
;; ;;
;; (define (tt:cmd-loop ttdat)
;;   (let* ((serv-listener (-socket uconn))
;; 	 (listener      (lambda ()
;; 			  (let loop ((state 'start))
;; 			    (let-values (((inp oup)(tcp-accept serv-listener)))
;; 			      ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 			      (let* ((rdat  (deserialize inp)) ;; '(my-host-port qrykey cmd params)
;; 				     (resp  (ulex-handler uconn rdat)))
;; 				(serialize resp oup)
;; 				(close-input-port inp)
;; 				(close-output-port oup)
;; 				;; (mutex-unlock! *send-mutex*) ;; DOESN'T SEEM TO HELP
;; 				)
;; 			      (loop state))))))
;;     ;; start N of them
;;     (let loop ((thnum   0)
;; 	       (threads '()))
;;       (if (< thnum 100)
;; 	  (let* ((th (make-thread listener (conc "listener" thnum))))
;; 	    (thread-start! th)
;; 	    (loop (+ thnum 1)
;; 		  (cons th threads)))
;; 	  (map thread-join! threads)))))
;; 
;; 
;; 
;; (define (wait-and-close uconn)
;;   (thread-join! (udat-cmd-thread uconn))
;;   (tcp-close (udat-socket uconn)))
;; 
;; 

(define (tt:shutdown-server ttdat)
  (let* ((cleanproc (tt-cleanup-proc ttdat))
	 (port      (tt-port         ttdat)))

    (tt-state-set! ttdat 'shutdown)
    (portlogger:open-run-close portlogger:set-port port "released")
    (if cleanproc (cleanproc))

    (tcp-close (tt-socket ttdat)) ;; close up ports here
    ))

;; (define (wait-and-close uconn)
;;   (thread-join! (tt-cmd-thread uconn))
;;   (tcp-close (tt-socket uconn)))

;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (tt:create-server-registration-file ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf)
		       )))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))

;; find valid server







|
















|


|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

|
|
>


|
>



<
<
<
<









|
<
<
<

<







619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646


































647
648
649
650
651
652
653
654
655
656
657




658
659
660
661
662
663
664
665
666
667



668

669
670
671
672
673
674
675
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (debug:print 0 *default-log-port* "Exiting immediately")
	      (tt:shutdown-server ttdat)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (and (eq? (tt-state ttdat) 'running)
		   (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db?
	      (let* ((sinfo-file (tt-servinf-file ttdat)))
		;; (debug:print 0 *default-log-port* "INFO: touching "sinfo-file)
		(set! (file-modification-time sinfo-file) (current-seconds))
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) (tt-server-timeout-param))
	    (begin
	      (thread-sleep! 5)
	      (loop)))))
    ;; (cleanup) ;; all done by tt:shutdown-server
    (debug:print 0 *default-log-port* "INFO: Server timed out, exiting from tt:keep-running.")))




































(define (tt:shutdown-server ttdat)
  (let* ((host (tt-host ttdat))
	 (port (tt-port ttdat))
	 (sinf (tt-servinf-file ttdat)))
    (tt-state-set! ttdat 'shutdown)
    (portlogger:open-run-close portlogger:set-port port "released")
    (if (file-exists? sinf)
	(delete-file* sinf))
    (tcp-close (tt-socket ttdat)) ;; close up ports here
    ))





;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (tt:create-server-registration-file ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (servdir  (tt:get-servinfo-dir areapath))
	 (host     (tt-host ttdat))
	 (port     (tt-port ttdat))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)":"dbfname))
	 (serv-id (tt:mk-signature areapath)))



    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn, dbfname="dbfname)

    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)" dbfname: "dbfname)))
      serv-id))

;; find valid server
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
	 (goodfiles '()))

    ;; filter the files here by looking in processes table (if we are not main.db)
    ;; and or look at the time stamp on the servinfo file, a running server will
    ;; touch the file every minute (again, this will only apply for main.db)
    (for-each (lambda (fname)
		(let* ((age (- (current-seconds)(file-modification-time fname))))
		  (if (> age 200) ;; can't trust it if over twenty seconds old
		      (begin
			(debug:print 0 *default-log-port* "WARNING: removing stale servinfo file "fname)
			(handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: error attempting to remove stale servinfo file "fname)
			 (delete-file fname))) ;; 
		      (set! goodfiles (cons fname goodfiles)))))
	      sfiles)
    goodfiles))







|

|







683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
	 (goodfiles '()))

    ;; filter the files here by looking in processes table (if we are not main.db)
    ;; and or look at the time stamp on the servinfo file, a running server will
    ;; touch the file every minute (again, this will only apply for main.db)
    (for-each (lambda (fname)
		(let* ((age (- (current-seconds)(file-modification-time fname))))
		  (if (> age 200) ;; can't trust it if over 200 seconds old
		      (begin
			(debug:print 0 *default-log-port* "WARNING: removing stale servinfo file "fname", it is "age" seconds old")
			(handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: error attempting to remove stale servinfo file "fname)
			 (delete-file fname))) ;; 
		      (set! goodfiles (cons fname goodfiles)))))
	      sfiles)
    goodfiles))