Megatest

Check-in [24be385550]
Login
Overview
Comment: More working
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.001
Files: files | file ages | folders
SHA1: 24be3855502b43e585340409de9b4fca89c0ff92
User & Date: matt on 2021-12-20 18:34:41
Other Links: branch diff | manifest | tags
Context
2021-12-20
18:48
wip check-in: a9fa8512c8 user: matt tags: v2.001
18:34
More working check-in: 24be385550 user: matt tags: v2.001
16:26
Basic listing of servers working check-in: 4e80b340f3 user: matt tags: v2.001
Changes

Modified TODO from [426bdbbbe8] to [1f8790aebc].

14
15
16
17
18
19
20


21



22
23
24
25
26
27
28
29
# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

TODO
====



NextSteps



. Remove servermod.scm

WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling

WW16
. split db into megatest.db (runs etc.) db/<something>.db







>
>
|
>
>
>
|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# 
#     You should have received a copy of the GNU General Public License
#     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

TODO
====

Loose ends
----------

. -list-servers not correct
. move *remotedat* into bigdata
. add back server stats on exit (look in rmt:run in rmtmod.scm)


WW15
. fill newview matrix with data, filter pipeline gui elements
. improve [script], especially indent handling

WW16
. split db into megatest.db (runs etc.) db/<something>.db

Modified dbmod.scm from [fef313a73a] to [1af1007e9d].

5861
5862
5863
5864
5865
5866
5867
5868

5869
5870
5871
5872
5873
5874
5875
	      (begin
		(debug:print-info 0 *default-log-port* "Server already removed for "apath", "dbname) ;; at "sinfo ", while trying to register server " host":"port)
		#f) ;; server already deregistered
	      (begin
		(sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);"
				 ;; host port servkey pid ipaddr
				 apath dbname)
		#;(db:get-server-info dbstruct apath dbname)))))))))


(define (db:get-server-info dbstruct apath dbname)
  (db:with-db
   dbstruct
   #f #f
   (lambda (db)
     (sqlite3:fold-row







|
>







5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
	      (begin
		(debug:print-info 0 *default-log-port* "Server already removed for "apath", "dbname) ;; at "sinfo ", while trying to register server " host":"port)
		#f) ;; server already deregistered
	      (begin
		(sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);"
				 ;; host port servkey pid ipaddr
				 apath dbname)
		#;(db:get-server-info dbstruct apath dbname)
		'done))))))))

(define (db:get-server-info dbstruct apath dbname)
  (db:with-db
   dbstruct
   #f #f
   (lambda (db)
     (sqlite3:fold-row

Modified megatest.scm from [b3a97379d1] to [89bdcd6c8f].

1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165

1166
1167
1168

1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
	    ;; port INTEGER,
	    ;; servkey TEXT,
	    ;; pid TEXT,
	    ;; ipaddr TEXT,
	    ;; apath TEXT,
	    ;; dbname TEXT,
	    ;; event_time 
     	    (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State")
     	    (format #t fmtstr "===" "==============" "=========" "========" "=====")
     	    (for-each ;;  ( mod-time host port start-time pid )
     	     (lambda (server)
     	       (let* ((mtm (any->number (car server)))
     		      (mod (if mtm (- (current-seconds) mtm) "unk"))
     		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
     		      (url (conc (cadr server) ":" (caddr server)))
     		      (pid (list-ref server 4))
     		      (alv (if (number? mod)(< mod 10) #f)))
     		 (format #t
     			 fmtstr
     			 pid
     			 url
     			 (seconds->hr-min-sec age)

     			 (seconds->hr-min-sec mod)
     			 (if alv "alive" "dead"))
     		 (if (and alv

     			  (args:get-arg "-kill-servers"))
     		     (begin
     		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!")
     		       #;(server:kill server)))))
     	     (sort servers (lambda (a b)
     			     (let ((ma (or (any->number (car a)) 9e9))
     				   (mb (or (any->number (car b)) 9e9)))
     			       (> ma mb)))))
     	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
     	    (set! *didsomething* #t)
     	    (exit))
     	  (exit))))
           ;; must do, would have to add checks to many/all calls below
     
     ;;======================================================================







|
|


|
|
<
<
<
<
|
|
|
<
|
>
|
<
|
>
|



|
<
<
<







1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156




1157
1158
1159

1160
1161
1162

1163
1164
1165
1166
1167
1168
1169



1170
1171
1172
1173
1174
1175
1176
	    ;; port INTEGER,
	    ;; servkey TEXT,
	    ;; pid TEXT,
	    ;; ipaddr TEXT,
	    ;; apath TEXT,
	    ;; dbname TEXT,
	    ;; event_time 
     	    (format #t fmtstr "pid" "Interface:port" "State" "dbname" "apath")
     	    (format #t fmtstr "===" "==============" "=====" "======" "=====")
     	    (for-each ;;  ( mod-time host port start-time pid )
     	     (lambda (server)
	       (match-let
		(((id host port servkey pid ipaddr apath dbname event_time) server))




     		(format #t
     			fmtstr
     			pid

     			(conc host":"port)
     			(if (server-ready? host port servkey) "Running" "Dead")
     			dbname ;; (seconds->hr-min-sec mod)

     			apath
			)
     		 (if (args:get-arg "-kill-servers")
     		     (begin
     		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!")
     		       #;(server:kill server)))))
     	     servers)



     	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
     	    (set! *didsomething* #t)
     	    (exit))
     	  (exit))))
           ;; must do, would have to add checks to many/all calls below
     
     ;;======================================================================

Modified rmtmod.scm from [1621ebeda5] to [3f5d189366].

1525
1526
1527
1528
1529
1530
1531
1532

1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
	  (if (sqlite3:database? db)
	      (sqlite3:finalize! db)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
	  (if (sqlite3:database? inmem)
	      (sqlite3:finalize! inmem)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
	  (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")
	  (if am-server

	      (if (string-match ".*/main.db$" dbfile)
		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
					"/" (servdat-uuid *server-info*)
					".pkt")))
		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
		    (delete-file* pkt-file)
		    (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
		    (db:with-lock-db (servdat-dbfile *server-info*)
				     (lambda (dbh dbfile)
				       (db:release-lock dbh dbfile))))
		  (let* ((sdat *server-info*) ;; we have a run-id server
			 (host (servdat-host sdat))
			 (port (servdat-port sdat))
			 (uuid (servdat-uuid sdat)))
		    (if (not (string-match ".db/main.db" (args:get-arg "-db")))
			(let* ((res (rmt:deregister-server remdat
							   *toppath*
							   (servdat-host *server-info*)   ;; iface
							   (servdat-port *server-info*)
							   (servdat-uuid *server-info*)
							   dbfile ;; (current-process-id)
							   )))
			  (debug:print-info 0 *default-log-port* "deregistered-server, res="res)))
		    
		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
		    )))))))

(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)







|
>













|
<
|
<
<
<
<
<
<
|
<







1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547

1548






1549

1550
1551
1552
1553
1554
1555
1556
	  (if (sqlite3:database? db)
	      (sqlite3:finalize! db)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing..."))
	  (if (sqlite3:database? inmem)
	      (sqlite3:finalize! inmem)
	      (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing..."))
	  (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")
	  (if (not am-server)
	      (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
	      (if (string-match ".*/main.db$" dbfile)
		  (let ((pkt-file (conc (get-pkts-dir *toppath*)
					"/" (servdat-uuid *server-info*)
					".pkt")))
		    (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
		    (delete-file* pkt-file)
		    (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
		    (db:with-lock-db (servdat-dbfile *server-info*)
				     (lambda (dbh dbfile)
				       (db:release-lock dbh dbfile))))
		  (let* ((sdat *server-info*) ;; we have a run-id server
			 (host (servdat-host sdat))
			 (port (servdat-port sdat))
			 (uuid (servdat-uuid sdat))

			 (res  (rmt:deregister-server remdat *toppath* host port uuid dbfile)))






		    (debug:print-info 0 *default-log-port* "deregistered-server, res="res)

		    (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
		    )))))))

(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (port            (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 ;; (tmp-area        (common:get-db-tmp-area))
	 #;(start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " port)
    (if *server-info*
	(begin
	  (servdat-host-set! *server-info* ipaddrstr)
	  (servdat-port-set! *server-info* port)







|







1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (port            (portlogger:open-run-close portlogger:find-port))
	 ;; (link-tree-path  (common:get-linktree))
	 ;; (tmp-area        (common:get-db-tmp-area))
	 #;(start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " port)
    (if *server-info*
	(begin
	  (servdat-host-set! *server-info* ipaddrstr)
	  (servdat-port-set! *server-info* port)
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
	(servdat-trynum-set! *server-info*
			     (+ (servdat-trynum *server-info*) 1)))
      (set! *server-info* (make-servdat host: ipaddrstr port: portnum)))
  (debug:print-info 0 *default-log-port* "rmt:try-start-server time="
		    (seconds->time-string (current-seconds))
		    " ipaddrsstr=" ipaddrstr
		    " portnum=" portnum)
;;(if (is-port-in-use portnum)
;;    (begin
;;	(portlogger:open-run-close portlogger:set-failed portnum)
;;	(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
;;	;; (thread-sleep! 0.1)
;;	(rmt:try-start-server ipaddrstr
;;			      (portlogger:open-run-close
;;			       portlogger:find-port)))
      (begin
	(if (not *server-info*)
	    (set! *server-info* (make-servdat
				 host: ipaddrstr
				 port: portnum)))
	(servdat-status-set! *server-info* 'starting)
	(servdat-port-set!   *server-info* portnum)
	(if (not (servdat-rep *server-info*))
	    (let ((rep  (make-rep-socket)))
	      (servdat-rep-set!    *server-info* rep)
	      (socket-set! rep 'nng/recvtimeo 2000)))
	(let* ((rep (servdat-rep *server-info*)))
	  (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
	  (handle-exceptions
	   exn
	   (begin
	     (print-error-message exn)
	     (if (< portnum 64000)
		 (begin 
		   (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
		   (portlogger:open-run-close portlogger:set-failed portnum)
		   (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
		   ;; (thread-sleep! 0.1)
		   (rmt:try-start-server ipaddrstr
					 (portlogger:open-run-close portlogger:find-port)))
		 (begin
		   (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum))))
	   (nng-listen rep (conc "tcp://*:" portnum))
	   rep)))) ;;)

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S







<
<
<
<
<
<
<
<
<
|
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1709
1710
1711
1712
1713
1714
1715









1716



1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
	(servdat-trynum-set! *server-info*
			     (+ (servdat-trynum *server-info*) 1)))
      (set! *server-info* (make-servdat host: ipaddrstr port: portnum)))
  (debug:print-info 0 *default-log-port* "rmt:try-start-server time="
		    (seconds->time-string (current-seconds))
		    " ipaddrsstr=" ipaddrstr
		    " portnum=" portnum)









  (assert (servdat? *server-info*) "FATAL: Must always have *server-info* properly set up by here.")



  (servdat-status-set! *server-info* 'starting)
  (servdat-port-set!   *server-info* portnum)
  (if (not (servdat-rep *server-info*))
      (let ((rep  (make-rep-socket)))
	(servdat-rep-set!    *server-info* rep)
	(socket-set! rep 'nng/recvtimeo 2000)))
  (let* ((rep (servdat-rep *server-info*)))
    (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
	   (begin 
	     (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	     (portlogger:open-run-close portlogger:set-failed portnum)
	     (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
	     ;; (thread-sleep! 0.1)
	     (rmt:try-start-server ipaddrstr
				   (portlogger:open-run-close portlogger:find-port)))
	   (begin
	     (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum))))
     (nng-listen rep (conc "tcp://*:" portnum))
     rep)))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
2213
2214
2215
2216
2217
2218
2219
2220

2221
2222
2223
2224
2225
2226
2227
				     (set! *unclean-shutdown* #f)
				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
				     (rmt:server-shutdown)
				     (portlogger:open-run-close portlogger:set-port port "released")
				     (exit)))
	 (timed-out?        (lambda ()
			      (<= (+ last-access server-timeout)
				 (current-seconds)))))

    ;; main and run db servers have both got wait logic (could/should merge it)
    (if is-main
	(rmt:wait-for-server pkts-dir dbname server-key)
	(rmt:wait-for-stable-interface))
    ;; this is our forever loop
    (let* ((iface             (servdat-host *server-info*))
	   (port              (servdat-port *server-info*)))







|
>







2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
				     (set! *unclean-shutdown* #f)
				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
				     (rmt:server-shutdown)
				     (portlogger:open-run-close portlogger:set-port port "released")
				     (exit)))
	 (timed-out?        (lambda ()
			      (<= (+ last-access server-timeout)
				  (current-seconds)))))
    (servdat-dbfile-set! *server-info* (args:get-arg "-db"))
    ;; main and run db servers have both got wait logic (could/should merge it)
    (if is-main
	(rmt:wait-for-server pkts-dir dbname server-key)
	(rmt:wait-for-stable-interface))
    ;; this is our forever loop
    (let* ((iface             (servdat-host *server-info*))
	   (port              (servdat-port *server-info*)))