Megatest

Check-in [9f1a2f71d3]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.001
Files: files | file ages | folders
SHA1: 9f1a2f71d3274e2d33ca3f573cb70c928aa3f087
User & Date: matt on 2021-12-19 20:28:12
Other Links: branch diff | manifest | tags
Context
2021-12-20
15:00
wip, closer ... check-in: d45dbac9d7 user: matt tags: v2.001
2021-12-19
20:28
wip check-in: 9f1a2f71d3 user: matt tags: v2.001
2021-12-18
20:09
Improved results from unit tests. WIP check-in: bb1843a1b0 user: matt tags: v2.001
Changes

Modified apimod.scm from [0baabe14dd] to [f6411932bc].

201
202
203
204
205
206
207
208

209
210
211
212
213

214
215
216
217
218
219
220
201
202
203
204
205
206
207

208
209
210
211
212

213
214
215
216
217
218
219
220







-
+




-
+







    ;;===============================================

    ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
    
    ;; SERVERS
    ;; ((start-server)                    (apply server:kind-run params))
    ((kill-server)                       (set! *server-run* #f))
    ((get-server)                        (api:start-server dbstruct params))
    ((start-server get-server)           (api:start-server dbstruct params))
    ((get-server-info)                   (apply db:get-server-info dbstruct params))
    ((register-server)                   (apply db:register-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath)
    ((deregister-server)                 (apply db:deregister-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath)
    ((get-count-servers)                 (apply db:get-count-servers dbstruct params))

    ((get-servers-info)                  (apply db:get-servers-info dbstruct params))
    ;; TESTS

    ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
    ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
    ((test-set-state-status-by-id)

     ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)

Modified commonmod.scm from [3bc09277ff] to [66ca132e41].

273
274
275
276
277
278
279
280
281
282
283
284
285






286
287
288
289
290
291
292
273
274
275
276
277
278
279






280
281
282
283
284
285
286
287
288
289
290
291
292







-
-
-
-
-
-
+
+
+
+
+
+







common:get-normalized-cpu-load-raw
common:unix-ping
launch:is-test-alive
common:get-num-cpus
common:wait-for-normalized-load
common:wait-for-cpuload
tasks:kill-server
server:get-logs-list
server:get-list
server:get-num-alive
server:get-best
server:get-first-best
server:get-rand-best
;; server:get-logs-list
;; server:get-list
;; server:get-num-alive
;; server:get-best
;; server:get-first-best
;; server:get-rand-best
server:record->id
server:get-num-servers
server:logf-get-start-info
get-uname
realpath
common:real-path
common:get-disk-space-used
2823
2824
2825
2826
2827
2828
2829
2830

2831
2832
2833
2834
2835
2836
2837
2838
2839
2840

2841
2842
2843
2844
2845
2846
2847
2823
2824
2825
2826
2827
2828
2829

2830
2831
2832
2833
2834
2835
2836
2837
2838
2839

2840
2841
2842
2843
2844
2845
2846
2847







-
+









-
+







      (thread-sleep! 0.5)
      (if (file-exists? gzfile) (delete-file gzfile))
      (system (conc "gzip " logfile))
      
      (unset-environment-variable! "TARGETHOST_LOGF")
      (unset-environment-variable! "TARGETHOST"))))

(define (server:get-logs-list area-path)
#;(define (server:get-logs-list area-path)
  (let* (;; (server-logs-cmd  (conc "grep -iL exiting " area-path "/logs/server-*-*.log"))
	 ;; (server-logs   (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string))))
	 (server-logs      (glob (conc area-path"/logs/server-*-*.log")))
	 )
    server-logs))
  
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
#;(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))
    ;; if the directory exists continue to get the list
    ;; otherwise attempt to create the logs dir and then
    ;; continue
    (if (if (directory-exists? (conc areapath "/logs"))
	    '()
2885
2886
2887
2888
2889
2890
2891
2892

2893
2894
2895
2896
2897
2898
2899
2885
2886
2887
2888
2889
2890
2891

2892
2893
2894
2895
2896
2897
2898
2899







-
+







		  (if (null? tal)
		      (if (and limit
			       (> (length new-res) limit))
			  new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			  new-res)
		      (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
#;(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
        (handle-exceptions
          exn
         (begin 
          (debug:print-info 0 *default-log-port*  "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
2912
2913
2914
2915
2916
2917
2918
2919

2920
2921
2922
2923
2924
2925
2926
2912
2913
2914
2915
2916
2917
2918

2919
2920
2921
2922
2923
2924
2925
2926







-
+







;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
#;(define (server:get-best srvlst)
  (let* ((nums (server:get-num-servers))
	 (now  (current-seconds))
	 (slst (sort
		(filter (lambda (rec)
			  (if (and (list? rec)
				   (> (length rec) 2))
			      (let ((start-time (list-ref rec 3))
2940
2941
2942
2943
2944
2945
2946
2947

2948
2949
2950
2951
2952
2953
2954

2955
2956
2957
2958
2959
2960
2961
2940
2941
2942
2943
2944
2945
2946

2947
2948
2949
2950
2951
2952
2953

2954
2955
2956
2957
2958
2959
2960
2961







-
+






-
+







		(lambda (a b)
		  (< (list-ref a 3)
		     (list-ref b 3))))))
    (if (> (length slst) nums)
	(take slst nums)
	slst)))

(define (server:get-first-best areapath)
#;(define (server:get-first-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and srvrs
	     (not (null? srvrs)))
	(car srvrs)
	#f)))

(define (server:get-rand-best areapath)
#;(define (server:get-rand-best areapath)
  (let ((srvrs (server:get-best (server:get-list areapath))))
    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (pseudo-random-integer len)))
	  (list-ref srvrs idx))
	#f)))

Modified dashboard.scm from [955f1d46eb] to [feba132a9c].

238
239
240
241
242
243
244

245
246
247
248
249
250
251
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252







+







;; RA => Might require revert for filters 
;; create a watch dog to move changes from lt/.db/*.db to megatest.db
;;
;;;(if (file-write-access? (conc *toppath* "/megatest.db"))
;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn")

(thread-start! (make-thread common:watchdog "Watchdog thread"))

;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn")
;; (if (not (args:get-arg "-use-db-cache"))
;;     (begin
;;       (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db")
;;       (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;)
;;)

3667
3668
3669
3670
3671
3672
3673
3674

3675
3676
3677
3678
3679
3680
3681
3668
3669
3670
3671
3672
3673
3674

3675
3676
3677
3678
3679
3680
3681
3682







-
+







   "dashboard:runs-tab-updater"))

;;======================================================================
;; The heavy lifting starts here
;;======================================================================

(define (dashboard-main)
  (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; 
  (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; not used for now, update for .db area and use for write access detection
    #;(if (and (common:file-exists? mtdb-path)
	     (file-writable? mtdb-path))
	(if (not (args:get-arg "-skip-version-check"))
            (common:exit-on-version-changed)))
    (let* ((commondat       (dboard:commondat-make)))
      ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
      (cond 

Modified dbmod.scm from [ddeeddaa42] to [43fb4b6c81].

225
226
227
228
229
230
231

232
233
234
235
236
237
238
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239







+







db:hoh-set!
db:hoh-get
db:get-cache-stmth
db:register-server
db:deregister-server
db:get-server-info
db:get-count-servers
db:get-servers-info
db:get-steps-info-by-id

make-dbr:dbdat
dbr:dbdat-db
dbr:dbdat-inmem
dbr:dbdat-last-sync
dbr:dbdat-last-write
5887
5888
5889
5890
5891
5892
5893
5894












5895


5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907

5908
5909








+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
     (sqlite3:fold-row
      (lambda (res count)
	(max res count))
      0
      db
      "SELECT count(*) FROM servers WHERE apath=?;"
      apath))))

(define (db:get-servers-info dbstruct apath)
  (db:with-db
   dbstruct
   #f #f
   (lambda (db)
     (sqlite3:fold-row
      (lambda (res count)
	(max res count))
      0
      db
      "SELECT * FROM servers WHERE apath=?;"
      apath))))
)

)

Modified dcommon.scm from [dedc418b9b] to [cfabfe1da5].

903
904
905
906
907
908
909
910

911
912
913
914
915
916
917
903
904
905
906
907
908
909

910
911
912
913
914
915
916
917







-
+







				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)
			       (let ((servers  (server:get-list *toppath* limit: 10)))
			       (let ((servers  (rmt:get-servers-info *remotedat* *toppath*)#;(server:get-list *toppath* limit: 10)))
				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)

Modified megatest.scm from [ca9d861939] to [6434fb218b].

1133
1134
1135
1136
1137
1138
1139
1140

1141









1142
1143
1144
1145
1146
1147
1148
1133
1134
1135
1136
1137
1138
1139

1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157







-
+

+
+
+
+
+
+
+
+
+







           (adjutant-run)
           (set! *didsomething* #t)))
     
     (if (or (args:get-arg "-list-servers")
             (args:get-arg "-kill-servers"))
         (let ((tl (launch:setup)))
           (if tl ;; all roads from here exit
     	  (let* ((servers (server:get-list *toppath*))
     	  (let* ((servers (rmt:get-servers-info *remotedat* *toppath*))
     		 (fmtstr  "~8a~22a~20a~20a~8a\n"))
	    ;; id INTEGER PRIMARY KEY,
	    ;; host TEXT,
	    ;; port INTEGER,
	    ;; servkey TEXT,
	    ;; pid TEXT,
	    ;; ipaddr TEXT,
	    ;; apath TEXT,
	    ;; dbname TEXT,
	    ;; event_time 
     	    (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State")
     	    (format #t fmtstr "===" "==============" "=========" "========" "=====")
     	    (for-each ;;  ( mod-time host port start-time pid )
     	     (lambda (server)
     	       (let* ((mtm (any->number (car server)))
     		      (mod (if mtm (- (current-seconds) mtm) "unk"))
     		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))

Modified rmtmod.scm from [7039de8d85] to [310ad66fe4].

430
431
432
433
434
435
436
437

438
439
440

441
442
443
444
445
446
447
430
431
432
433
434
435
436

437
438
439

440
441
442
443
444
445
446
447







-
+


-
+







;;======================================================================

;;======================================================================
;;  S E R V E R
;;======================================================================

(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))
  (rmt:send-receive 'kill-server #f (list run-id)))

(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server 0 (list run-id)))
  (rmt:send-receive 'start-server #f (list run-id)))

(define (rmt:get-server-info apath dbname)
  (rmt:send-receive 'get-server-info #f (list apath dbname)))

;;======================================================================
;;  M I S C
;;======================================================================
2132
2133
2134
2135
2136
2137
2138
2139
2140




2141
2142
2143
2144
2145
2146
2147
2132
2133
2134
2135
2136
2137
2138


2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149







-
-
+
+
+
+







					    ,dbname)))

(define (rmt:get-count-servers remdat apath)
  (remotedat-conns remdat) ;; just checking types
  (rmt:open-main-connection remdat apath) ;; we need a channel to main.db
  (rmt:send-receive-real remdat apath      ;; params: host port servkey pid ipaddr dbpath
			 (db:run-id->dbname #f)
			 'get-count-servers `(,apath
					      )))
			 'get-count-servers `(,apath)))

(define (rmt:get-servers-info apath)
  (rmt:send-receive 'get-servers-info #f `(,apath)))

(define (rmt:deregister-server remdat apath iface port server-key dbname)
  (remotedat-conns remdat) ;; just checking types
  (rmt:open-main-connection remdat apath) ;; we need a channel to main.db
  (rmt:send-receive-real remdat apath      ;; params: host port servkey pid ipaddr dbpath
                         (db:run-id->dbname #f)
                         'deregister-server `(,iface
2202
2203
2204
2205
2206
2207
2208
2209










2210
2211
2212
2213
2214
2215
2216
2204
2205
2206
2207
2208
2209
2210

2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227







-
+
+
+
+
+
+
+
+
+
+








  (let* ((remdat            *remotedat*)
	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (rmt:get-signature)) ;; This servers key
	 (is-main           (equal? (args:get-arg "-db") ".db/main.db"))
	 (last-access       0)
	 (server-timeout    (server:expiration-timeout)))
	 (server-timeout    (server:expiration-timeout))
	 (shutdown-server-sequence (lambda (port)
				     (set! *unclean-shutdown* #f)
				     (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
				     (rmt:server-shutdown)
				     (portlogger:open-run-close portlogger:set-port port "released")
				     (exit)))
	 (timed-out?        (lambda ()
			      (<= (+ last-access server-timeout)
				 (current-seconds)))))
    ;; main and run db servers have both got wait logic (could/should merge it)
    (if is-main
	(rmt:wait-for-server pkts-dir dbname server-key)
	(rmt:wait-for-stable-interface))
    ;; this is our forever loop
    (let* ((iface             (servdat-host *server-info*))
	   (port              (servdat-port *server-info*)))
2288
2289
2290
2291
2292
2293
2294






2295
2296

2297
2298

2299
2300
2301
2302
2303
2304
2305
2306
2307
2308

2309
2310
2311
2312
2313
2314
2315
2316
2317
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312

2313


2314
2315
2316
2317
2318
2319
2320
2321
2322


2323


2324
2325
2326
2327
2328
2329
2330







+
+
+
+
+
+

-
+
-
-
+








-
-
+
-
-







	
	(if (common:low-noise-print 60 "dbstats")
	    (begin
	      (debug:print 0 *default-log-port* "Server stats:")
	      (db:print-current-query-stats)))
	(let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	  (cond
	   ((not *server-run*)
	    (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
	    (shutdown-server-sequence port))
	   ((timed-out?)
	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
	    (shutdown-server-sequence port))
	   ((and *server-run*
		 (> (+ last-access server-timeout)
		 (not (timed-out?))
		    (current-seconds))
		 (if is-main
		 #;(if is-main ;; intention here was to exit main server quickly. 
		     (> (rmt:get-count-servers remdat *toppath*) 1)
		     #t))
	    (if (common:low-noise-print 120 "server continuing")
		(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
	    (loop 0 bad-sync-count (current-milliseconds)))
	   (else
	    (set! *unclean-shutdown* #f)
	    (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
	    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
	    (rmt:server-shutdown)
	    (shutdown-server-sequence port)
	    (portlogger:open-run-close portlogger:set-port port "released")
	    (exit)
	    #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
			      (open-send-receive-nn (conc iface":"port)      ;; do this here and not in server-shutdown
						    (sexpr->string 'quit)))
	    )))))))

;; Call this to start the actual server
;;

Modified tests/unittests/server.scm from [b0131aa814] to [40ab640762].

65
66
67
68
69
70
71
72

73
74
75
76
77
78
79
80
81
82
83


84

85
65
66
67
68
69
70
71

72

73
74
75
76
77
78
79
80
81
82
83
84

85
86







-
+
-










+
+
-
+


(thread-sleep! 2)
(test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db"))


(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
(test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f)))
(print "Got here.")
;; (print "Got here.")
(exit)

(test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f)))

(test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f))
;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname

(test #f 2 (rmt:get-count-servers *remotedat* *toppath*))

(test #f "run2" (rmt:get-run-name-from-id 2))

(test #f #t (list? (rmt:get-servers-info *toppath*)))

;; (exit)
(exit)