Megatest

Check-in [808adeca23]
Login
Overview
Comment:wip-still-gazillions-of-open-files
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | v1.6569-multi-db-wip
Files: files | file ages | folders
SHA1: 808adeca2352119c9dfe1f3b917308f092750ae0
User & Date: matt on 2021-02-15 20:08:39
Other Links: branch diff | manifest | tags
Context
2021-02-15
20:08
wip-still-gazillions-of-open-files Leaf check-in: 808adeca23 user: matt tags: v1.6569-multi-db-wip (unpublished)
19:12
still chipping away check-in: 59823ee440 user: matt tags: v1.6569-multi-db-wip (unpublished)
Changes

Modified client.scm from [340e19da02] to [d550770e12].

31
32
33
34
35
36
37





38
39
40
41
42
43
44
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(declare (uses commonmod))
(import commonmod)

(declare (uses dbmod))
(import dbmod)






(include "common_records.scm")
(include "db_records.scm")

;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*







>
>
>
>
>







31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(declare (uses commonmod))
(import commonmod)

(declare (uses dbmod))
(import dbmod)

(declare (uses rmt))

(declare (uses servermod))
(import servermod)

(include "common_records.scm")
(include "db_records.scm")

;; client:get-signature
(define (client:get-signature)
  (if *my-client-signature* *my-client-signature*
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
		  (port  (caddr server-dat))
                  (server-id (caddr (cddr server-dat))))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))
                  (begin
		    ;; POSSIBLE BUG. I removed the full initialization call. mrw
		    (set! *runremote* (make-remote)) ;; (create-remote-record))
                    (let* ((server-info (remote-server-info *runremote*))) 
                      (if server-info
                        (begin
                          (remote-server-url-set! *runremote* (server:record->url server-info))
                          (remote-server-id-set! *runremote* (server:record->id server-info)))))))
	      (if (and host port server-id)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port server-id))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again







|






<
|
<
|







104
105
106
107
108
109
110
111
112
113
114
115
116
117

118

119
120
121
122
123
124
125
126
		  (port  (caddr server-dat))
                  (server-id (caddr (cddr server-dat))))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))
                  (begin
		    ;; POSSIBLE BUG. I removed the full initialization call. mrw
		    (set! *runremote* (create-remote-record))
                    (let* ((server-info (remote-server-info *runremote*))) 
                      (if server-info
                        (begin
                          (remote-server-url-set! *runremote* (server:record->url server-info))
                          (remote-server-id-set! *runremote* (server:record->id server-info)))))))
	      (if (and host port server-id)

		  (let* ((start-res (http-transport:client-connect host port server-id))

			 (ping-res  (rmt:login-no-auto-client-setup start-res)))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again

Modified http-transport.scm from [25123e9128] to [1ced6f64e6].

41
42
43
44
45
46
47



48
49
50
51
52
53
54
(import commonmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses dbmod))
(import dbmod)




(include "common_records.scm")
(include "db_records.scm")
(include "js-path.scm")

(require-library stml)
(define (http-transport:make-server-url hostport)







>
>
>







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
(import commonmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses dbmod))
(import dbmod)

(declare (uses servermod))
(import servermod)

(include "common_records.scm")
(include "db_records.scm")
(include "js-path.scm")

(require-library stml)
(define (http-transport:make-server-url hostport)

Modified rmt.scm from [ee608697e4] to [0777bafc6d].

72
73
74
75
76
77
78




79
80
81
82
83
84
85
86
87
88
89
90
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(define (create-remote-record)
  (let ((rr (make-remote)))




    (remote-hh-dat-set!         rr (common:get-homehost)) ;
    (remote-server-info-set!    rr (if *toppath* (server:check-if-running *toppath*) #f))
    (remote-transport-set!      rr *transport-type*)
    (remote-server-timeout-set! rr (server:expiration-timeout))
    rr))
  
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)







>
>
>
>
|
|
|
|
|







72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(define (create-remote-record)
  (let ((rr (make-remote)))
    (rmt:init-remote rr)
    rr))

(define (rmt:init-remote rr)
  (remote-hh-dat-set!         rr (common:get-homehost)) ;
  (remote-server-info-set!    rr (if *toppath* (server:check-if-running *toppath*) #f))
  (remote-transport-set!      rr *transport-type*)
  (remote-server-timeout-set! rr (server:expiration-timeout))
  rr)
  
;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)

Modified runs.scm from [f9eefab8cc] to [72d304a2fa].

36
37
38
39
40
41
42



43
44
45
46
47
48
49
(import commonmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses dbmod))
(import dbmod)




(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")








>
>
>







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
(import commonmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses dbmod))
(import dbmod)

(declare (uses servermod))
(import servermod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

Modified servermod.scm from [9a8148641c] to [467961a959].

83
84
85
86
87
88
89
90
91
92
93
94


95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115



116
117
118
119
120
121
122
;;     result)))

;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id
    (handle-exceptions
     exn
     (begin
       (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
       (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server


     (with-input-from-file
	 logf
       (lambda ()
	 (let loop ((inl  (read-line))
		    (lnum 0))
	   (if (not (eof-object? inl))
	       (let ((mlst (string-match rx inl)))
		 (if (not mlst)
		     (if (< lnum 500) ;; give up if more than 500 lines of server log read
			 (loop (read-line)(+ lnum 1))
			 (begin 
                           (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
                           (list #f #f #f #f)))
		     (let ((dat  (cdr mlst)))
		       (list (car dat) ;; host
			     (string->number (cadr dat)) ;; port
			     (string->number (caddr dat))
			     (cadr (cddr dat))))))
	       (begin 
		 (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))
		 (list #f #f #f #f)))))))))




;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))







|
|
|
|
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
;;     result)))

;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id
    ;;(handle-exceptions
    ;; exn
    ;; (begin
    ;;   (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
    ;;   (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
    (if (and (file-exists? logf)
	     (file-read-access? logf))
	(with-input-from-file
	    logf
	  (lambda ()
	    (let loop ((inl  (read-line))
		       (lnum 0))
	      (if (not (eof-object? inl))
		  (let ((mlst (string-match rx inl)))
		    (if (not mlst)
			(if (< lnum 500) ;; give up if more than 500 lines of server log read
			    (loop (read-line)(+ lnum 1))
			    (begin 
			      (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
			      (list #f #f #f #f)))
			(let ((dat  (cdr mlst)))
			  (list (car dat) ;; host
				(string->number (cadr dat)) ;; port
				(string->number (caddr dat))
				(cadr (cddr dat))))))
		  (begin 
		    (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))
		    (list #f #f #f #f))))))
	(begin
	  (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", file not found or not readable.")
	  (list #f #f #f #f)))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))

Modified tasks.scm from [b4c4a4968f] to [ae153a5943].

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
#;(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (db:dbfile-path #f) "/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)
		(tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch))
	    ;; WARNING: Possible race conditon here!!
	    ;; should this update be immediately after the task-get-action call above?
	    (if (> (current-seconds) next-touch)
		(begin
		  (tasks:monitors-update mdb)
		  (loop (+ count 1)(+ (current-seconds) 240)))
		(loop (+ count 1) next-touch)))))))
      
;;======================================================================
;; T A S K S   Q U E U E
;;
;;   NOTE:: These operate on task_queue which is in main.db
;;
;;======================================================================







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
;; (define (tasks:start-monitor db mdb)
;;   (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
;;       (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
;;       (let* ((megatestdb     (conc *toppath* "/megatest.db"))
;; 	     (monitordbf     (conc (db:dbfile-path #f) "/monitor.db"))
;; 	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
;; 	(task:register-monitor mdb)
;; 	(let loop ((count      0)
;; 		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
;; 	  ;; if the db has been modified we'd best look at the task queue
;; 	  (let ((modtime (file-modification-time megatestdbpath )))
;; 	    (if (> modtime last-db-update)
;; 		(tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch))
;; 	    ;; WARNING: Possible race conditon here!!
;; 	    ;; should this update be immediately after the task-get-action call above?
;; 	    (if (> (current-seconds) next-touch)
;; 		(begin
;; 		  (tasks:monitors-update mdb)
;; 		  (loop (+ count 1)(+ (current-seconds) 240)))
;; 		(loop (+ count 1) next-touch)))))))
      
;;======================================================================
;; T A S K S   Q U E U E
;;
;;   NOTE:: These operate on task_queue which is in main.db
;;
;;======================================================================
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
;; remove tasks given by a string of numbers comma separated
(define (tasks:remove-queue-entries dbstruct task-ids)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))

#;(define (tasks:process-queue dbstruct)
  (let* ((task   (tasks:snag-a-task dbstruct))
	 (action (if task (tasks:task-get-action task) #f)))
    (if action (print "tasks:process-queue task: " task))
    (if action
	(case (string->symbol action)
	  ((run)       (tasks:start-run     dbstruct task))
	  ((remove)    (tasks:remove-runs   dbstruct task))
	  ((lock)      (tasks:lock-runs     dbstruct task))
	  ;; ((monitor)   (tasks:start-monitor db task))
	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
	  ((updatemeta)(tasks:update-meta   dbstruct task))
	  #;((kill)      (tasks:kill-monitors dbstruct task))))))

(define (tasks:tasks->text tasks)
  (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
    (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
	  (string-intersperse 
	   (map (lambda (task)
		  (format #f fmtstr







|
|
|
|
|
|
|
|
|
|
|
|
|







428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
;; remove tasks given by a string of numbers comma separated
(define (tasks:remove-queue-entries dbstruct task-ids)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))

;; (define (tasks:process-queue dbstruct)
;;   (let* ((task   (tasks:snag-a-task dbstruct))
;; 	 (action (if task (tasks:task-get-action task) #f)))
;;     (if action (print "tasks:process-queue task: " task))
;;     (if action
;; 	(case (string->symbol action)
;; 	  ((run)       (tasks:start-run     dbstruct task))
;; 	  ((remove)    (tasks:remove-runs   dbstruct task))
;; 	  ((lock)      (tasks:lock-runs     dbstruct task))
;; 	  ;; ((monitor)   (tasks:start-monitor db task))
;; 	  #;((rollup)    (tasks:rollup-runs   dbstruct task))
;; 	  ((updatemeta)(tasks:update-meta   dbstruct task))
;; 	  #;((kill)      (tasks:kill-monitors dbstruct task))))))

(define (tasks:tasks->text tasks)
  (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
    (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
	  (string-intersperse 
	   (map (lambda (task)
		  (format #f fmtstr
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
			 param-key state-patt action-patt test-patt)))))

(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
  ;; (handle-exceptions
  ;;  exn
  ;;  '()
  ;;  (sqlite3:first-row
  (let ((db (db:delay-if-busy (db:get-db dbstruct)))
	(res '()))
    (sqlite3:for-each-row 
     (lambda (a . b)
       (set! res (cons (cons a b) res)))
     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
           WHERE
              target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"







|







503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
			 param-key state-patt action-patt test-patt)))))

(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
  ;; (handle-exceptions
  ;;  exn
  ;;  '()
  ;;  (sqlite3:first-row
  (let ((db  (db:get-db dbstruct))
	(res '()))
    (sqlite3:for-each-row 
     (lambda (a . b)
       (set! res (cons (cons a b) res)))
     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
           WHERE
              target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"

Modified tests.scm from [9f0819548e] to [23b44c969b].

42
43
44
45
46
47
48



49
50
51
52
53
54
55
(import commonmod)

(declare (uses dbmod))
(import dbmod)

(declare (uses configfmod))
(import configfmod)




(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
(include "js-path.scm")







>
>
>







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
(import commonmod)

(declare (uses dbmod))
(import dbmod)

(declare (uses configfmod))
(import configfmod)

(declare (uses servermod))
(import servermod)

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
(include "js-path.scm")