Megatest

Changes On Branch b79afa463fdae793
Login

Changes In Branch try-nanomsg Through [b79afa463f] Excluding Merge-Ins

This is equivalent to a diff from ec50f4ac00 to b79afa463f

2014-11-30
09:53
Merged fix for get-tests-for-run(s) check-in: c8184e551e user: matt tags: v1.60
08:04
Archiving check-in: 5ab4109044 user: matt tags: archiving
2014-11-26
21:29
Add timeout on all remote calls check-in: 245a3a0b6d user: matt tags: try-nanomsg
16:09
Added debug support in newdashboard check-in: b79afa463f user: matt tags: try-nanomsg
08:59
Merged v1.60 changes into try-nanomsg check-in: 0b3a6d8aa9 user: matt tags: try-nanomsg
2014-11-25
21:10
Add big delay and take a break when system is clearly overloaded. check-in: ec50f4ac00 user: matt tags: v1.60
16:39
Many tweaks to improve reliability under stress check-in: 0b6b35ab5b user: mrwellan tags: v1.60

Modified Makefile from [64fd867d54] to [4886dc652e].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   http-transport.scm filedb.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm lock-queue.scm sdb.scm \
	   rmt.scm api.scm tdb.scm portlogger.scm

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \








|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   http-transport.scm nmsg-transport.scm filedb.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm lock-queue.scm sdb.scm \
	   rmt.scm api.scm tdb.scm portlogger.scm

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \

Modified api.scm from [a688529701] to [52a89446cf].

47
48
49
50
51
52
53


54



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130


131
132
133
134
135
136
137
138
139
140
141
142
143
144

145
146
147
148
149
150
151
    get-steps-data
    login
    testmeta-get-record))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;


(define (api:execute-requests dbstruct cmd params)



  (case (string->symbol cmd)
    ;; SERVERS
    ((start-server)                 (apply server:kind-run params))
    ((kill-server)                  (set! *server-run* #f))

    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))

    ;; TESTS
    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
    ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))

    ;; RUNS
    ((get-run-info)                 (apply db:get-run-info dbstruct params))
    ((get-run-status)               (apply db:get-run-status dbstruct params))
    ((set-run-status)               (apply db:set-run-status dbstruct params))
    ((register-run)                 (apply db:register-run dbstruct params))
    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
    ((get-test-id)                  (apply db:get-test-id dbstruct params))
    ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
    ((delete-run)                   (apply db:delete-run dbstruct params))
    ((get-runs)                     (apply db:get-runs dbstruct params))
    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
    ((find-and-mark-incomplete)     (apply db:find-and-mark-incomplete dbstruct params))

    ;; STEPS
    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

    ;; TEST DATA
    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
    ((csv->test-data)               (apply db:csv->test-data dbstruct params))
    ((get-steps-data)               (apply db:get-steps-data dbstruct params))

    ;; MISC
    ((login)                        (apply db:login dbstruct params))
    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
    ((sdb-qry)                      (apply sdb:qry params))


    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))


    (else
     (list "ERROR" 0))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (db:string->obj paramsj)) ;; (rmt:json-str->dat paramsj))
	 (res     (api:execute-requests dbstruct cmd params)))


    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))







>
>

>
>
>
|
|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|

|
|
|
|

|
|
|
|
|
|
|
|
|
|
>

|
|
|
|
>
>
|
|











|
>







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
    get-steps-data
    login
    testmeta-get-record))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct cmd params)
  (let ((res
	 (case (if (symbol? cmd) 
		   cmd
		   (string->symbol cmd))
	   ;; SERVERS
	   ((start-server)                 (apply server:kind-run params))
	   ((kill-server)                  (set! *server-run* #f))

	   ;; KEYS
	   ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
	   ((get-keys)                     (db:get-keys dbstruct))

	   ;; TESTS
	   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
	   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
	   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
	   ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
	   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
	   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
	   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
	   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
	   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
	   ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
	   ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
	   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
	   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
	   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
	   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
	   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
	   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
	   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
	   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
	   ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
	   ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
	   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))

	   ;; RUNS
	   ((get-run-info)                 (apply db:get-run-info dbstruct params))
	   ((get-run-status)               (apply db:get-run-status dbstruct params))
	   ((set-run-status)               (apply db:set-run-status dbstruct params))
	   ((register-run)                 (apply db:register-run dbstruct params))
	   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
	   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
	   ((get-test-id)                  (apply db:get-test-id dbstruct params))
	   ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
	   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
	   ((delete-run)                   (apply db:delete-run dbstruct params))
	   ((get-runs)                     (apply db:get-runs dbstruct params))
	   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
	   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
	   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
	   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
	   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
	   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
	   ((find-and-mark-incomplete)     (apply db:find-and-mark-incomplete dbstruct params))

	   ;; STEPS
	   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

	   ;; TEST DATA
	   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
	   ((csv->test-data)               (apply db:csv->test-data dbstruct params))
	   ((get-steps-data)               (apply db:get-steps-data dbstruct params))

	   ;; MISC
	   ((login)                        (apply db:login dbstruct params))
	   ((general-call)                 (let ((stmtname   (car params))
						 (run-id     (cadr params))
						 (realparams (cddr params)))
					     (db:with-db dbstruct run-id #t ;; these are all for modifying the db
							 (lambda (db)
							   (db:general-call db stmtname realparams)))))
	   ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
	   ((sdb-qry)                      (apply sdb:qry params))
	   ((ping)                         (current-process-id))

	   ;; TESTMETA
	   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
	   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
	   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params)))))
    (vector #t res)))
	 ;; NO ELSE - let it return undef
	 ;;(else
	 ;; (list "ERROR" 0))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (db:string->obj paramsj)) ;; (rmt:json-str->dat paramsj))
	 (resdat  (api:execute-requests dbstruct cmd params)) ;; #( flag result )
	 (res     (vector-ref resdat 1)))

    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))

Modified client.scm from [6d1c8717b3] to [ec1ead835f].

59
60
61
62
63
64
65
66
67
68

69



70


71
72
73
74
75
76
77
78
79
80

81
82
83
84
85
86
87
88
89
90
91

92

93


94

95
96
97
98
99
100
101
102

103
104
105
106
107
108
109
110
  (debug:print-info 2 "client:setup remaining-tries=" remaining-tries)
  (let* ((tdbdat (tasks:open-db)))
    (if (<= remaining-tries 0)
	(begin
	  (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	  (exit 1))
	(let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	  (if host-info
	      (let* ((iface     (http-transport:server-dat-get-iface host-info))
		     (port      (http-transport:server-dat-get-port  host-info))

		     (start-res (http-transport:client-connect iface port))



		     (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))


		(if ping-res   ;; sucessful login?
		    (begin
		      (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
		      ;; Why add the close-connections here?
		      ;; (http-transport:close-connections run-id)
		      (hash-table-set! *runremote* run-id start-res)
		      start-res)  ;; return the server info
		    ;; have host info but no ping. shutdown the current connection and try again
		    (begin    ;; login failed
		      (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)

		      (http-transport:close-connections run-id)
		      (hash-table-delete! *runremote* run-id)
		      (if (< remaining-tries 8)
			  (thread-sleep! 5)
			  (thread-sleep! 1))
		      (client:setup run-id remaining-tries: (- remaining-tries 1)))))
	      ;; YUK: rename server-dat here
	      (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
		(debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
		(if server-dat
		    (let* ((iface     (tasks:hostinfo-get-interface server-dat))

			   (port      (tasks:hostinfo-get-port      server-dat))

			   (start-res (http-transport:client-connect iface port))


			   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))

		      (if (and start-res
			       ping-res)
			  (begin
			    (hash-table-set! *runremote* run-id start-res)
			    (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
			    start-res)
			  (begin    ;; login failed but have a server record, clean out the record and try again
			    (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)

			    (http-transport:close-connections run-id)
			    (hash-table-delete! *runremote* run-id)
			    (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
								 run-id 
								 (tasks:hostinfo-get-interface server-dat)
								 (tasks:hostinfo-get-port      server-dat)
								 " client:setup (server-dat = #t)")
			    (thread-sleep! 2)







|


>
|
>
>
>
|
>
>



<
<
<




>
|










>

>
|
>
>
|
>








>
|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  (debug:print-info 2 "client:setup remaining-tries=" remaining-tries)
  (let* ((tdbdat (tasks:open-db)))
    (if (<= remaining-tries 0)
	(begin
	  (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	  (exit 1))
	(let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	  (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME
	      (let* ((iface     (http-transport:server-dat-get-iface host-info))
		     (port      (http-transport:server-dat-get-port  host-info))
		     (start-res (case *transport-type* 
				  ((http)(http-transport:client-connect iface port))
				  ((nmsg) host-info) ;; (http-transport:server-dat-get-socket host-info))
				  (else #f)))
		     (ping-res  (case *transport-type*
				  ((http)(rmt:login-no-auto-client-setup start-res run-id))
				  ((nmsg)(nmsg-transport:ping iface port timeout: 2 socket: #t))
				  (else #f))))
		(if ping-res   ;; sucessful login?
		    (begin
		      (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)



		      start-res)  ;; return the server info
		    ;; have host info but no ping. shutdown the current connection and try again
		    (begin    ;; login failed
		      (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)
		      (case *transport-type*
			((http)(http-transport:close-connections run-id)))
		      (hash-table-delete! *runremote* run-id)
		      (if (< remaining-tries 8)
			  (thread-sleep! 5)
			  (thread-sleep! 1))
		      (client:setup run-id remaining-tries: (- remaining-tries 1)))))
	      ;; YUK: rename server-dat here
	      (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
		(debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
		(if server-dat
		    (let* ((iface     (tasks:hostinfo-get-interface server-dat))
			   (hostname  (tasks:hostinfo-get-hostname  server-dat))
			   (port      (tasks:hostinfo-get-port      server-dat))
			   (start-res (case *transport-type*
					((http)(http-transport:client-connect iface port))
					((nmsg)(nmsg-transport:client-connect hostname port))))
			   (ping-res  (case *transport-type* 
					((http)(rmt:login-no-auto-client-setup start-res run-id))
					((nmsg)(http-transport:server-dat-get-socket start-res))))) ;; socket is the result of a ping
		      (if (and start-res
			       ping-res)
			  (begin
			    (hash-table-set! *runremote* run-id start-res)
			    (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
			    start-res)
			  (begin    ;; login failed but have a server record, clean out the record and try again
			    (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
			    (case *transport-type* 
			      ((http)(http-transport:close-connections run-id)))
			    (hash-table-delete! *runremote* run-id)
			    (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
								 run-id 
								 (tasks:hostinfo-get-interface server-dat)
								 (tasks:hostinfo-get-port      server-dat)
								 " client:setup (server-dat = #t)")
			    (thread-sleep! 2)

Modified common.scm from [aa6e9ff977] to [0598bb95e7].

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
(define *inmemdb*             #f)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *rpc:listener*      #f) ;; if set up for server communication this will hold the tcp port
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)







|
<







63
64
65
66
67
68
69
70

71
72
73
74
75
76
77
(define *inmemdb*             #f)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'nmsg)

(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)

Modified db.scm from [7251c124d5] to [75e6e604f1].

2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
       res))))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)
  (case *transport-type*
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj)))))
      #t))
    ((zmq)(with-output-to-string (lambda ()(serialize obj))))
    (else obj)))

(define (db:string->obj msg)
  (case *transport-type*
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
	   #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    (if msg







|
|









|


|
|












|







2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
       res))))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj #!key (transport 'http))
  (case transport
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj)))))
      #t))
    ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj))))
    (else obj)))

(define (db:string->obj msg #!key (transport 'http))
  (case transport
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
	   #f))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    (if msg

Modified http-transport.scm from [907ced71b2] to [e90ec8f303].

316
317
318
319
320
321
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 5))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))


(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"







|






>







316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
353
354
355
356
357
358
359

360
361
362
363
364
365
366
;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive

  (let* ((tdbdat      (tasks:open-db))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signature")







>







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id)
  (let* ((tdbdat      (tasks:open-db))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signature")
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      ;;
      ;; no_traffic, no running tests, if server 0, no running servers
      ;;
      ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out)
      ;;
      (if (and *server-run*
	       ;; (or
	       (> (+ last-access server-timeout)
		  (current-seconds)))
;;		   (and (eq? run-id 0)
;;			(> (tasks:num-servers-non-zero-running tdb) 0))
;;		   (and (not (eq? run-id 0)) ;; only makes sense in non-zero run-id servers
;;			(> (db:get-count-tests-actually-running *inmemdb* run-id) 0))
;;		   ))
	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    ;;
	    ;; Consider implementing some smarts here to re-insert the record or kill self is
	    ;; the db indicates so
	    ;;
	    ;; (if (tasks:server-am-i-the-server? tdb run-id)







<


<
<
<
<
<







445
446
447
448
449
450
451

452
453





454
455
456
457
458
459
460
      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      ;;
      ;; no_traffic, no running tests, if server 0, no running servers
      ;;
      ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out)
      ;;
      (if (and *server-run*

	       (> (+ last-access server-timeout)
		  (current-seconds)))





	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    ;;
	    ;; Consider implementing some smarts here to re-insert the record or kill self is
	    ;; the db indicates so
	    ;;
	    ;; (if (tasks:server-am-i-the-server? tdb run-id)

Modified newdashboard.scm from [24924c0cda] to [c632e597af].

76
77
78
79
80
81
82




83
84
85
86
87
88
89

;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))






(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
(define *dbstruct-local*  (make-dbr:dbstruct path:  *dbdir*
					     local: #t))
(define *db-file-path* (db:dbfile-path 0))

;; HACK ALERT: this is a hack, please fix.







>
>
>
>







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
(define *dbstruct-local*  (make-dbr:dbstruct path:  *dbdir*
					     local: #t))
(define *db-file-path* (db:dbfile-path 0))

;; HACK ALERT: this is a hack, please fix.

Added nmsg-transport.scm version [b2990ea4bd].













































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use nanomsg)

(declare (unit nmsg-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))

(include "common_records.scm")
(include "db_records.scm")

;; Transition to pub --> sub with pull <-- push
;;
;;   1. client sends request to server via push to the pull port
;;   2. server puts request in queue or processes immediately as appropriate
;;   3. server puts responses from completed requests into pub port 
;;
;; TODO
;;
;; Done Tested
;; [x]  [ ]    1. Add columns pullport pubport to servers table
;; [x]  [ ]    2. Add rm of monitor.db if older than 11/12/2012 
;; [x]  [ ]    3. Add create of pullport and pubport with finding of available ports
;; [x]  [ ]    4. Add client compose of request
;; [x]  [ ]        - name of client: testname/itempath-test_id-hostname 
;; [x]  [ ]        - name of request: callname, params
;; [x]  [ ]        - request key: f(clientname, callname, params)
;; [x]  [ ]    5. Add processing of subscription hits
;; [x]  [ ]        - done when get key 
;; [x]  [ ]        - return results
;; [x]  [ ]    6. Add timeout processing
;; [x]  [ ]        - after 60 seconds
;; [ ]  [ ]            i. check server alive, connect to new if necessary
;; [ ]  [ ]           ii. resend request
;; [ ]  [ ]    7. Turn self ping back on

(define (nmsg-transport:make-server-url hostport #!key (bindall #f))
  (if (not hostport)
      #f
      (conc "tcp://" (if bindall "*" (car hostport)) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

(define (nmsg-transport:run dbstruct hostn run-id server-id)
  (debug:print 2 "Attempting to start the server ...")
  (let* ((start-port      (portlogger:open-run-close portlogger:find-port))
	 (server-thread   (make-thread (lambda ()
					 (nmsg-transport:try-start-server dbstruct run-id start-port server-id))
				       "server thread"))
	 (tdbdat          (tasks:open-db)))
    (thread-start! server-thread)
    (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id))
	(let ((interface (if (equal? hostn "-")(get-host-name) hostn)))
	  (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
	  (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running
	  (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access
	  (set! *inmemdb*  dbstruct)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
	  (thread-start! (make-thread
			  (lambda ()(nmsg-transport:keep-running server-id))
			  "keep running"))
	  (thread-join! server-thread))
	(begin
	  (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
	  (portlogger:open-run-close portlogger:set-failed start-port)
	  (nmsg-transport:run dbstruct hostn run-id server-id)))))

(define (nmsg-transport:try-start-server dbstruct run-id portnum server-id)
  (let ((repsoc (nn-socket 'rep)))
    (nn-bind repsoc (conc "tcp://*:" portnum))
    (let loop ((msg-in (nn-recv repsoc)))
      (cond
       ((equal? msg-in "quit")
	(nn-send repsoc "Ok, quitting"))
       ((and (>= (string-length msg-in) 4)
	     (equal? (substring msg-in 0 4) "ping"))
	(nn-send repsoc (conc (current-process-id)))
	(loop (nn-recv repsoc)))
       (else
	(let* ((dat    (db:string->obj msg-in transport: 'nmsg))
	       (cmd    (vector-ref dat 0))
	       (params (vector-ref dat 1))
	       (result (api:execute-requests dbstruct cmd params))
	       (newdat (db:obj->string result transport: 'nmsg)))
	  (nn-send repsoc newdat)
	  (loop (nn-recv repsoc))))))))


;; all routes though here end in exit ...
;;
(define (nmsg-transport:launch run-id)
  (let* ((tdbdat   (tasks:open-db))
	 (dbstruct (db:setup run-id))
	 (hostn    (or (args:get-arg "-server") "-")))
    (set! *run-id*   run-id)
    ;; with nbfake daemonize isn't really needed
    ;;
    ;; (if (args:get-arg "-daemonize")
    ;;     (begin
    ;;       (daemon:ize)
    ;;       (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
    ;;           (begin
    ;;     	(current-error-port *alt-log-file*)
    ;;     	(current-output-port *alt-log-file*)))))
    (if (server:check-if-running run-id)
	(begin
	  (debug:print-info 0 "Server for run-id " run-id " already running")
	  (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
	       (remtries  4))
      (if (not server-id)
	  (if (> remtries 0)
	      (begin
		(thread-sleep! 2)
		(if (not (server:check-if-running run-id))
		    (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
			  (- remtries 1))
		    (begin
		      (debug:print-info 0 "Another server took the slot, exiting")
		      (exit 0))))
	      (begin
		;; since we didn't get the server lock we are going to clean up and bail out
		(debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
		(tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch")
		))
	  ;; locked in a server id, try to start up
	  (nmsg-transport:run dbstruct hostn run-id server-id))
      (set! *didsomething* #t)
      (exit))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

(define (nmsg-transport:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

;; ping the server at host:port
;;   return the open socket if successful (return-socket == #t)
;;   expect the key expected-key returned in payload
;;   send our-key or #f as payload
;;
(define (nmsg-transport:ping hostn port #!key (timeout 3)(return-socket #t)(expected-key #f)(our-key #f)(socket #f))
  ;; send a random number along with pid and check that we get it back
  (let* ((req     (or socket (nn-socket 'req)))
	 (host    (if (or (not hostn)
			  (equal? hostn "-")) ;; use localhost
		      (get-host-name)
		      hostn))
	 (success #f)
	 (keepwaiting #t)
	 (dat     (db:obj->string (vector "ping" our-key) transport: 'nmsg))
	 (ping    (make-thread
		   (lambda ()
		     (nn-send req dat)
		     (let* ((result  (nn-recv req))
			    (key     (vector-ref (db:string->obj result transport: 'nmsg) 1)))
		       (if (or (not expected-key) ;; just getting a reply is good enough then
			       (equal? key expected-key)) 
			   (begin
			     ;; (print "ping, success: received \"" result "\"")
			     (set! success #t))
			   (begin
			     ;; (print "ping, failed: received key \"" result "\"")
			     (set! keepwaiting #f)
			     (set! success #f)))))
		   "ping"))
	 (timeout (make-thread (lambda ()
				 (let loop ((count 0))
				   (thread-sleep! 1)
				   (print "still waiting after count seconds...")
				   (if (and keepwaiting (< count timeout)) ;; yes, this is very aproximate
				       (loop (+ count 1))))
				 (if keepwaiting
				     (begin
				       (print "timeout waiting for ping")
				       (thread-terminate! ping))))
			       "timeout")))
    (if (not socket)(nn-connect req (conc "tcp://" host ":" port)))
    (handle-exceptions
     exn
     (begin
       ;; (print-call-chain)
       ;; (print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       ;; (print "exn=" (condition->list exn))
       (debug:print-info 1 "ping failed to connect to " host ":" port))
     (thread-start! timeout)
     (thread-start! ping)
     (thread-join! ping)
     (if success (thread-terminate! timeout)))
    (if return-socket
	(if success req #f)
	(begin
	  (nn-close req) ;; should it be closed if we were handed a socket?
	  success))))

;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (nmsg-transport:keep-running server-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat 
			      (begin
				(debug:print-info 0 "keep-running got sdat=" sdat)
				sdat)
                              (begin
                                (thread-sleep! 0.5)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdbdat      (tasks:open-db))
	 (server-timeout (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; (* 3 24 60 60) ;; default to three days
			       (* 60 1)         ;; default to one minute
			       ;; (* 60 60 25)      ;; default to 25 hours
			       ))))
    (print "Keep-running got server pid " server-id ", using iface " iface " and port " port)
    (let loop ((count 0))
      (thread-sleep! 4) ;; no need to do this very often
      ;; NB// sync currently does NOT return queue-length
      (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
      ;; (print "Server running, count is " count)
        (if (< count 1) ;; 3x3 = 9 secs aprox
            (loop (+ count 1)))
        
        ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
        (if (and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
            (begin
              (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              (set! *time-to-exit* #t)
              (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running")
              (debug:print-info 0 "Server shutdown complete. Exiting")
              (exit)
	      ))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

(define (nmsg-transport:client-connect iface portnum)
  (let* ((reqsoc      (nmsg-transport:ping iface portnum return-socket: #t)))
    (vector iface portnum #f #f #f (current-seconds) reqsoc)))

(define (nmsg-transport:client-api-send-receive run-id connection-info cmd param)
  (mutex-lock! *http-mutex*)
  (let ((packet  (vector cmd param))
	(reqsoc  (http-transport:server-dat-get-socket connection-info)))
    (nn-send reqsoc (db:obj->string packet transport: 'nmsg))
    (let ((res (db:string->obj (nn-recv reqsoc) transport: 'nmsg)))
      (mutex-unlock! *http-mutex*)
      res)))

;;======================================================================
;; J U N K 
;;======================================================================

;; DO NOT USE
;;
(define (nmsg-transport:client-signal-handler signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (if (not *received-response*)
				 (receive-message* *runremote*))) ;; flush out last call if applicable
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 "       Done.")
			     (exit 4))
			   "exit on ^C timer")))
     (thread-start! th2)
     (thread-start! th1)
     (thread-join! th2))))

Modified rmt.scm from [4e95475281] to [2b271743b3].

11
12
13
14
15
16
17

18
19
20
21
22
23
24

(use json format)

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))


;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;







>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

(use json format)

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
(declare (uses nmsg-transport))

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87

88
89

90


91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
	cinfo
	;; NB// can cache the answer for server running for 10 seconds ...
	;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 0))
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time))
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")

	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id))
	 (jparams         (db:obj->string params)))

    (if connection-info
	;; use the server if have connection info

	(let* ((dat     (http-transport:client-api-send-receive run-id connection-info cmd jparams))


	       (res     (if (and dat (vector? dat)) (vector-ref dat 1) #f))
	       (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if success

	      (db:string->obj res)
	      ;; (if (< attemptnum 100)
	      ;;     (begin
	      ;;       (hash-table-delete! *runremote* run-id)
	      ;;       (thread-sleep! 0.5)
	      ;;       (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1)))
	      ;;     (begin
	      ;;       (print-call-chain (current-error-port))
	      ;;       (debug:print 0 "ERROR: too many attempts to communicate have failed. Giving up. Kill your mtest processes and start over")
	      ;;       (exit 1)))))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server







<
<
<











>






>


>
|
>
>




>
|
<
<
<
|
<
<
<
<
<







62
63
64
65
66
67
68



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99



100





101
102
103
104
105
106
107
	cinfo
	;; NB// can cache the answer for server running for 10 seconds ...
	;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))




(define (rmt:send-receive cmd rid params #!key (attemptnum 0))
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time))
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       ;; SHOULD CLOSE THE CONNECTION HERE
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id))
	 (jparams         (db:obj->string params)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*
			  ((http)(http-transport:client-api-send-receive run-id connection-info cmd jparams))
			  ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info cmd params))
			  (else  (exit))))
	       (res     (if (and dat (vector? dat)) (vector-ref dat 1) #f))
	       (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if success
	      (case *transport-type* 
		((http)(db:string->obj res))



		((nmsg) res))





	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server
182
183
184
185
186
187
188
189

190
191
192
193
194
195
196
			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))

	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats run-id cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))







|
>







178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (resdat        (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (res           (vector-ref resdat 1))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats run-id cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))

Modified runs.scm from [396462afab] to [f2976f1213].

942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))

	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)
		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 240))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))







|







942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))

	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)
		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 240))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))

Modified server.scm from [f2b9d5f3d9] to [3a939720aa].

18
19
20
21
22
23
24

25
26
27
28
29
30
31
(declare (unit server))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses synchash))
(declare (uses http-transport))

(declare (uses launch))
;; (declare (uses zmq-transport))
(declare (uses daemon))

(include "common_records.scm")
(include "db_records.scm")








>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
(declare (unit server))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses synchash))
(declare (uses http-transport))
(declare (uses nmsg-transport))
(declare (uses launch))
;; (declare (uses zmq-transport))
(declare (uses daemon))

(include "common_records.scm")
(include "db_records.scm")

45
46
47
48
49
50
51

52


53
54
55
56
57
58
59
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)

  (http-transport:launch run-id))



;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))







>
|
>
>







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  (case *transport-type*
    ((http)(http-transport:launch run-id))
    ((nmsg)(nmsg-transport:launch run-id))
    (else (debug:print 0 "ERROR: unknown server type " *transport-type*))))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))
134
135
136
137
138
139
140

141
142
143



144
145
146
147
148
149
150
	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;

	(let ((res (server:ping-server run-id 
				       (tasks:hostinfo-get-interface server)
				       (tasks:hostinfo-get-port      server))))



	  ;; if the server didn't respond we must remove the record
	  (if res
	      #t
	      (begin
		(debug:print-info 0 "server at " server " not responding, removing record")
		(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id 
				" server:check-if-running")







>
|
|
|
>
>
>







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (case *transport-type*
		     ((http)(server:ping-server run-id 
						(tasks:hostinfo-get-interface server)
						(tasks:hostinfo-get-port      server)))
		     ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
						 (tasks:hostinfo-get-port      server)
						 timeout: 2)))))
	  ;; if the server didn't respond we must remove the record
	  (if res
	      #t
	      (begin
		(debug:print-info 0 "server at " server " not responding, removing record")
		(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id 
				" server:check-if-running")

Modified tasks.scm from [af4bc3dbb1] to [42971aa928].

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
   (get-host-name)            ;; hostname
   -1                         ;; port
   -1                         ;; pubport
   (random 1000)              ;; priority (used a tiebreaker on get-available)
   "available"                ;; state
   (common:version-signature) ;; mt_version
   -1                         ;; interface
   "http"                     ;; transport
   run-id
   ))

(define (tasks:num-in-available-state mdb run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (num-in-queue)







|







183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
   (get-host-name)            ;; hostname
   -1                         ;; port
   -1                         ;; pubport
   (random 1000)              ;; priority (used a tiebreaker on get-available)
   "available"                ;; state
   (common:version-signature) ;; mt_version
   -1                         ;; interface
   (conc *transport-type*)    ;; transport
   run-id
   ))

(define (tasks:num-in-available-state mdb run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (num-in-queue)

Added testnanomsg/basic-req-rep.scm version [1436c827c9].







>
>
>
1
2
3
(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))

Added testnanomsg/mockupclient.scm version [63a8c6685a].





















































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
(use zmq posix numbers)

(define cname "Bob")
(define runtime 10)
(let ((args (argv)))
  (if (< (length args) 3)
      (begin
	(print "Usage: mockupclient clientname runtime")
	(exit))
      (begin
	(set! cname (cadr args))
	(set! runtime (string->number (caddr args))))))
      
;; (define start-delay (/ (random 100) 9))
;; (define runtime     (+ 1 (/ (random 200) 2)))

(print "Starting client " cname " with runtime " runtime)

(include "mockupclientlib.scm")

(set! endtime (+ (current-seconds) runtime))

;; first ping the server to ensure we have a connection
(if (server-ping cname 5)
    (print "SUCCESS: Client " cname " connected to server")
    (begin
      (print "ERROR: Client " cname " failed ping of server, exiting")
      (exit)))

(let loop ()
  (let ((x (random 15))
	(varname (list-ref (list "hello" "goodbye" "saluton" "kiaorana")(random 4))))
    (case x
      ;; ((1)(dbaccess cname 'sync "nodat"    #f))
      ((2 3 4 5)(dbaccess cname 'set varname (random 999)))
      ((6 7 8 9 10)(print cname ": Get \"" varname "\" " (dbaccess cname 'get varname #f)))
      (else
       (thread-sleep! 0.011)))
    (if (< (current-seconds) endtime)
	(loop))))

(print "Client " cname " all done!!")

Added testnanomsg/mockupclientlib.scm version [3b245ba7a9].





















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
(define reqs (nn-socket 'req))

(connect-socket reqs "tcp://localhost:6563")

(thread-sleep! 0.2)

(define (server-ping cname timeout)
  (let ((msg     (conc cname ":ping:" timeout))
	(maxtime (+ (current-seconds) timeout)))
    (print "pinging server from " cname " with timeout " timeout)
    (let loop ((res   #f))
      (if (< maxtime (current-seconds))
	  #f ;; failed to ping
	  (if (equal? res "Got ping")
	      #t
	      (begin
		(print "Ping received from server " res)
		(send-message push msg)
		(thread-sleep! 0.1)
		(loop (receive-message sub non-blocking: #t))))))))
  
(define (dbaccess cname cmd var val #!key (numtries 20))
  (let* ((msg (conc cname ":" cmd ":" (if val (conc var " " val) var)))
	 (res #f)
	 (mtx1 (make-mutex))
	 (do-access (lambda ()
		      (let ((tmpres #f))
			(print "Sending msg: " msg)
			(send-message push msg)
			(print "Message " msg " sent")
			(print "Client " cname " waiting for response to " msg)
			(print "Client " cname " received address " (receive-message* sub))
			(set! tmpres (receive-message* sub))
			(mutex-lock! mtx1)
			(set! res tmpres)
			(mutex-unlock! mtx1))))
	 (th1 (make-thread do-access "do access"))
	 (th2 (make-thread (lambda ()
			     (let ((result #f))
			       (mutex-lock! mtx1)
			       (set! result res)
			       (mutex-unlock! mtx1)
			       (thread-sleep! 5)
			       (if (not result)
				   (if (> numtries 0)
				       (begin
					 (print "WARNING: access timed out for " cname ", trying again. Trys remaining=" numtries)
					 (dbaccess cname cmd var val numtries: (- numtries 1)))
				       (begin
					 (print "ERROR: dbaccess timed out. Exiting")
					 (exit)))))
			     "timeout thread"))))
    (thread-start! th1)
    (thread-start! th2)
    (thread-join! th1)
    (if res (print "SUCCESS: received " res " with " numtries " remaining possible attempts"))
    res))

Added testnanomsg/mockupserver.scm version [a4d3e5594c].





































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
;; pub/sub with envelope address
;; Note that if you don't insert a sleep, the server will crash with SIGPIPE as soon
;; as a client disconnects.  Also a remaining client may receive tons of
;; messages afterward.

(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))
(define cname "server")
(define total-db-accesses 0)
(define start-time (current-seconds))

(nn-bind resp  "tcp://*:6563")

(thread-sleep! 0.2)

(define (open-db)
  (let* ((dbpath    "mockup.db")
	 (dbexists  (file-exists? dbpath))
	 (db        (open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout 10)))
    (set-busy-handler! db handler)
    (if (not dbexists)
	(for-each
	 (lambda (stmt)
	   (execute db stmt))
	 (list
	  "PRAGMA SYNCHRONOUS=0;"
	  "CREATE TABLE clients (id INTEGER PRIMARY KEY,name TEXT,num_accesses INTEGER DEFAULT 0);"
	  "CREATE TABLE vars    (var TEXT,val TEXT,CONSTRAINT vars_constraint UNIQUE (var));")))
    db))

(define cid-cache (make-hash-table))

(define (get-client-id db cname)
  (let ((cid (hash-table-ref/default cid-cache cname #f)))
    (if cid 
	cid
	(begin
	  (execute db "INSERT OR REPLACE INTO clients (name) VALUES(?);" cname)
	  (for-each-row 
	   (lambda (id)
	     (set! cid id))
	   db
	   "SELECT id FROM clients WHERE name=?;" cname)
	  (hash-table-set! cid-cache cname cid)
	  (set! total-db-accesses (+ total-db-accesses 2))
	  cid))))

(define (count-client db cname)
  (let ((cid (get-client-id db cname)))
    (execute db "UPDATE clients SET num_accesses=num_accesses+1 WHERE id=?;" cid)
    (set! total-db-accesses (+ total-db-accesses 1))
    ))

(define db (open-db))
;; (define queuelst '())
;; (define mx1 (make-mutex))

(define max-queue-len 0)

(define (process-queue queuelst)
  (let ((queuelen (length queuelst)))
    (if (> queuelen max-queue-len)
	(set! max-queue-len queuelen))
    (for-each
     (lambda (item)
       (let ((cname (vector-ref item 1))
	     (clcmd (vector-ref item 2))
	     (cdata (vector-ref item 3)))
	 (send-message pub cname send-more: #t)
	 (send-message pub (case clcmd
			     ((sync)
			      (conc queuelen))
			     ((set)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (apply execute db "INSERT OR REPLACE INTO vars (var,val) VALUES (?,?);" (string-split cdata))
			      "ok")
			     ((get)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (let ((res "noval"))
				(for-each-row
				 (lambda (val)
				   (set! res val))
				 db 
				 "SELECT val FROM vars WHERE var=?;" cdata)
				res))
			     (else (conc "unk cmd: " clcmd))))))
     queuelst)))

;; SERVER THREAD
(define th1 (make-thread 
	     (lambda ()
	       (let ((last-run 0)) ;; current-seconds when run last
		 (let loop ((queuelst '()))
		   (let* ((indat (receive-message* pull))
			  (parts (string-split indat ":"))
			  (cname (car parts))                   ;; client name
			  (clcmd (string->symbol (cadr parts))) ;; client cmd
			  (cdata (caddr parts))                 ;; client data
			  (svect (vector (current-seconds) cname clcmd cdata))) ;; record for the queue
		     ;; (print "Server received message: " indat)
		     (count-client db cname)
		     (case clcmd
		       ((ping)
			(print "Got ping from " cname)
			(send-message pub cname send-more: #t)
			(send-message pub "Got ping")
			(loop queuelst))
		       ((sync) ;; just process the queue
			(print "Got sync from " cname)
			(process-queue (cons svect queuelst))
			(loop '()))
		       ((get)
			(process-queue (cons svect queuelst))
			(loop '()))
		       (else
			(loop (cons svect queuelst))))))))
	     "server thread"))

(include "mockupclientlib.scm")

;; SYNC THREAD
;; send a sync to the pull port
(define th2 (make-thread
	     (lambda ()
	       (let ((last-action-time (current-seconds)))
		 (let loop ()
		   (thread-sleep! 5)
		   (let ((queuelen (string->number (dbaccess "server" 'sync "nada" #f)))
			 (last-action-delta #f))
		     (if (> queuelen 1)(set! last-action-time (current-seconds)))
		     (set! last-action-delta (- (current-seconds) last-action-time))
		     (print "Server: Got queuelen=" queuelen ", last-action-delta=" last-action-delta)
		     (if (< last-action-delta 60)
			 (loop)
			 (print "Server exiting, 25 seconds since last access"))))))
	     "sync thread"))

(thread-start! th1)
(thread-start! th2)
(thread-join! th2)

(let* ((run-time       (- (current-seconds) start-time))
       (queries/second (/  total-db-accesses run-time)))
  (print "Server exited! Total db accesses=" total-db-accesses " in " run-time " seconds for " queries/second " queries/second with max queue length of: " max-queue-len))

Added testnanomsg/pipeline.scm version [1d4d831eb6].



















































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define push (nn-socket 'push))
(define pull1 (nn-socket 'pull))
(define pull2 (nn-socket 'pull))

(nn-bind    push  "inproc://test")
(nn-connect pull1 "inproc://test")
(nn-connect pull2 "inproc://test")

(nn-send push "a")
(nn-send push "b")
(nn-send push "c")
(nn-send push "d")

(define ((th sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) " is done"))

(thread-start! (th pull1))
(thread-start! (th pull2))

(thread-sleep! 1)

Added testnanomsg/req-rep-client.scm version [7998d54555].





























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))

(nn-connect req  "tcp://localhost:22022")

;; (with-output-to-string (lambda ()(serialize obj)))
(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((talk-to-server soc))
  (let loop ((cnt 20))
    (let ((name (list-ref '("Matt" "Tom" "Bob" "Jill" "James" "Jane")(random 6))))
      (print "Sending " name)
      (print (client-send-receive req name))
      (if (> cnt 0)(loop (- cnt 1)))))
  (print (client-send-receive req "quit"))
  (nn-close req)
  (exit))

;; (thread-start! (lambda ()
;; 		 (thread-sleep! 20)
;; 		 (print "Give up on waiting for the server")
;; 		 (nn-close req)
;; 		 (exit)))

(thread-join! (thread-start! (talk-to-server req)))

Added testnanomsg/req-rep-server.scm version [d9de6da037].





















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

;; (use trace)
;; (trace nn-bind nn-socket nn-assert nn-recv nn-send thread-terminate! nn-close )

(define port  22022)
(define host  "127.0.0.1")

(define rep   (nn-socket 'rep))

(print "connecting, got: " (nn-bind    rep  (conc "tcp://" "*" ":" port)))

(define (server soc)
  (print "server starting")
  (let loop ((msg-in (nn-recv soc)))
    (print "server received: " msg-in)
    (cond
     ((equal? msg-in "quit")
      (nn-send soc "Ok, quitting"))
     ((and (>= (string-length msg-in) 4)
	   (equal? (substring msg-in 0 4) "ping"))
      (nn-send soc (conc (current-process-id)))
      (loop (nn-recv soc)))
     ;;((and (>= (string-length msg-in)
     (else
      (let ((this-task (random 15)))
	(thread-sleep! this-task)
	(nn-send soc (conc "hello " msg-in " this task took " this-task " seconds to complete"))
	(loop (nn-recv soc)))))))

(define (ping-self host port #!key (return-socket #t))
  ;; send a random number along with pid and check that we get it back
  (let* ((req     (nn-socket 'req))
	 (key     "ping")
	 (success #f)
	 (keepwaiting #t)
	 (ping    (make-thread
		   (lambda ()
		     (print "ping: sending string \"" key "\", expecting " (current-process-id))
		     (nn-send req key)
		     (let ((result  (nn-recv req)))
		       (if (equal? (conc (current-process-id)) result)
			   (begin
			     (print "ping, success: received \"" result "\"")
			     (set! success #t))
			   (begin
			     (print "ping, failed: received key \"" result "\"")
			     (set! keepwaiting #f)
			     (set! success #f)))))
		   "ping"))
	 (timeout (make-thread (lambda ()
				 (let loop ((count 0))
				   (thread-sleep! 1)
				   (print "still waiting after count seconds...")
				   (if (and keepwaiting (< count 10))
				       (loop (+ count 1))))
				 (if keepwaiting
				     (begin
				       (print "timeout waiting for ping")
				       (thread-terminate! ping))))
			       "timeout")))
    (nn-connect req (conc "tcp://" host ":" port))
    (handle-exceptions
     exn
     (begin
       (print-call-chain)
       (print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (print "exn=" (condition->list exn))
       (print "ping failed to connect to " host ":" port))
     (thread-start! timeout)
     (thread-start! ping)
     (thread-join! ping)
     (if success (thread-terminate! timeout)))
    (if return-socket
	(if success req #f)
	(begin
	  (nn-close req)
	  success))))

(let ((server-thread (make-thread (lambda ()(server rep)) "server")))
  (thread-start! server-thread)
  ;; (thread-sleep! 1)
  (if (ping-self host port)
      (begin
	(thread-join! server-thread)
	(nn-close rep))
      (print "ping failed")))

(exit)

Added testnanomsg/req-rep.scm version [b77ebf1421].





























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))
(define rep   (nn-socket 'rep))

(nn-bind    rep  "inproc://test")
(nn-connect req  "inproc://test")

(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((server soc))
  (let loop ((msg-in (nn-recv soc)))
    (if (not (equal? msg-in "quit"))
	(begin
	  (nn-send soc (conc "hello " msg-in))
	  (loop (nn-recv soc))))))

(thread-start! (server rep))

(print (client-send-receive req "Matt"))
(print (client-send-receive req "Tom"))

;; (client-send-receive req "quit")

(nn-close req)
(nn-close rep)
(exit)

Modified tests/Makefile from [502a984b43] to [608cf0bdb8].

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

stopserver :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -repl

test0 : cleanprep
	cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)

test1 : cleanprep

test2 : fullprep







|







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

stopserver :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -:b -repl

test0 : cleanprep
	cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)

test1 : cleanprep

test2 : fullprep
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Some simple checks for bootstrapping and run loop logic 

test9 : minsetup test9a test9b test9c test9d test9e

test9a :
	@echo Run super-simple mintest e, no waitons.
	cd mintest;$(DASHBOARD)&
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)

test9b :
	@echo Run simple mintest d with one waiton c
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)

test9c :
	@echo Run mintest a with full waiton chain a -> b -> c -> d -> e
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)

test9d :
	@echo Run an itemized test with no items
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)

test9e :
	@echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)

test10 :
	@echo Run a bunch of different targets simultaneously
	(cd fullrun;$(MEGATEST) -server - ;sleep 2)&
	for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \
	   (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done
	for sys in ubuntu suse redhat debian;do \







|



|



|



|



|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Some simple checks for bootstrapping and run loop logic 

test9 : minsetup test9a test9b test9c test9d test9e

test9a :
	@echo Run super-simple mintest e, no waitons.
	cd mintest;$(DASHBOARD)&
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG)

test9b :
	@echo Run simple mintest d with one waiton c
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9c :
	@echo Run mintest a with full waiton chain a -> b -> c -> d -> e
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9d :
	@echo Run an itemized test with no items
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9e :
	@echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test10 :
	@echo Run a bunch of different targets simultaneously
	(cd fullrun;$(MEGATEST) -server - ;sleep 2)&
	for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \
	   (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done
	for sys in ubuntu suse redhat debian;do \

Modified tests/fullrun/megatest.config from [a6f800861f] to [7518de8980].

134
135
136
137
138
139
140
141
142
143

144
145
146
147
148
149
150
151
152

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.1

# Server is required - slower but more resistant to Sqlite issues.
# required yes

# Start server when average query takes longer than this

server-query-threshold 100
# 55500

# daemonize yes
# hostname #{scheme (get-host-name)}

## disks are:
## name host:/path/to/area
## -or-







|


>
|
<







134
135
136
137
138
139
140
141
142
143
144
145

146
147
148
149
150
151
152

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.1

# Server is required - slower but more resistant to Sqlite issues.
required yes

# Start server when average query takes longer than this
# server-query-threshold 55500
server-query-threshold -1


# daemonize yes
# hostname #{scheme (get-host-name)}

## disks are:
## name host:/path/to/area
## -or-

Modified tests/mintest/megatest.config from [158955d103] to [74b434d2c6].

1
2
3
4
5
6
7
8
9
10
11
12
13
[fields]
X TEXT

[setup]
max_concurrent_jobs 50
linktree #{getenv PWD}/linktree
transport http

[server]
port 8090

[jobtools]
useshell yes





|







1
2
3
4
5
6
7
8
9
10
11
12
13
[fields]
X TEXT

[setup]
max_concurrent_jobs 50
linktree #{getenv MT_RUN_AREA_HOME}/linktree
transport http

[server]
port 8090

[jobtools]
useshell yes

Modified utils/Makefile.installall from [c3d10e5280] to [507fd637d5].

143
144
145
146
147
148
149
















150
151
152
153
154
155
156

$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION)/config.log
	cd sqlite-autoconf-$(SQLITE3_VERSION);./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/sqlite3.so : $(PREFIX)/bin/sqlite3
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) sqlite3

















#======================================================================
# M A T T S   U T I L S
#======================================================================

opensrc.fossil :
	fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION)/config.log
	cd sqlite-autoconf-$(SQLITE3_VERSION);./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/sqlite3.so : $(PREFIX)/bin/sqlite3
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) sqlite3

#======================================================================
# N  A N O M S G
#======================================================================

nanomsg-0.5-beta.tar.gz :
	wget http://download.nanomsg.org/nanomsg-0.5-beta.tar.gz

nanomsg-0.5-beta/COPYING : nanomsg-0.5-beta.tar.gz
	tar xfvz nanomsg-0.5-beta.tar.gz

$(PREFIX)/bin/nanocat : nanomsg-0.5-beta/COPYING
	cd nanomsg-0.5-beta;./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/nanomsg.so : $(PREFIX)/bin/nanocat
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) nanomsg

#======================================================================
# M A T T S   U T I L S
#======================================================================

opensrc.fossil :
	fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil

Modified utils/plot-code.scm from [de4d05b676] to [e7d92b1b39].

1



2
3
4
5
6
7
8
#!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq




(use regex srfi-69 srfi-13)

(define targs #f) 
(define files (cddddr (argv)))

(let ((targdat (cadddr (argv))))

>
>
>







1
2
3
4
5
6
7
8
9
10
11
#!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq

;; Usage: plot-code file1.scm,file2.scm *.scm > plot.dot
;;        dot -Tpdf plot.dot > plot.pdf

(use regex srfi-69 srfi-13)

(define targs #f) 
(define files (cddddr (argv)))

(let ((targdat (cadddr (argv))))

Added utils/trace/trace.import.scm version [937dcb55c1].

































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
;;;; trace.import.scm - GENERATED BY CHICKEN 4.9.0.1 -*- Scheme -*-

(eval '(import
         scheme
         chicken
         csi
         advice
         extras
         ports
         data-structures
         (except srfi-1 break)
         miscmacros))
(##sys#register-compiled-module
  'trace
  (list)
  '((breakpoint . trace#breakpoint)
    (trace . trace#trace)
    (untrace . trace#untrace)
    (break . trace#break)
    (unbreak . trace#unbreak)
    (trace-output-port . trace#trace-output-port)
    (continue . trace#continue)
    (c . trace#c)
    (traced? . trace#traced?)
    (trace-module . trace#trace-module)
    (untrace-module . trace#untrace-module)
    (trace-verbose . trace#trace-verbose)
    (trace/untrace . trace#trace/untrace))
  (list)
  (list))

;; END OF FILE

Added utils/trace/trace.meta version [9714181a62].





















>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
;;;; trace.meta -*- Scheme -*-


((category tools)
 (synopsis "tracing and breakpoints")
 (author "felix winkelmann")
 (license "public domain")
 (needs advice				; don't we all?
	miscmacros)
 (files "tests/run.scm" "trace.meta" "trace.release-info" "trace.scm" "trace.setup") )

Added utils/trace/trace.scm version [dc3560e035].







































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
;;;; trace.scm


(module trace (breakpoint 
	       trace untrace
	       break unbreak
	       trace-output-port
	       continue c 
	       traced?
	       trace-module untrace-module
	       trace-verbose
	       trace/untrace)
	       
(import scheme chicken csi)

(use advice extras ports data-structures)
(require-library srfi-1)
(import (except srfi-1 break) miscmacros)


(define *last-breakpoint* #f)
(define *traced-procedures* '())
(define *broken-procedures* '())
(define *trace-indent-level* 0)

(define trace-output-port (make-parameter (current-output-port)))
(define trace-verbose (make-parameter #t))

(define (break-entry name args)
  ;; Does _not_ unwind!
  (##sys#call-with-current-continuation
   (lambda (c)
     (let ((exn (##sys#make-structure
		 'condition
		 '(exn breakpoint)
		 (list '(exn . message) "*** breakpoint ***"
		       '(exn . arguments) (list (cons name args))
		       '(exn . location) name
		       '(exn . continuation) c) ) ) )
       (set! *last-breakpoint* exn)
       (signal exn) ) ) ) )

(define (break-resume exn)
  (let ((a (member '(exn . continuation) (##sys#slot exn 2))))
    (if a
	((cadr a) (void))
	(error "condition has no continuation" exn) ) ) )

(define (breakpoint #!optional (name 'breakpoint))
  (break-entry name '()) )

(define (trace-indent)
  (let ((port (trace-output-port)))
    (do ((i (fxmin 3 *trace-indent-level*) (fx- i 1)))
	((fx<= i 0))
      (write-char #\space port) )
    (fprintf port "[~a] " *trace-indent-level*) ) )

(define (traced-procedure-entry name args)
  (let ((port (trace-output-port)))
    (trace-indent)
    (set! *trace-indent-level* (fx+ 1 *trace-indent-level*))
    (write (cons name args) port)
    (write ", Called from: " port)
    (write (conc (car (reverse (get-call-chain)))))
    (write-char #\newline port)
    (flush-output port) ) )

(define (traced-procedure-exit name results)
  (let ((port (trace-output-port)))
    (set! *trace-indent-level* (fx- *trace-indent-level* 1))
    (trace-indent)
    (fprintf port "~a -> " name)
    (if results
	(for-each
	 (lambda (x)
	   (write x port)
	   (write-char #\space port) )
	 results)
	(display "(escaping)" port))
    (write-char #\newline port)
    (flush-output port) ) )

(define (procedure-name proc)
  (cond ((procedure-information proc) =>
	 (lambda (info)
	   (if (pair? info) (car info) info) ) )
	(else '<unknown>)) )

(define (do-trace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (cond ((traced? s)
	    (warning "procedure already traced" s) )
	   (else
	    (let ((name (procedure-name s)))
	      (when (trace-verbose)
		(fprintf (current-error-port) "; tracing ~a~%" name))
	      (set! *traced-procedures* (cons (cons s name) *traced-procedures*))
	      (advise 
	       'around s
	       (lambda (next args)
		 (let ((results #f))
		   (dynamic-wind
		       (cut traced-procedure-entry name args)
		       (lambda () 
			 (call-with-values (cut apply next args)
			   (lambda rs
			     (set! results rs)
			     (apply values rs))))
		       (cut traced-procedure-exit name results))))
	       '*trace*)))))
   procs) )

(define (do-untrace-all)
  (define (unadvise* p)
    (ignore-errors (unadvise p '*trace*)))
  (for-each
   (lambda (proc)
     (let ((proc (car proc)))
       (when (trace-verbose)
	 (fprintf (current-error-port) "; untracing ~a~%" (procedure-name proc))
	 (unadvise* proc))))
   *traced-procedures*)
  (set! *traced-procedures* '()))

(define (do-untrace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *traced-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure not traced" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; untracing ~a~%" name))
	      (ignore-errors (unadvise s '*trace*))
	      (set! *traced-procedures* 
		(delete 
		 p *traced-procedures* 
		 eq?))))))
   procs) )

(define (do-break procs)
  (for-each
   (lambda (s)
     (let ((name (procedure-name s)))
       (ensure procedure? s)
       (cond ((assq s *broken-procedures*)
	      (warning "procedure already has break-point" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; setting break-point in ~a~%" name))
	      (set! *broken-procedures* (cons (cons s name) *broken-procedures*))
	      (advise 
	       'before s
	       (lambda (args)
		 (break-entry name args) )
	       '*break*) ) )))
   procs) )

(define (do-unbreak procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *broken-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure has no breakpoint" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; removing break-point in ~a~%" name))
	      (ignore-errors (unadvise s '*break*))
	      (set! *broken-procedures* (delete p *broken-procedures* eq?) ) ) ) ) )
   procs) )

(define (do-unbreak-all)
  (for-each
   (lambda (bp)
     (ignore-errors (unadvise (car bp) '*break*)))
   *broken-procedures*)
  (set! *broken-procedures* '())
  (void))

(define (trace . procs)
  (cond ((null? procs)
	 (when (pair? *traced-procedures*)
	   (printf "Traced:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *traced-procedures*)) )
	(else
	 (do-trace procs) ) ) )

(define (untrace . procs)
  (cond ((null? procs) (do-untrace-all))
	(else (do-untrace procs)))
  (void))

(define (break . procs)
  (cond ((null? procs)
	 (when (pair? *broken-procedures*)
	   (printf "Breakpoints:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *broken-procedures*)) )
	(else
	 (do-break procs) ) ) )

(define (unbreak . procs)
  (cond ((null? procs) (do-unbreak-all))
	(else (do-unbreak procs))))

(define (continue #!optional (bp *last-breakpoint*))
  (cond (*last-breakpoint*
	 (let ((exn *last-breakpoint*))
	   (set! *last-breakpoint* #f)
	   (break-resume exn) ) )
	(else (display "no breakpoint pending\n") ) ) )

(define c continue)

(define (traced? proc)
  (assq proc *traced-procedures*))

(define (trace/untrace . procs)
  (for-each
   (lambda (proc)
     ((if (traced? proc) do-untrace do-trace) (list proc)))
   procs))

(define (walk-module mname proc)
  (let* ((m (##sys#find-module mname))
	 (exps (nth-value 1 (##sys#module-exports m))))
    (for-each
     (lambda (exp)
       (let* ((realname (cdr exp))
	      (prim (get realname '##core#primitive)))
	 (if prim
	     (warning "export is a core-library primitive - not traced" (car exp))
	     (when (##sys#symbol-has-toplevel-binding? realname)
	       (let ((val (##sys#slot realname 0)))
		 (when (procedure? val)
		   (proc val)))))))
     exps)))

(define (trace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module mname trace))
   mnames))

(define (untrace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module 
      mname 
      (lambda (proc)
	(when (traced? proc)
	  (do-untrace (list proc))))))
   mnames))

)

Added utils/trace/trace.setup version [d222d610b4].



















>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
;;;; trace.setup -*- Scheme -*-


(compile -s trace.scm -O3 -d1 -j trace)
(compile -s trace.import.scm -O3 -d0)

(install-extension
 'trace
 '("trace.so" "trace.import.so"))