Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -28,10 +28,17 @@ (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. +(module client +* + +) + +(import client) + (include "common_records.scm") (include "db_records.scm") ;; client:get-signature (define (client:get-signature) @@ -44,13 +51,10 @@ #;(define (client:logout serverdat) (let ((ok (and (socket? serverdat) (cdb:logout serverdat *toppath* (client:get-signature))))) ok)) -(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. ;; 1. We are a test manager and we received *transport-type* and *runremote* via cmdline @@ -60,17 +64,30 @@ ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; -(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +;;(define (http-transport:server-dat-make-url runremote) +(define (client:get-url runremote) + (if (and (remote-iface runremote) + (remote-port runremote)) + (conc "http://" + (remote-iface runremote) + ":" + (remote-port runremote)) + #f)) + +;; if successfully connected to a server runremote will be populated with appropriate info. +;; the result returned should not be used other than as an indicator of success +;; +(define (client:setup areapath runremote #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) (mutex-lock! *rmt-mutex*) - (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) + (let ((res (client:setup-http areapath runremote remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) (mutex-unlock! *rmt-mutex*) res)) -(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) +(define (client:setup-http areapath runremote #!key (remaining-tries 100) (failed-connects 0)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (server:start-and-wait areapath) (if (<= remaining-tries 0) (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") @@ -77,48 +94,72 @@ (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; - (let* ((server-dat (server:choose-server areapath 'best)) - (runremote (or area-dat *runremote*))) + (let* ((server-dat (server:choose-server areapath 'best))) ;; list host port start-time server-id pid +;; (runremote (or runremote *runremote*))) (if (not server-dat) ;; no server found - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) + (begin + (if (< remaining-tries 99)(thread-sleep! 1)) ;; obviously it needs time + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) (match server-dat ((host port start-time server-id pid) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) - (if (and (not area-dat) - (not *runremote*)) - (begin - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) + (if (not runremote) + (begin + ;; Here we are creating a runremote where there was none or it was clobbered with #f + ;; + (set! runremote (make-remote)) + (let* ((server-info (server:check-if-running areapath))) + (remote-server-info-set! runremote server-info) (if server-info (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))))) + (remote-server-url-set! runremote (server:record->url server-info)) + (remote-server-id-set! runremote (server:record->id server-info))))))) + ;; at this point we have a runremote (if (and host port server-id) - (let* ((start-res (http-transport:client-connect host port server-id)) - (ping-res (rmt:login-no-auto-client-setup start-res))) - (if (and start-res - ping-res) - (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago - (if runremote - (begin - (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) - (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) - start-res) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) + (let* ((nada (client:connect host port server-id runremote)) + (ping-res (rmt:login-no-auto-client-setup runremote))) + (if ping-res + (if runremote + (begin + (debug:print-info 2 *default-log-port* "connected to " (client:get-url runremote)) + runremote) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))) (begin ;; login failed but have a server record, clean out the record and try again - (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 - (http-transport:close-connections *runremote*) + (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... ping-res=" ping-res) ;; had runid. Fixes part of Randy;s ticket 1405717332 + (http-transport:close-connections runremote) (thread-sleep! 1) - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))) + (client:setup-http areapath runremote remaining-tries: (- remaining-tries 1))))) (else (debug:print 0 *default-log-port* "ERROR: malformed server-dat="server-dat))))))) +;; +;; connect - stored in remote-condat +;; +;; (define (http-transport:client-connect iface port server-id runremote) +(define (client:connect iface port server-id runremote-in) + (let* ((runremote (or runremote-in + (make-runremote)))) + (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id) + (let* ((api-url (conc "http://" iface ":" port "/api")) + (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) + (api-req (make-request method: 'POST uri: api-uri))) + ;; (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) + (remote-iface-set! runremote iface) + (remote-port-set! runremote port) + (remote-server-id-set! runremote server-id) + (remote-connect-time-set! runremote (current-seconds)) + (remote-last-access-set! runremote (current-seconds)) + (remote-api-url-set! runremote api-url) + (remote-api-uri-set! runremote api-uri) + (remote-api-req-set! runremote api-req) + runremote))) + Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -315,26 +315,33 @@ (else "FAIL"))) (define (common:logpro-exit-code->test-status exit-code) (status-sym->string (common:logpro-exit-code->status-sym exit-code))) +;; (defstruct remote (hh-dat (let ((res (or (server:choose-server *toppath* 'homehost) (cons #f #f)))) (assert (pair? res)(conc "FATAL: hh-dat should be a pair, got "res)) res)) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) - (server-info (if *toppath* (server:check-if-running *toppath*) #f)) + (server-info #f) ;; (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive - (connect-time (current-seconds)) - (conndat #f) - (transport *transport-type*) + (connect-time (current-seconds)) ;; when we first connected + (last-access (current-seconds)) ;; last time we talked to server (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) - (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode + (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode + + ;; conndat stuff + (iface #f) ;; TODO: Consolidate this data with server-url and server-info above + (port #f) + (api-url #f) + (api-uri #f) + (api-req #f)) ;; launching and hosts (defstruct host (reachable #f) (last-update 0) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -92,19 +92,10 @@ (define (db:hoh-get dat key1 key2) (let* ((subhash (hash-table-ref/default dat key1 #f))) (and subhash (hash-table-ref/default subhash key2 #f)))) -(define (db:get-cache-stmth dbdat run-id db stmt) - (let* (;; (dbdat (dbfile:get-dbdat dbstruct run-id)) - (stmt-cache (dbr:dbdat-stmt-cache dbdat)) - (stmth (db:hoh-get stmt-cache db stmt))) - (or stmth - (let* ((newstmth (sqlite3:prepare db stmt))) - (db:hoh-set! stmt-cache db stmt newstmth) - newstmth)))) - ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== (define (db:general-sqlite-error-dump exn stmt . params) @@ -1047,12 +1038,12 @@ ;; what to do with toplevel? call rollup? (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id)) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');") run-id deadtime) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy dbdat) @@ -1061,12 +1052,12 @@ (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) - db - "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" + (db:get-cache-stmth dbdat db + "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');") run-id) (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") (if (and (null? incompleted) (null? oldlaunched) @@ -1113,21 +1104,21 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) (let* ((stmth1 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');")) (stmth2 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');")) (stmth3 (db:get-cache-stmth - dbdat run-id db + dbdat db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"))) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; @@ -1359,27 +1350,27 @@ dbstruct #f #f ;; for the moment vars are only stored in main.db (lambda (dbdat db) (sqlite3:for-each-row (lambda (val) (set! res val)) - db - "SELECT val FROM metadat WHERE var=?;" var) + (db:get-cache-stmth dbdat db + "SELECT val FROM metadat WHERE var=?;") var) ;; convert to number if can (if (string? res) (let ((valnum (string->number res))) (if valnum (set! res valnum)))) res)))) (define (db:inc-var dbstruct var) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val+1 WHERE var=?;" var)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+1 WHERE var=?;") var)))) (define (db:dec-var dbstruct var) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val-1 WHERE var=?;" var)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val-1 WHERE var=?;") var)))) ;; This was part of db:get-var. It was used to estimate the load on ;; the database files. ;; ;; scale by 10, average with current value. @@ -1392,21 +1383,21 @@ ;; (set! *last-global-delta-printed* *global-delta*))) (define (db:set-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);") var val)))) (define (db:add-var dbstruct var val) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "UPDATE metadat SET val=val+? WHERE var=?;" val var)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE metadat SET val=val+? WHERE var=?;") val var)))) (define (db:del-var dbstruct var) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "DELETE FROM metadat WHERE var=?;") var)))) ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== @@ -1519,12 +1510,12 @@ (lambda (dbdat db) (let ((res #f)) (sqlite3:for-each-row (lambda (runname) (set! res runname)) - db - "SELECT runname FROM runs WHERE id=?;" + (db:get-cache-stmth dbdat db + "SELECT runname FROM runs WHERE id=?;") run-id) res)))) (define (db:get-run-key-val dbstruct run-id key) (db:with-db @@ -1582,13 +1573,14 @@ (db:with-db dbstruct #f #f (lambda (dbdat db) ;; (debug:print 0 *default-log-port* "Got here 1.") (let ((res #f)) - (apply sqlite3:execute db - (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" - comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") + (apply sqlite3:execute + (db:get-cache-stmth dbdat db + (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" + comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")) allvals) (apply sqlite3:for-each-row (lambda (id) (set! res id)) db @@ -1743,12 +1735,12 @@ (let ((numruns 0)) (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt) (sqlite3:for-each-row (lambda (count) (set! numruns count)) - db - "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) + (db:get-cache-stmth dbdat db + "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" )runpatt) (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt) numruns)))) ;; just get count of runs (define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys) @@ -1797,12 +1789,12 @@ (lambda (dbdat db) (sqlite3:fold-row (lambda (res state status count) (cons (list state status count) res)) '() - db - "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;" + (db:get-cache-stmth dbdat db + "SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;") run-id)))) ;; Update run_stats for given run_id ;; input data is a list (state status count) ;; @@ -1814,24 +1806,21 @@ #f (lambda (dbdat db) ;; remove previous data - (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) - (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) + (let* ((stmt1 (db:get-cache-stmth dbdat db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;")) + (stmt2 (db:get-cache-stmth dbdat db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);")) (res (sqlite3:with-transaction db (lambda () (for-each (lambda (dat) (sqlite3:execute stmt1 run-id (car dat)(cadr dat)) (apply sqlite3:execute stmt2 run-id dat)) stats))))) - (sqlite3:finalize! stmt1) - (sqlite3:finalize! stmt2) - ;; (mutex-unlock! *db-transaction-mutex*) res)))) (define (db:get-main-run-stats dbstruct run-id) (db:with-db dbstruct @@ -1840,12 +1829,12 @@ (lambda (dbdat db) (sqlite3:fold-row (lambda (res state status count) (cons (list state status count) res)) '() - db - "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));" + (db:get-cache-stmth dbdat db + "SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));") run-id)))) (define (db:print-current-query-stats) ;; generate stats from *db-api-call-time* (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) @@ -1873,12 +1862,12 @@ (lambda (dbdat db) (let ((run-ids '())) (sqlite3:for-each-row (lambda (run-id) (set! run-ids (cons run-id run-ids))) - db - "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;") + (db:get-cache-stmth dbdat db + "SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")) (reverse run-ids))))) ;; get some basic run stats ;; ;; data structure: @@ -1896,12 +1885,12 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (run-id runname) (set! runs-info (cons (list run-id runname) runs-info))) - db - "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats + (db:get-cache-stmth dbdat db + "SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats ;; for each run get stats data (for-each (lambda (run-info) ;; get the net state/status counts for this run (let* ((run-id (car run-info)) @@ -1916,12 +1905,12 @@ (let ((netstate (if (equal? state "COMPLETED") status state))) (if (string? netstate) (begin (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count)) (hash-table-set! curr netstate (+ (hash-table-ref/default curr netstate 0) count)))))) - db - "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;" + (db:get-cache-stmth dbdat db + "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;") run-id) ;; add the per run counts to res (for-each (lambda (state) (set! res (cons (list run-name state (hash-table-ref curr state)) res))) (sort (hash-table-keys curr) string>=)) @@ -2012,11 +2001,11 @@ (define (db:set-comment-for-run dbstruct run-id comment) (db:with-db dbstruct #f #f (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE runs SET comment=? WHERE id=?;") comment ;; (sdb:qry 'getid comment) run-id)))) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run dbstruct run-id) (db:with-db @@ -2033,11 +2022,11 @@ (define (db:update-run-event_time dbstruct run-id) (db:with-db dbstruct #f #t (lambda (dbdat db) - (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;") run-id)))) (define (db:lock/unlock-run dbstruct run-id lock unlock user) (db:with-db dbstruct #f #t (lambda (dbdat db) @@ -2053,32 +2042,32 @@ (define (db:set-run-status dbstruct run-id status msg) (db:with-db dbstruct #f #f (lambda (dbdat db) (if msg - (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id) - (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id))))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE runs SET status=?,comment=? WHERE id=?;") status msg run-id) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE runs SET status=? WHERE id=?;") status run-id))))) -(define (db:set-run-state-status-db db run-id state status ) - (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id)) +(define (db:set-run-state-status-db dbdat db run-id state status ) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE runs SET status=?,state=? WHERE id=?;") status state run-id)) (define (db:set-run-state-status dbstruct run-id state status ) (db:with-db dbstruct #f #f (lambda (dbdat db) - (db:set-run-state-status-db db run-id state status)))) + (db:set-run-state-status-db dbdat db run-id state status)))) (define (db:get-run-status dbstruct run-id) (let ((res "n/a")) (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (status) (set! res status)) - db - "SELECT status FROM runs WHERE id=?;" + (db:get-cache-stmth dbdat db + "SELECT status FROM runs WHERE id=?;" ) run-id) res)))) (define (db:get-run-state dbstruct run-id) (let ((res "n/a")) @@ -2086,12 +2075,12 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (status) (set! res status)) - db - "SELECT state FROM runs WHERE id=?;" + (db:get-cache-stmth dbdat db + "SELECT state FROM runs WHERE id=?;" ) run-id) res)))) ;;====================================================================== @@ -2110,11 +2099,11 @@ (lambda (key) (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) (sqlite3:for-each-row (lambda (key-val) (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db - db qry run-id))) + (db:get-cache-stmth dbdat db qry) run-id))) keys))) (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals dbstruct run-id) @@ -2128,11 +2117,11 @@ (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) ;; (db:delay-if-busy dbdat) (sqlite3:for-each-row (lambda (key-val) (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db - db qry run-id))) + (db:get-cache-stmth dbdat db qry) run-id))) keys))) (let ((final-res (reverse res))) (hash-table-set! *keyvals* run-id final-res) final-res))) @@ -2156,12 +2145,12 @@ (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db (lambda (dbdat db) (apply sqlite3:for-each-row (lambda (id) (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;") + (db:get-cache-stmth dbdat db + (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;")) (append kvalues (list run-id))))) prev-run-ids))))) ;;====================================================================== ;; T E S T S @@ -2333,12 +2322,12 @@ (lambda (dbdat db) (sqlite3:for-each-row (lambda (run-id testname item-path state status) ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-"))) - db - "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" + (db:get-cache-stmth dbdat db + "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=? and run_id=?;" ) test-id run-id))) res)) ;; get a useful subset of the tests data (used in dashboard ;; use db:mintest-get-{id ,run_id,testname ...} @@ -2368,11 +2357,11 @@ (db:general-call dbstruct run-id 'delete-test-step-records (list test-id)) (db:general-call dbstruct run-id 'delete-test-data-records (list test-id)) (db:with-db dbstruct run-id #f (lambda (dbdat db) - (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id)))) + (sqlite3:execute (db:get-cache-stmth dbdat db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;") test-id)))) ;; (define (db:delete-old-deleted-test-records dbstruct) (let ((targtime (- (current-seconds) (or (configf:lookup-number *configdat* "setup" "keep-deleted-records") @@ -2383,13 +2372,13 @@ #t (lambda (dbdat db) (sqlite3:with-transaction db (lambda () - (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_timelist rec)) ",") "\n") (apply sqlite3:execute qry (append (vector->list rec)(list run-id)))) testrecs))) - (sqlite3:finalize! qry))))) + ;; (sqlite3:finalize! qry) + )))) ;; map a test-id into the proper range ;; (define (db:adj-test-id mtdb min-test-id test-id) (if (>= test-id min-test-id) @@ -2716,12 +2706,12 @@ (let ((res #f)) (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test (lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;") + (db:get-cache-stmth dbdat db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;")) test-id) res)))) ;; Use db:test-get* to access ;; Get test data using test_ids. NB// Only works within a single run!! @@ -2746,19 +2736,19 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (db:get-test-info-db db run-id test-name item-path)))) + (db:get-test-info-db dbdat db run-id test-name item-path)))) -(define (db:get-test-info-db db run-id test-name item-path) +(define (db:get-test-info-db dbdat db run-id test-name item-path) (let ((res #f)) (sqlite3:for-each-row (lambda (a . b) (set! res (apply vector a b))) - db - (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;") + (db:get-cache-stmth dbdat db + (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")) test-name item-path run-id) res)) (define (db:test-get-rundir-from-test-id dbstruct run-id test-id) (db:with-db @@ -2783,12 +2773,12 @@ #f ;; does not modify db (lambda (dbdat db) (sqlite3:for-each-row (lambda (test-name item-path test-time target ) (set! res (cons (vector test-name item-path test-time) res))) - db - qry + (db:get-cache-stmth dbdat db + qry ) run-name target) res)))) ;;====================================================================== ;; S T E P S @@ -2799,12 +2789,12 @@ dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute - db - "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" + (db:get-cache-stmth dbdat db + "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);") test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) @@ -2815,12 +2805,12 @@ dbstruct run-id #t (lambda (dbdat db) (sqlite3:execute - db - "UPDATE test_steps set status='DELETED' where test_id=?" ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps + (db:get-cache-stmth dbdat db + "UPDATE test_steps set status='DELETED' where test_id=?") ;; and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps test-id)))) ;; db-get-test-steps-for-run (define (db:get-steps-for-test dbstruct run-id test-id) @@ -2831,12 +2821,12 @@ (lambda (dbdat db) (let* ((res '())) (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile comment) (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + (db:get-cache-stmth dbdat db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;") ;; event_time DESC,id ASC; test-id) (reverse res))))) (define (db:get-steps-info-by-id dbstruct run-id test-step-id) (db:with-db @@ -2846,12 +2836,12 @@ (lambda (dbdat db) (let* ((res (vector #f #f #f #f #f #f #f #f #f))) (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile comment last-update) (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + (db:get-cache-stmth dbdat db + "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;") ;; event_time DESC,id ASC; test-step-id) res)))) (define (db:get-steps-data dbstruct run-id test-id) (db:with-db @@ -2861,12 +2851,12 @@ (lambda (dbdat db) (let ((res '())) (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile) (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res))) - db - "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC; + (db:get-cache-stmth dbdat db + "SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;") ;; event_time DESC,id ASC; test-id) (reverse res))))) ;;====================================================================== ;; T E S T D A T A @@ -2877,11 +2867,11 @@ (db:with-db dbstruct run-id #f (lambda (dbdat db) - (let* ((stmth (db:get-cache-stmth dbdat #f db stmt)) + (let* ((stmth (db:get-cache-stmth dbdat db stmt)) (res (sqlite3:fold-row (lambda (res id test-id category variable value expected tol units comment status type last-update) (vector id test-id category variable value expected tol units comment status type last-update)) (vector #f #f #f #f #f #f #f #f #f #f #f #f) stmth @@ -2901,13 +2891,13 @@ (lambda (dbdat db) (sqlite3:for-each-row (lambda (fcount pcount) (set! fail-count fcount) (set! pass-count pcount)) - db + (db:get-cache-stmth dbdat db "SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count, - (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;" + (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;") test-id test-id) ;; Now rollup the counts to the central megatest.db (db:general-call dbstruct run-id 'pass-fail-counts (list pass-count fail-count test-id)) ;; if the test is not FAIL then set status based on the fail and pass counts. (db:general-call dbstruct run-id 'test_data-pf-rollup (list test-id test-id test-id test-id)))))) @@ -3062,12 +3052,12 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt) + (db:get-cache-stmth dbdat db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;") test-id categorypatt) (reverse res))))) ;; This routine moved from tdb.scm, :read-test-data ;; (define (db:read-test-data-varpatt dbstruct run-id test-id categorypatt varpatt) @@ -3076,12 +3066,12 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) - db - "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt) + (db:get-cache-stmth dbdat db + "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;") test-id categorypatt varpatt) (reverse res))))) ;;====================================================================== ;; Misc. test related queries @@ -3134,12 +3124,12 @@ (lambda (dbdat db) (let ((res 0)) (sqlite3:for-each-row (lambda (num-items) (set! res num-items)) - db - "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');" + (db:get-cache-stmth dbdat db + "SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');") run-id testname) res)))) ;;====================================================================== @@ -3225,13 +3215,13 @@ (let ((tr-res (sqlite3:with-transaction db (lambda () ;; NB// Pass the db so it is part fo the transaction - (db:test-set-state-status-db db run-id test-id state status comment) ;; this call sets the item state/status + (db:test-set-state-status-db dbdat db run-id test-id state status comment) ;; this call sets the item state/status (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item - (let* ((state-status-counts (db:get-all-state-status-counts-for-test db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test + (let* ((state-status-counts (db:get-all-state-status-counts-for-test dbdat db run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test (state-statuses (db:roll-up-rules state-status-counts state status)) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (set! new-state-eh newstate) (set! new-status-eh newstatus) @@ -3240,11 +3230,11 @@ (map (lambda (x) (conc (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | ")) state-status-counts))); end debug:print (if tl-test-id - (db:test-set-state-status-db db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct + (db:test-set-state-status-db dbdat db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct )))))) (mutex-unlock! *db-transaction-mutex*) (if (and test-id state status (equal? status "AUTO")) (db:test-data-rollup dbstruct run-id test-id status)) (if new-state-eh ;; moved from db:test-set-state-status @@ -3319,20 +3309,20 @@ (let* ((state-status-counts (db:get-all-state-status-counts-for-run-db db run-id)) (state-statuses (db:roll-up-rules state-status-counts #f #f )) (newstate (car state-statuses)) (newstatus (cadr state-statuses))) (if (or (not (eq? newstate curr-state)) (not (eq? newstatus curr-status))) - (db:set-run-state-status-db db run-id newstate newstatus ))))))) + (db:set-run-state-status-db dbdat db run-id newstate newstatus ))))))) (mutex-unlock! *db-transaction-mutex*) tr-res)))) (define (db:get-all-state-status-counts-for-run-db db run-id) (sqlite3:map-row (lambda (state status count) (make-dbr:counts state: state status: status count: count)) - db - "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;" + (db:get-cache-stmth dbdat db + "SELECT state,status,count(id) FROM tests WHERE run_id=? GROUP BY state,status;") run-id )) (define (db:get-all-state-status-counts-for-run dbstruct run-id) (db:with-db dbstruct #f #f @@ -3341,20 +3331,20 @@ ;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status* ;; ;; NOTE: This is called within a transaction ;; -(define (db:get-all-state-status-counts-for-test db run-id test-name item-path item-state-in item-status-in) - (let* ((test-info (db:get-test-info-db db run-id test-name item-path)) +(define (db:get-all-state-status-counts-for-test dbdat db run-id test-name item-path item-state-in item-status-in) + (let* ((test-info (db:get-test-info-db dbdat db run-id test-name item-path)) (item-state (or item-state-in (db:test-get-state test-info))) (item-status (or item-status-in (db:test-get-status test-info))) (other-items-count-recs (sqlite3:map-row (lambda (state status count) (make-dbr:counts state: state status: status count: count)) - db + (db:get-cache-stmth dbdat db ;; ignore current item because we have changed its value in the current transation so this select will see the old value. - "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;" + "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;") run-id test-name item-path)) ;; add current item to tally outside of sql query (match-countrec-lambda (lambda (countrec) (and (equal? (dbr:counts-state countrec) item-state) (equal? (dbr:counts-status countrec) item-status)))) @@ -3402,12 +3392,12 @@ (set! logf final_logf) (set! res (list path final_logf)) (if (directory? path) (debug:print 2 *default-log-port* "Found path: " path) (debug:print 2 *default-log-port* "No such path: " path))) ;; ) - db - "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;" + (db:get-cache-stmth dbdat db + "SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;") test-name run-id) res)))) ;;====================================================================== ;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S @@ -3587,11 +3577,11 @@ db:queries))) (if q (car q) #f)))) (db:with-db dbstruct run-id #f (lambda (dbdat db) - (apply sqlite3:execute db query params) + (apply sqlite3:execute (db:get-cache-stmth dbdat db query) params) #t)))) ;; get a summary of state and status counts to calculate a rollup ;; (define (db:get-state-status-summary dbstruct run-id testname) @@ -3600,12 +3590,12 @@ dbstruct run-id #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (state status count) (set! res (cons (vector state status count) res))) - db - "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" + (db:get-cache-stmth dbdat db + "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;") run-id testname) res)))) (define (db:get-latest-host-load dbstruct raw-hostname) (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) @@ -3613,12 +3603,12 @@ (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (cpuload update-time) (set! res (cons cpuload update-time))) - db - "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + (db:get-cache-stmth dbdat db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;") hostname))) res )) (define (db:set-top-level-from-items dbstruct run-id testname) (let* ((summ (db:get-state-status-summary dbstruct run-id testname)) (find (lambda (state status) @@ -3659,23 +3649,23 @@ dbstruct #f #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (a . b) (set! keyvals (cons a b))) - db - (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id))) + (db:get-cache-stmth dbdat db + (conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;")) run-id))) (if (not keyvals) '() (let ((prev-run-ids '())) (db:with-db dbstruct #f #f (lambda (dbdat db) (apply sqlite3:for-each-row (lambda (id) (set! prev-run-ids (cons id prev-run-ids))) - db - (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id))))) + (db:get-cache-stmth dbdat db + (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;")) (append keyvals (list run-id))))) ;; collect all matching tests for the runs then ;; extract the most recent test and return that. (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids) (if (null? prev-run-ids) '() ;; no previous runs? return null @@ -3749,12 +3739,12 @@ #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (id itempath state status run_duration logf comment) (set! res (cons (vector id itempath state status run_duration logf comment) res))) - db - "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id? + (db:get-cache-stmth dbdat db + "SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;") ;; BUG! WHY NO run_id? test-name run-id) res)))) ;;====================================================================== @@ -3775,12 +3765,12 @@ (lambda (tag) (hash-table-set! res tag (delete-duplicates (cons testname (hash-table-ref/default res tag '()))))) tags))) - db - "SELECT testname,tags FROM test_meta") + (db:get-cache-stmth dbdat db + "SELECT testname,tags FROM test_meta")) (hash-table->alist res))))) ;; read the record given a testname (define (db:testmeta-get-record dbstruct testname) (let ((res #f)) @@ -3790,40 +3780,40 @@ #f (lambda (dbdat db) (sqlite3:for-each-row (lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup) (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;" + (db:get-cache-stmth dbdat db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;") testname) res)))) ;; create a new record for a given testname (define (db:testmeta-add-record dbstruct testname) (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:execute - db - "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname)))) + (db:get-cache-stmth dbdat db + "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');") testname)))) ;; update one of the testmeta fields (define (db:testmeta-update-field dbstruct testname field value) (db:with-db dbstruct #f #f (lambda (dbdat db) (sqlite3:execute - db - (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname)))) + (db:get-cache-stmth dbdat db + (conc "UPDATE test_meta SET " field "=? WHERE testname=?;")) value testname)))) (define (db:testmeta-get-all dbstruct) (db:with-db dbstruct #f #f (lambda (dbdat db) (let ((res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (apply vector a b) res))) - db - "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;") + (db:get-cache-stmth dbdat db + "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;")) res)))) ;;====================================================================== ;; M I S C M A N A G E M E N T I T E M S ;;====================================================================== Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -62,22 +62,22 @@ ;; (defstruct dbr:subdb (dbname #f) ;; .megatest/1.db (mtdbfile #f) ;; mtrah/.megatest/1.db (mtdbdat #f) ;; only need one of these for syncing - ;; (dbdats (make-hash-table)) ;; id => dbdat (tmpdbfile #f) ;; /tmp/.../.megatest/1.db - ;; (refndbfile #f) ;; /tmp/.../.megatest/1.db_ref - (dbstack (make-stack)) ;; stack for tmp dbr:dbdat, + (dbstack (make-stack)) ;; stack for tmp dbr:dbdat, ie. dbdats (homehost #f) ;; not used yet (on-homehost #f) ;; not used yet (read-only #f) (last-sync 0) (last-write (current-seconds)) ) ;; goal is to converge on one struct for an area but for now it is too confusing ;; need to keep dbhandles and cached statements together +;; This is the handle and associated file for a single db +;; (defstruct dbr:dbdat (dbfile #f) (dbh #f) (stmt-cache (make-hash-table)) (read-only #f) @@ -101,10 +101,23 @@ (dbfile:print-err ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) + +;; the stmt-cache is associated with a single db handle +;; so there is no need for the hoh stuff. +(define (db:get-cache-stmth dbdat db stmt) + (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) + ;; (stmth (db:hoh-get stmt-cache db stmt)) + (stmth (hash-table-ref/default stmt-cache stmt #f)) + ) + (or stmth + (let* ((newstmth (sqlite3:prepare db stmt))) + ;; (db:hoh-set! stmt-cache db stmt newstmth) + (hash-table-set! stmt-cache stmt newstmth) + newstmth)))) (define (dbfile:run-id->key run-id) (or run-id 'main)) (define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) @@ -116,50 +129,37 @@ (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn) (thread-sleep! 3) (sqlite3:interrupt! db) (db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1))) (if (sqlite3:database? db) - (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f)))) - (if stmts (map sqlite3:finalize! (hash-table-values stmts))) + ;; (let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache #f)))) + (begin + (if stmt-cache + (map sqlite3:finalize! (hash-table-values stmt-cache))) (sqlite3:finalize! db) #t) (begin (dbfile:print-err "db:safely-close-sqlite3-db: " db " is not an sqlite3 db") - #f - ) - )))) + #f))))) ;; close all opened run-id dbs (define (db:close-all dbstruct) (if (dbr:dbstruct? dbstruct) -;; (handle-exceptions -;; exn -;; (begin -;; (debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn) -;; (print-call-chain *default-log-port*)) - ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server. - (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) - (for-each - (lambda (subdb) - (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) - (mtdbdat (dbr:dbdat-dbh (dbr:subdb-mtdbdat subdb))) - #;(rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb)))) - - (map (lambda (dbdat) - (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) - (dbh (dbr:dbdat-dbh dbdat))) - (db:safely-close-sqlite3-db dbh stmt-cache))) - tdbs) - (db:safely-close-sqlite3-db mtdbdat (dbr:dbdat-stmt-cache (dbr:subdb-mtdbdat subdb))) - ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb)) - #;(db:safely-close-sqlite3-db rdb #f))) ;; stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb)))))) - subdbs) - #t - ) - #f - ) -) + (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))) + (for-each + (lambda (subdb) + (let* ((tdbs (stack->list (dbr:subdb-dbstack subdb))) + (mtdbdat (dbr:dbdat-dbh (dbr:subdb-mtdbdat subdb)))) + (map (lambda (dbdat) + (let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat)) + (dbh (dbr:dbdat-dbh dbdat))) + (db:safely-close-sqlite3-db dbh stmt-cache))) + tdbs) + (db:safely-close-sqlite3-db mtdbdat (dbr:dbdat-stmt-cache (dbr:subdb-mtdbdat subdb))))) + subdbs) + #t) + #f)) ;; ;; set up a single db (e.g. main.db, 1.db ... etc.) ;; ;; ;; (define (db:setup-db dbstruct areapath run-id) ;; (let* ((dbname (db:run-id->dbname run-id)) @@ -402,11 +402,10 @@ "cp "backupfname" "fname))) (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" " "cmd) (system cmd))) - (define (dbfile:open-no-sync-db dbpath) (if *no-sync-db* *no-sync-db* (begin (if (not (file-exists? dbpath)) @@ -419,27 +418,32 @@ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")) ))) (db (dbfile:cautious-open-database dbname init-proc 0 "WAL"))) ;; (sqlite3:open-database dbname))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) ;; done in cautious-open-database + (set! *no-sync-set-prep* (sqlite3:prepare db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);")) + (set! *no-sync-del-prep* (sqlite3:prepare db "DELETE FROM no_sync_metadat WHERE var=?;")) + (set! *no-sync-get-prep* (sqlite3:prepare db "SELECT val FROM no_sync_metadat WHERE var=?;")) (set! *no-sync-db* db) db)))) + +(define *no-sync-set-prep* #f) +(define *no-sync-del-prep* #f) +(define *no-sync-get-prep* #f) (define (db:no-sync-set db var val) - (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) + (sqlite3:execute *no-sync-set-prep* var val)) (define (db:no-sync-del! db var) - (sqlite3:execute db "DELETE FROM no_sync_metadat WHERE var=?;" var)) + (sqlite3:execute *no-sync-del-prep* var)) (define (db:no-sync-get/default db var default) (let ((res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) - db - "SELECT val FROM no_sync_metadat WHERE var=?;" - var) + *no-sync-get-prep* var) (if res (let ((newres (if (string? res) (string->number res) #f))) (if newres @@ -1011,10 +1015,11 @@ ;; (define (db:with-db dbstruct run-id r/w proc . params) (assert dbstruct "FATAL: db:with-db called with dbstruct "#f) (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct) (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption + (load-delay (/ *api-process-request-count* 25)) (have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) (db (if have-struct ;; this stuff just allows us to call with a db handle directly @@ -1032,16 +1037,20 @@ (if (file-exists? jfile) (begin (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") (thread-sleep! 0.2))) (if (and use-mutex - (common:low-noise-print 120 "over-50-parallel-api-requests")) + (common:low-noise-print 120 "over-25-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " - (current-process-id))) ;; ", throttling access")) + (current-process-id) + ", throttling access for "load-delay" seconds.")) (condition-case (begin - (if use-mutex (mutex-lock! *db-with-db-mutex*)) + (if use-mutex + (begin + (thread-sleep! load-delay) ;; time to slow everything down + (mutex-lock! *db-with-db-mutex*))) (let ((res (apply proc dbdat db params))) ;; the actual call is here. (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) (if dbdat (dbfile:add-dbdat dbstruct run-id dbdat)) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -241,27 +241,19 @@ (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) ;; Send "cmd" with json payload "params" to serverdat and receive result ;; -(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f)) - (let* ((fullurl (if (vector? serverdat) - (http-transport:server-dat-get-api-req serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1)))) +(define (http-transport:client-api-send-receive run-id runremote cmd params #!key (numretries 3)) + (assert (remote? runremote) "FATAL: http-transport:client-api-send-receive called with serverdat="serverdat) + (let* ((fullurl (remote-api-req runremote)) (res (vector #f "uninitialized")) (success #t) (sparams (db:obj->string params transport: 'http)) - (runremote (or area-dat *runremote*)) - (server-id (if (vector? serverdat) - (http-transport:server-dat-get-server-id serverdat) - (begin - (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info") - (exit 1))))) + (server-id (remote-server-id runremote))) (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) - + (assert fullurl "FATAL: http-transposrt:client-api-send-receive remote-api-req not set") ;; set up the http-client here (max-retry-attempts 1) ;; consider all requests indempotent (retry-request? (lambda (request) #f)) @@ -286,22 +278,13 @@ (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) ;; what if another thread is communicating ok? Can't happen due to mutex - (set! *runremote* #f) - (set! runremote #f) - ;; (if runremote - ;; (remote-conndat-set! runremote #f)) - ;; Killing associated server to allow clean retry.") - ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? + (http-transport:close-connections runremote) (mutex-unlock! *http-mutex*) - ;; (signal (make-composite-condition - ;; (make-property-condition 'commfail 'message "failed to connect to server"))) - ;; "communications failed" - ;; (close-all-connections!) - (close-connection! fullurl) + ;; (close-connection! fullurl) (db:obj->string #f)) (with-input-from-request ;; was dat fullurl (list (cons 'key (or server-id "thekey")) (cons 'cmd cmd) @@ -347,69 +330,27 @@ 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))) ;; careful closing of connections stored in *runremote* ;; (define (http-transport:close-connections runremote) - (let* (;; (runremote (or area-dat *runremote*)) - (server-dat (if runremote - (remote-conndat runremote) - #f))) ;; (hash-table-ref/default *runremote* run-id #f))) - (if (vector? server-dat) - (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) - (handle-exceptions + (if (remote? runremote) + (let ((api-dat (remote-api-uri runremote))) + (handle-exceptions exn - (begin - (print-call-chain *default-log-port*) - (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (if (args:any-defined? "-server" "-execute" "-run") - (debug:print-info 0 *default-log-port* "Closing connections to "api-dat)) - (close-connection! api-dat) - ;; (close-connection! (http-transport:server-dat-make-url server-dat)) - (remote-conndat-set! runremote #f) - #t)) - #f))) - - -(define (make-http-transport:server-dat)(make-vector 6)) -(define (http-transport:server-dat-get-iface vec) (vector-ref vec 0)) -(define (http-transport:server-dat-get-port vec) (vector-ref vec 1)) -(define (http-transport:server-dat-get-api-uri vec) (vector-ref vec 2)) -(define (http-transport:server-dat-get-api-url vec) (vector-ref vec 3)) -(define (http-transport:server-dat-get-api-req vec) (vector-ref vec 4)) -(define (http-transport:server-dat-get-last-access vec) (vector-ref vec 5)) -;(define (http-transport:server-dat-get-socket vec) (vector-ref vec 6)) -(define (http-transport:server-dat-get-server-id vec) (vector-ref vec 6)) - -(define (http-transport:server-dat-make-url vec) - (if (and (http-transport:server-dat-get-iface vec) - (http-transport:server-dat-get-port vec)) - (conc "http://" - (http-transport:server-dat-get-iface vec) - ":" - (http-transport:server-dat-get-port vec)) + (begin + (print-call-chain *default-log-port*) + (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (if (not (args:get-arg "-server")) + (begin ;; NOTE: Verify this actually ever gets hit. Jan 16, 2023. + (if api-dat + (begin + (debug:print-info 0 *default-log-port* "Closing connections to "api-dat) + (close-connection! api-dat))) + (remote-api-req-set! runremote #f) ;; use api-req as a flag for a working connection + #t) + #f))) #f)) - -(define (http-transport:server-dat-update-last-access vec) - (if (vector? vec) - (vector-set! vec 5 (current-seconds)) - (begin - (print-call-chain (current-error-port)) - (debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!")))) - -;; -;; connect -;; -(define (http-transport:client-connect iface port server-id) - (debug:print-info 2 *default-log-port* "Connecting to server at "iface":"port", id "server-id) - (let* ((api-url (conc "http://" iface ":" port "/api")) - (api-uri (uri-reference (conc "http://" iface ":" port "/api"))) - (api-req (make-request method: 'POST uri: api-uri)) - (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) - server-dat)) - - - ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -44,16 +44,16 @@ ;; if a server is either running or in the process of starting call client:setup ;; else return #f to let the calling proc know that there is no server available ;; (define (rmt:get-connection-info areapath runremote) ;; TODO: push areapath down. (let* ((cinfo (if (remote? runremote) - (remote-conndat runremote) + (remote-api-req runremote) #f))) (if cinfo cinfo (if (server:check-if-running areapath) - (client:setup areapath) + (client:setup areapath runremote) #f)))) (define (rmt:on-homehost? runremote) (let* ((hh-dat (remote-hh-dat runremote))) (if (pair? hh-dat) @@ -114,12 +114,12 @@ (begin (set! *runremote* (make-remote)) (let* ((server-info (remote-server-info *runremote*))) (if server-info (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) + (remote-server-url-set! *runremote* (server:record->url server-info)) + (remote-server-id-set! *runremote* (server:record->id server-info))))) (set! runremote *runremote*))) ;; new runremote will come from this on next iteration ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; ;; DOT SET_HOMEHOST -> MUTEXLOCK; @@ -129,16 +129,16 @@ (let ((hh-data (server:choose-server areapath 'homehost))) (remote-hh-dat-set! runremote (or hh-data (cons #f #f))))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond - #;((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds - (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") - (set! *runremote* #f) - ;; BUG: close-connections should go here? - (mutex-unlock! *rmt-mutex*) - (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) + ;; ((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds + ;; (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") + ;; (set! *runremote* #f) + ;; ;; BUG: close-connections should go here? + ;; (mutex-unlock! *rmt-mutex*) + ;; (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) @@ -169,18 +169,17 @@ ;;DOT CASE4 [label="reset\nconnection"]; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} ;;DOT CASE4 -> "rmt:send-receive"; ;; reset the connection if it has been unused too long ((and runremote - (remote-conndat runremote) + ;; (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on - (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) + (+ (remote-last-access runremote) (remote-server-timeout runremote)))) (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") (http-transport:close-connections runremote) ;; moving this setting of runremote conndat to #f to inside the http-transport:close-connections - ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;;DOT CASE5 [label="local\nread"]; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; @@ -198,27 +197,27 @@ ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;;DOT CASE6 -> "rmt:send-receive"; ;; on homehost and this is a write, we already have a server, but server has died ;; reinstate this keep-alive section but inject a time condition into the (add ... - - #;((and (cdr (remote-hh-dat runremote)) ;; on homehost - (not (member cmd api:read-only-queries)) ;; this is a write - (remote-server-url runremote) ;; have a server - (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. - (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") - (http-transport:close-connections area-dat: runremote) ;; make sure to clean up - (set! *runremote* (make-remote)) - (let* ((server-info (remote-server-info *runremote*))) - (if server-info - (begin - (remote-server-url-set! *runremote* (server:record->url server-info)) - (remote-server-id-set! *runremote* (server:record->id server-info))))) - (remote-force-server-set! runremote (common:force-server?)) - (mutex-unlock! *rmt-mutex*) - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") - (rmt:send-receive cmd rid params attemptnum: attemptnum)) + ;; + ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost + ;; (not (member cmd api:read-only-queries)) ;; this is a write + ;; (remote-server-url runremote) ;; have a server + ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. + ;; (debug:print 0 *default-log-port* "WARNING: server appears to have died, trying to reconnect, case 6") + ;; (http-transport:close-connections area-dat: runremote) ;; make sure to clean up + ;; (set! *runremote* (make-remote)) + ;; (let* ((server-info (remote-server-info *runremote*))) + ;; (if server-info + ;; (begin + ;; (remote-server-url-set! *runremote* (server:record->url server-info)) + ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) + ;; (remote-force-server-set! runremote (common:force-server?)) + ;; (mutex-unlock! *rmt-mutex*) + ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") + ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;;DOT CASE7 [label="homehost\nwrite"]; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; on homehost and this is a write, we already have a server @@ -254,18 +253,18 @@ ;;DOT CASE9 [label="force server\nnot on homehost"]; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one - (not (remote-conndat runremote))) + (not (remote-api-req runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost - (not (remote-conndat runremote)))) ;; and no connection - (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) + (not (remote-api-req runremote)))) ;; and no connection + (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-api-req runremote)) (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? (server:start-and-wait *toppath*)) - (remote-conndat-set! runremote (rmt:get-connection-info *toppath* runremote)) ;; calls client:setup which calls client:setup-http + (rmt:get-connection-info *toppath* runremote) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;;DOT CASE10 [label="on homehost"]; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; @@ -282,69 +281,41 @@ ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; not on homehost, do server query (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) ;;DOT } -;; No Title -;; Error: (vector-ref) out of range -;; #(# (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) -;; 6 -;; -;; Call history: -;; -;; http-transport.scm:306: thread-terminate! -;; http-transport.scm:307: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:259: k587 -;; rmt.scm:259: g591 -;; rmt.scm:276: http-transport:server-dat-update-last-access -;; http-transport.scm:364: current-seconds -;; rmt.scm:282: debug:print-info -;; common_records.scm:235: debug:debug-mode -;; rmt.scm:283: mutex-unlock! -;; rmt.scm:287: extras-transport-succeded <-- -;; +-----------------------------------------------------------------------------+ -;; | Exit Status : 70 -;; - ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") ;; (mutex-lock! *rmt-mutex*) - (let* ((conninfo (remote-conndat runremote)) - (dat-in (case (remote-transport runremote) - ((http) (condition-case ;; handling here has - ;; caused a lot of - ;; problems. However it - ;; is needed to deal with - ;; attemtped - ;; communication to - ;; servers that have gone - ;; away - (http-transport:client-api-send-receive 0 conninfo cmd params) - ((servermismatch) (vector #f "Server id mismatch" )) - ((commfail)(vector #f "communications fail")) - ((exn)(vector #f "other fail" (print-call-chain))))) - (else - (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported") - (exit)))) + (let* ((dat-in (condition-case ;; handling here has + ;; caused a lot of + ;; problems. However it + ;; is needed to deal with + ;; attemtped + ;; communication to + ;; servers that have gone + ;; away + (http-transport:client-api-send-receive 0 runremote cmd params) + ((servermismatch) (vector #f "Server id mismatch" )) + ((commfail)(vector #f "communications fail")) + ((exn)(vector #f "other fail" (print-call-chain))))) (dat (if (and (vector? dat-in) ;; ... check it is a correct size (> (vector-length dat-in) 1)) dat-in (vector #f (conc "communications fail (type 2), dat-in=" dat-in)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) - (if (and (vector? conninfo) (< 5 (vector-length conninfo))) - (http-transport:server-dat-update-last-access conninfo) ;; refresh access time + (if (remote? runremote) + (remote-last-access-set! runremote (current-seconds)) ;; refresh access time (begin - (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo) - (set! conninfo #f) + (debug:print 0 *default-log-port* "INFO: Should not get here! runremote="runremote) (http-transport:close-connections runremote))) - (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote) + (debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. runremote="runremote " dat=" dat) (mutex-unlock! *rmt-mutex*) (if success ;; success only tells us that the transport was ;; successful, have to examine the data to see if ;; there was a detected issue at the other end (extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd) @@ -431,18 +402,13 @@ (mutex-lock! *db-multi-sync-mutex*) / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) -(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) +(define (rmt:send-receive-no-auto-client-setup runremote cmd run-id params) (let* ((run-id (if run-id run-id 0)) - (res ;; (handle-exceptions - ;; exn - ;; (begin - ;; (print "transport failed. exn=" exn) - ;; #f) - (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; ) + (res (http-transport:client-api-send-receive run-id runremote cmd params))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;;====================================================================== @@ -469,15 +435,12 @@ (rmt:send-receive 'login run-id (list *toppath* megatest-version (client:get-signature)))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; Deprecated for nmsg-transport. ;; -(define (rmt:login-no-auto-client-setup connection-info) - (case *transport-type* ;; run-id of 0 is just a placeholder - ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version (client:get-signature)))) - ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*))) - )) +(define (rmt:login-no-auto-client-setup runremote) + (rmt:send-receive-no-auto-client-setup runremote 'login 0 (list *toppath* megatest-version (client:get-signature)))) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -664,48 +664,43 @@ ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; -(define (server:ping host-port-in server-id #!key (do-exit #f)) - (let ((host:port (if (not host-port-in) ;; use read-dotserver to find - #f ;; (server:check-if-running *toppath*) - ;; (if (number? host-port-in) ;; we were handed a server-id - ;; (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in))) - ;; ;; (print "srec: " srec " host-port-in: " host-port-in) - ;; (if srec - ;; (conc (vector-ref srec 3) ":" (vector-ref srec 4)) - ;; (conc "no such server-id " host-port-in))) - host-port-in))) ;; ) - (let* ((host-port (if host:port - (let ((slst (string-split host:port ":"))) - (if (eq? (length slst) 2) - (list (car slst)(string->number (cadr slst))) - #f)) - #f))) -;; (toppath (launch:setup))) - ;; (print "host-port=" host-port) - (if (not host-port) - (begin - (if host-port-in - (debug:print 0 *default-log-port* "ERROR: bad host:port")) - (if do-exit (exit 1)) - #f) - (let* ((iface (car host-port)) - (port (cadr host-port)) - (server-dat (http-transport:client-connect iface port server-id)) - (login-res (rmt:login-no-auto-client-setup server-dat))) - (if (and (list? login-res) - (car login-res)) - (begin - ;; (print "LOGIN_OK") - (if do-exit (exit 0)) - #t) - (begin - ;; (print "LOGIN_FAILED") - (if do-exit (exit 1)) - #f))))))) +(define (server:ping host:port server-id #!key (do-exit #f)) + (let* ((host-port (cond + ((string? host:port) + (let ((slst (string-split host:port ":"))) + (if (eq? (length slst) 2) + (list (car slst)(string->number (cadr slst))) + #f))) + (else + #f)))) + (cond + ((and (list? host-port) + (eq? (length host-port) 2)) + (let* ((myrunremote (make-remote)) + (iface (car host-port)) + (port (cadr host-port)) + (server-dat (client:connect iface port server-id myrunremote)) + (login-res (rmt:login-no-auto-client-setup myrunremote))) + (if (and (list? login-res) + (car login-res)) + (begin + ;; (print "LOGIN_OK") + (if do-exit (exit 0)) + #t) + (begin + ;; (print "LOGIN_FAILED") + (if do-exit (exit 1)) + #f)))) + (else + (if host:port + (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) + (if do-exit + (exit 1) + #f))))) ;; run ping in separate process, safest way in some cases ;; (define (server:ping-server ifaceport) (with-input-from-pipe