Index: TODO ================================================================== --- TODO +++ TODO @@ -16,12 +16,17 @@ # along with Megatest. If not, see . TODO ==== -NextSteps -. Remove servermod.scm +Loose ends +---------- + +. -list-servers not correct +. move *remotedat* into bigdata +. add back server stats on exit (look in rmt:run in rmtmod.scm) + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -39,10 +39,11 @@ chicken.process-context.posix chicken.string chicken.time chicken.condition chicken.process + chicken.pathname chicken.random chicken.file ;; (prefix sqlite3 sqlite3:) typed-records @@ -166,15 +167,20 @@ tasks-add tasks-set-state-given-param-key )) (define (api:run-server-process apath dbname) - (let* ((cmd (conc "nbfake megatest -server - -area "apath - " -db "dbname)) - (cleandbname (string-translate dbname "./" "_-")) - (logd (conc apath "/logs")) - (logf (conc logd "/server-"(current-seconds)cleandbname".log"))) + (let* ((cleandbname (pathname-strip-directory dbname)) ;; (string-translate dbname "./" "--")) + (logd (conc apath "/logs")) + (logf (conc logd "/server-launch-";;(current-process-id) + (seconds->year-work-week/day-time-fname (current-seconds)) + "-"cleandbname".log")) + (logf2 (conc logd "/server-" + (seconds->year-work-week/day-time-fname (current-seconds)) + "-"cleandbname"-")) + (cmd (conc "nbfake megatest -server - -area "apath + " -db "dbname" -autolog "logf2))) (if (not (directory-exists? logd)) (create-directory logd #t)) (system (conc "NBFAKE_LOG="logf" "cmd)))) ;; special function to get server @@ -203,15 +209,16 @@ ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) - ((get-server) (api:start-server dbstruct params)) + ((start-server get-server) (api:start-server dbstruct params)) + ((get-server-info) (apply db:get-server-info dbstruct params)) ((register-server) (apply db:register-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((deregister-server) (apply db:deregister-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((get-count-servers) (apply db:get-count-servers dbstruct params)) - + ((get-servers-info) (apply db:get-servers-info dbstruct params)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. ((test-set-state-status-by-id) @@ -343,10 +350,11 @@ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((log-to-main) (apply debug:print 0 *default-log-port* params)) ((get-var) (apply db:get-var dbstruct params)) ((get-run-stats) (apply db:get-run-stats dbstruct params)) ((get-run-times) (apply db:get-run-times dbstruct params)) ;; STEPS ADDED build-assist/README Index: build-assist/README ================================================================== --- /dev/null +++ build-assist/README @@ -0,0 +1,28 @@ +Here is how I like to install chicken for building Megatest. + +This guide assumes you have the Megatest fossil and are in the build-assist directory and +that you have the opensrc fossil with uv synced: + +fossil clone https://www.kiatoa.com/fossils/megatest +fossil clone https://www.kiatoa.com/fossils/opensrc;cd opensrc;fossil uv sync + +Make a build directory and go to it: + +mkdir build;cd build + +Make a destination directory and set PREFIX + +export PREFIX=/opt/chicken/5.3.0; mkdir -p $PREFIX + +Get chicken: + +wget https://code.call-cc.org/releases/5.3.0/chicken-5.3.0.tar.gz + +Extract, build, and install chicken: + +tar xf chicken-5.3.0.tar.gz; cd chicken-5.3.0; make PLATFORM=linux PREFIX=$PREFIX install; cd .. + +Install all needed eggs. +for egg in $(cat ../ck5-egg.list);do echo $egg;ck5 chicken-install $egg;done + +Now run the script ../iup-compile.sh for remaining instructions Index: build-assist/ck5 ================================================================== --- build-assist/ck5 +++ build-assist/ck5 @@ -1,9 +1,17 @@ #!/bin/bash -export PATH=/home/matt/data/buildall/ck5.2/bin:$PATH -if [[ -z /home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64 ]];then - export LD_LIBRARY_PATH=/home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64:$LD_LIBRARY_PATH +# /opt/chicken/5.3.0 +# WHICHCKVER=5.1.0_WW45 +WHICHCKVER=5.3.0 + +BASEDIR=/opt/chicken/$WHICHCKVER +export PATH="$BASEDIR/bin:$PATH" + +NEW_LD_LIBRARY_PATH="$BASEDIR/lib:$BASEDIR/lib64" +if [[ -z "$LD_LIBRARY_PATH" ]];then + export LD_LIBRARY_PATH=$NEW_LD_LIBRARY_PATH else - export LD_LIBRARY_PATH=/home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64 + export LD_LIBRARY_PATH="$NEW_LD_LIBRARY_PATH:$LD_LIBRARY_PATH" fi -export CHICKEN_DOC_PAGER=cat + exec "$@" + Index: build-assist/ck5-eggs.list ================================================================== --- build-assist/ck5-eggs.list +++ build-assist/ck5-eggs.list @@ -1,19 +1,22 @@ +csm address-info ansi-escape-sequences apropos base64 crypt csv-abnf directory-utils +dot-locking filepath fmt format http-client itemsmod json linenoise +mailbox md5 message-digest nanomsg postgresql queues @@ -36,9 +39,10 @@ srfi-19 sxml-modifications sxml-serializer sxml-transforms system-information +tcp6 test typed-records uri-common z3 ADDED build-assist/installing-nng Index: build-assist/installing-nng ================================================================== --- /dev/null +++ build-assist/installing-nng @@ -0,0 +1,23 @@ +wget https://github.com/nanomsg/nng/archive/refs/tags/v1.5.2.tar.gz +tar xf v1.5.2.tar.gz +cd nng-1.5.2 +mkdir build +cd build +make +sudo make install +vi CMakeCache.txt + +Change OFF to ON for shared libraries: + +//Build shared library +BUILD_SHARED_LIBS:BOOL=ON + +make +sudo make install +sudo ldconfig + +chicken-install nng +-or- +git clone https://gitlab.com/ariSun/chicken-nng.git +cd chicken-ngg;chicken-install + Index: build-assist/iup-compile.sh ================================================================== --- build-assist/iup-compile.sh +++ build-assist/iup-compile.sh @@ -4,11 +4,16 @@ fi echo "Put iup, im and cd .a and .so files in PREFIX/lib" echo " 1. get opensrc fossil from https://www.kiatoa.com/fossils/opensrc" echo " 2. list the unversioned files and export the cd, im and iup lib for your kernel (try uname -a for the kernel number) 4.15 ==> 415_64" +echo ' for x in $(fossil uv list | grep 415| awk ''{print $6}'');do targ=$(echo $x|cut -d/ -f3); fossil uv export $x $targ; done' echo " 3. untar iup, im and cp tars into a clean working dir and then copy:" +echo " find . -name \*.a -print -exec cp {} $PREFIX/lib \;" +echo " find . -name \*.so -print -exec cp {} $PREFIX/lib \;" +echo " rsync -av include/ $PREFIX/include/" +echo " or (depending on versions and what you see in the iup tars - they seem to vary" echo " cp *.a *.so $PREFIX/lib" echo " cp include/*.h $PREFIX/include" echo " 4. run the chicken-install like this:" echo "If you use a wrapper (e.g. ck5) to create the chicken environment:" Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -275,16 +275,16 @@ launch:is-test-alive common:get-num-cpus common:wait-for-normalized-load common:wait-for-cpuload tasks:kill-server -server:get-logs-list -server:get-list -server:get-num-alive -server:get-best -server:get-first-best -server:get-rand-best +;; server:get-logs-list +;; server:get-list +;; server:get-num-alive +;; server:get-best +;; server:get-first-best +;; server:get-rand-best server:record->id server:get-num-servers server:logf-get-start-info get-uname realpath @@ -314,10 +314,11 @@ seconds->time-string seconds->work-week/day-time seconds->work-week/day seconds->year-work-week/day seconds->year-work-week/day-time +seconds->year-work-week/day-time-fname seconds->year-week/day-time seconds->quarter common:date-time->seconds common:find-start-mark-and-mark-delta common:expand-cron-slash @@ -2825,21 +2826,21 @@ (system (conc "gzip " logfile)) (unset-environment-variable! "TARGETHOST_LOGF") (unset-environment-variable! "TARGETHOST")))) -(define (server:get-logs-list area-path) +#;(define (server:get-logs-list area-path) (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) (server-logs (glob (conc area-path"/logs/server-*-*.log"))) ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; -(define (server:get-list areapath #!key (limit #f)) +#;(define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then ;; continue @@ -2887,11 +2888,11 @@ (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) -(define (server:get-num-alive srvlst) +#;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn @@ -2914,11 +2915,11 @@ ;; mod-time host port start-time pid ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; -(define (server:get-best srvlst) +#;(define (server:get-best srvlst) (let* ((nums (server:get-num-servers)) (now (current-seconds)) (slst (sort (filter (lambda (rec) (if (and (list? rec) @@ -2942,18 +2943,18 @@ (list-ref b 3)))))) (if (> (length slst) nums) (take slst nums) slst))) -(define (server:get-first-best areapath) +#;(define (server:get-first-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and srvrs (not (null? srvrs))) (car srvrs) #f))) -(define (server:get-rand-best areapath) +#;(define (server:get-rand-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and (list? srvrs) (not (null? srvrs))) (let* ((len (length srvrs)) (idx (pseudo-random-integer len))) @@ -3579,10 +3580,14 @@ (seconds->local-time sec) "ww%V.%u")) (define (seconds->year-work-week/day sec) (time->string (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time-fname sec) + (time->string + (seconds->local-time sec) "%yww%V.%w.%H%M%S")) (define (seconds->year-work-week/day-time sec) (time->string (seconds->local-time sec) "%Yww%V.%w %H:%M")) Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -86,10 +86,11 @@ (prefix base64 base64:) (prefix dbi dbi:) (prefix sqlite3 sqlite3:) (srfi 18) directory-utils + dot-locking format matchable md5 message-digest regex @@ -112,10 +113,15 @@ ;; parameters ;;====================================================================== ;; while targets are Megatest specific they are a useful concept (define mytarget (make-parameter #f)) + +;; locking is optional, many environments don't care (e.g. running on one machine) +;; NOTE: the locker must follow the same syntax as with-dot-lock* +;; +(define my-with-lock (make-parameter with-dot-lock*)) ;;====================================================================== ;; move debug stuff to separate module then put these back where they belong ;;====================================================================== ;;====================================================================== @@ -1186,34 +1192,36 @@ ;; DO THE LOCKING AROUND THE CALL ;;====================================================================== ;; (define (configf:write-alist cdat fname) ;; (if (not (common:faux-lock fname)) - (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname) - (let* ((dat (configf:config->alist cdat)) - (res - (begin - (with-output-to-file fname ;; first write out the file - (lambda () - (pp dat))) - ;; I don't like this. It makes write-alist opaque and complicated. -mrw- - (if (file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions + ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname) + ((my-with-lock) + fname + (lambda () + (let* ((dat (configf:config->alist cdat)) + (res + (begin + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + ;; I don't like this. It makes write-alist opaque and complicated. -mrw- + (if (file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (handle-exceptions exn - (begin - (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) - #f) - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) - ;; (common:faux-unlock fname) - res)) + (begin + (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) + #f) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) + #f)) + #f)))) + res)))) (define (common:get-fields cfgdat) (let ((fields (hash-table-ref/default cfgdat "fields" '()))) (map car fields))) ) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -42,10 +42,11 @@ (declare (uses subrunmod)) (declare (uses tree)) (declare (uses vgmod)) (declare (uses testsmod)) (declare (uses tasksmod)) +(declare (uses dbi)) ;; needed for configf scripts, scheme etc. ;; (declare (uses apimod.import)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs.import)) @@ -89,10 +90,11 @@ (prefix iup iup:) canvas-draw canvas-draw-iup (prefix sqlite3 sqlite3:) + (prefix dbi dbi:) srfi-1 regex regex-case srfi-69 typed-records sparse-vectors format @@ -238,10 +240,11 @@ ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) + ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) @@ -2842,11 +2845,11 @@ (dashboard:runs-horizontal-slider runs-dat)))) controls )) (views-cfgdat (common:load-views-config)) (additional-tabnames '()) - (tab-start-num 5) ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW + (tab-start-num 6) ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW ;; (data (dboard:tabdat-init (make-d:data))) (additional-views ;; process views-dat (let ((tab-num tab-start-num) (result '())) (for-each @@ -2889,18 +2892,21 @@ runs-view ;; (make-runs-view commondat runs2-dat 2) (dashboard:runs-summary commondat onerun-dat tab-num: 2) (dashboard:run-controls commondat runcontrols-dat tab-num: 3) (dashboard:run-times commondat runtimes-dat tab-num: 4) + (iup:vbox (iup:button "Pushme")) ;; tab 5 additional-views))) ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) (iup:attribute-set! tabs "TABTITLE0" "Summary") (iup:attribute-set! tabs "TABTITLE1" "Runs") ;; (iup:attribute-set! tabs "TABTITLE2" "Runs2") (iup:attribute-set! tabs "TABTITLE2" "Run Summary") (iup:attribute-set! tabs "TABTITLE3" "Run Control") (iup:attribute-set! tabs "TABTITLE4" "Run Times") + (iup:attribute-set! tabs "TABTITLE5" "Sys Status") + ;; (iup:attribute-set! tabs "TABTITLE3" "New View") ;; (iup:attribute-set! tabs "TABTITLE4" "Run Control") ;; set the tab names for user added tabs (for-each @@ -3664,11 +3670,11 @@ ;;====================================================================== ;; The heavy lifting starts here ;;====================================================================== (define (dashboard-main) - (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; + (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; not used for now, update for .db area and use for write access detection #;(if (and (common:file-exists? mtdb-path) (file-writable? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) (common:exit-on-version-changed))) (let* ((commondat (dboard:commondat-make))) Index: dbi.scm ================================================================== --- dbi.scm +++ dbi.scm @@ -17,7 +17,8 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbi)) +(declare (uses autoload)) (include "dbi/dbi.scm") Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -44,15 +44,16 @@ db:get-ddb db:open-dbdat db:open-run-db db:open-inmem-db db:setup -db:get-main-lock +;; db:get-main-lock db:with-lock-db db:get-iam-server-lock db:get-locker db:take-lock +db:steal-lock-db db:release-lock db:general-sqlite-error-dump db:first-result-default db:generic-error-printout db:with-db @@ -226,10 +227,11 @@ db:get-cache-stmth db:register-server db:deregister-server db:get-server-info db:get-count-servers +db:get-servers-info db:get-steps-info-by-id make-dbr:dbdat dbr:dbdat-db dbr:dbdat-inmem @@ -511,11 +513,11 @@ ;; The lockname is the filename (can have many to one, run-id to fname ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (db:get-main-lock dbfile) +#;(define (db:get-main-lock dbfile) (db:with-lock-db dbfile (lambda (dbh dbfile) (db:get-iam-server-lock dbh dbfile)))) (define (db:with-lock-db dbfile proc) @@ -524,38 +526,47 @@ ;; (sqlite3:finalize! dbh) res)) ;; called before db is open? ;; -(define (db:get-iam-server-lock dbh dbfname) +(define (db:get-iam-server-lock dbh dbfname host port) (sqlite3:with-transaction dbh (lambda () (let* ((locker (db:get-locker dbh dbfname))) (if locker - #f - (db:take-lock dbh dbfname)))))) + locker + (db:take-lock dbh dbfname port)))))) ;; (exn sqlite3) (define (db:get-locker dbh dbfname) (condition-case - (sqlite3:first-row dbh "SELECT owner_pid,owner_host,event_time FROM locks WHERE lockname=?;" dbfname) + (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname) (exn (sqlite3) #f))) ;; should never fail because it is run in a transaction with a test for the lock ;; -(define (db:take-lock dbh dbfname) +(define (db:take-lock dbh dbfname port) ;; (condition-case ;; (begin - (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host) VALUES (?,?,?);" dbfname (current-process-id) (get-host-name)) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) ;; #t) ;; (exn (sqlite3) #f))) #t) -(define (db:release-lock dbh dbfname) +(define (db:steal-lock-db dbh dbfname port) + (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) + #t) + +(define (db:release-lock-force dbh dbfname) (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname)) +;; release a lock if it matches +(define (db:release-lock dbh dbfname host port) + (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=? AND owner_host=? AND owner_port=?;" dbfname host port)) + ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== (define (db:general-sqlite-error-dump exn stmt . params) @@ -1515,10 +1526,11 @@ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS locks (id INTEGER PRIMARY KEY, lockname TEXT, owner_pid INTEGER, owner_host TEXT, + owner_port TEXT, event_time TIMESTAMP DEFAULT (strftime('%s','now')), CONSTRAINT lock_constraint UNIQUE (lockname));") ;; maps to *srvpktspec* from http-transportmod (sqlite3:execute db "CREATE TABLE IF NOT EXISTS servers @@ -5855,11 +5867,12 @@ #f) ;; server already deregistered (begin (sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" ;; host port servkey pid ipaddr apath dbname) - #;(db:get-server-info dbstruct apath dbname))))))))) + #;(db:get-server-info dbstruct apath dbname) + 'done)))))))) (define (db:get-server-info dbstruct apath dbname) (db:with-db dbstruct #f #f @@ -5882,7 +5895,20 @@ (max res count)) 0 db "SELECT count(*) FROM servers WHERE apath=?;" apath)))) + +(define (db:get-servers-info dbstruct apath) + (db:with-db + dbstruct + #f #f + (lambda (db) + (sqlite3:fold-row + (lambda (res . row) + (cons row res)) + '() + db + "SELECT * FROM servers WHERE apath=?;" + apath)))) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -902,14 +902,14 @@ (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) - (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) + (colnames (list "Id" "MTver" "Pid" "Host" "Interface:port" "Runtime" "State" "Db")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:get-list *toppath* limit: 10))) + (let ((servers (rmt:get-servers-info *toppath*)#;(server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) @@ -916,28 +916,25 @@ ;; (set! colnum (+ 1 colnum))) ;; colnames) (set! rownum 1) (for-each (lambda (server) - (set! colnum 0) - (match-let (((mod-time host port start-time server-id pid) + (set! colnum 0) ;; id host port servkey pid ipaddr apath dbname event_time + (match-let (((id host port server-id pid ipaddr apath dbname start-time) ;; (mod-time host port start-time server-id pid) server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time + (let* ((uptime (- (current-seconds) start-time)) + #;(runtime (if start-time (- mod-time start-time) 0)) - (vals (list "-" ;; (vector-ref server 0) ;; Id + (vals (list server-id ;; (vector-ref server 0) ;; Id "-" ;; (vector-ref server 9) ;; MT-Ver pid ;; (vector-ref server 1) ;; Pid host ;; (vector-ref server 2) ;; Hostname (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) - (cond - ((< uptime 5) "alive") - ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State - (else "dead")) - "-" ;; (vector-ref server 12) ;; RunId + (seconds->hr-min-sec uptime) ;; Runtime + "Running" ;; State - Do some kind of ping here + dbname ;; Db ))) (for-each (lambda (val) (let* ((row-col (conc rownum ":" colnum)) (curr-val (iup:attribute servers-matrix row-col))) (if (not (equal? (conc val) curr-val)) Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -8,20 +8,24 @@ (import scheme chicken.base chicken.string chicken.port chicken.process-context + chicken.process-context.posix + (prefix mtargs args:) srfi-1 + system-information ) ;;====================================================================== ;; debug stuff ;;====================================================================== (define verbosity (make-parameter '())) (define *default-log-port* (current-error-port)) +(define debug:print-logger (make-parameter #f)) ;; se to a proc to call on every logging print (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (args:get-arg "-debug-noprop") (get-environment-variable "MT_DEBUG_MODE")))) @@ -99,25 +103,35 @@ ((and (number? vb) (list? n)) (member vb n)) (else #f)))) +(define (debug:handle-remote-logging params) + (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now + ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " + (string-intersperse (map conc params) " ") "; " + (string-intersperse (command-line-arguments) " "))))) + (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () ;; (if *logging* ;; (db:log-event (apply conc params)) (apply print params) - )))) ;; ) + (debug:handle-remote-logging params) + ))) + #t ;; only here to make remote stuff happy. It'd be nice to fix that ... + ) (define (debug:print-error n e . params) ;; normal print (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (apply print "ERROR: " params) + (debug:handle-remote-logging (cons "ERROR: " params)) ))) ;; pass important messages to stderr (if (and (eq? n 0)(not (eq? e (current-error-port)))) (with-output-to-port (current-error-port) (lambda () @@ -127,8 +141,16 @@ (define (debug:print-info n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (apply print "INFO: (" n ") " params) ;; res) + (debug:handle-remote-logging (cons "INFO: " params)) )))) +(define (debug:print-warn n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "WARN: (" n ") " params) ;; res) + (debug:handle-remote-logging (cons "WARN: " params)) + )))) ) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -437,10 +437,11 @@ - to automatically figure out hostname -adjutant C,M : start the server/adjutant with allocated cores C and Mem M (Gig), use 0,0 to auto use full machine -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile + -autolog logfilebase : appends pid and host to logfilebase for logfile -list-servers : list the servers -kill-servers : kill all servers -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests @@ -630,10 +631,11 @@ "-run-id" "-ping" "-refdb2dat" "-o" "-log" + "-autolog" "-sync-log" "-since" "-fields" "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state "-sort" @@ -784,20 +786,24 @@ ;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not ;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation ;; where (launch:setup) returns #f? ;; - (if (or (args:get-arg "-log")#;(args:get-arg "-server")) ;; redirect the log always when a server + (if (or (args:get-arg "-log") ;;(args:get-arg "-server") ;; redirect the log always when a server + (args:get-arg "-autolog")) (handle-exceptions exn (begin (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) ) - (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified - (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name - (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) - (oup (open-logfile logf))) + (let* ((tl (or (args:get-arg "-log") + (args:get-arg "-autolog") ;; autolog provides the basename .../logs/something- for the logfile + (launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified + (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name + (conc tl (current-process-id)"-"(get-host-name)".log") + (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (oup (open-logfile logf))) (if (not (args:get-arg "-log")) (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log (debug:print-info 0 *default-log-port* "Sending log output to " logf) (set! *default-log-port* oup)))) @@ -1135,38 +1141,40 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit - (let* ((servers (server:get-list *toppath*)) + (let* ((servers (rmt:get-servers-info *toppath*)) (fmtstr "~8a~22a~20a~20a~8a\n")) - (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") - (format #t fmtstr "===" "==============" "=========" "========" "=====") + ;; id INTEGER PRIMARY KEY, + ;; host TEXT, + ;; port INTEGER, + ;; servkey TEXT, + ;; pid TEXT, + ;; ipaddr TEXT, + ;; apath TEXT, + ;; dbname TEXT, + ;; event_time + (format #t fmtstr "pid" "Interface:port" "State" "dbname" "apath") + (format #t fmtstr "===" "==============" "=====" "======" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) - (let* ((mtm (any->number (car server))) - (mod (if mtm (- (current-seconds) mtm) "unk")) - (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) - (url (conc (cadr server) ":" (caddr server))) - (pid (list-ref server 4)) - (alv (if (number? mod)(< mod 10) #f))) - (format #t - fmtstr - pid - url - (seconds->hr-min-sec age) - (seconds->hr-min-sec mod) - (if alv "alive" "dead")) - (if (and alv - (args:get-arg "-kill-servers")) + (match-let + (((id host port servkey pid ipaddr apath dbname event_time) server)) + (format #t + fmtstr + pid + (conc host":"port) + (if (server-ready? host port servkey) "Running" "Dead") + dbname ;; (seconds->hr-min-sec mod) + apath + ) + (if (args:get-arg "-kill-servers") (begin (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!") #;(server:kill server))))) - (sort servers (lambda (a b) - (let ((ma (or (any->number (car a)) 9e9)) - (mb (or (any->number (car b)) 9e9))) - (> ma mb))))) + servers) ;; (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -133,26 +133,26 @@ ) (define (servdat->url sdat) (conc (servdat-host sdat)":"(servdat-port sdat))) - ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u -(defstruct rmt:remote - (conns (make-hash-table)) ;; apath/dbname => rmt:conn +(defstruct remotedat + (conns (make-hash-table)) ;; apath/dbname => conndat ) -(defstruct rmt:conn +(defstruct conndat (apath #f) (dbname #f) (fullname #f) (hostport #f) (ipaddr #f) (port #f) + (socket #f) (srvpkt #f) (srvkey #f) (lastmsg 0) (expires 0) (inport #f) @@ -169,16 +169,16 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; replaces *runremote* -(define *rmt:remote* (make-rmt:remote)) +(define *remotedat* (make-remotedat)) ;; -> http://abc.com:900/ ;; -(define (rmt:conn->uri conn entrypoint) - (conc "http://"(rmt:conn-ipaddr conn)":"(rmt:conn-port conn)"/"entrypoint)) +(define (conndat->uri conn entrypoint) + (conc "http://"(conndat-ipaddr conn)":"(conndat-port conn)"/"entrypoint)) ;; set up the api proc, seems like there should be a better place for this? (define api-proc (make-parameter conc)) (api-proc api:process-request) @@ -187,15 +187,15 @@ ;; ;; else setup a connection ;; ;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception ;; -(define (rmt:get-conn remote apath dbname) - (let* ((fullname (db:dbname->path apath dbname)) ;; we'll switch to full name later - (conn (hash-table-ref/default (rmt:remote-conns remote) dbname #f))) +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname)) + (conn (hash-table-ref/default (remotedat-conns remdat) fullname #f))) (if (and conn - (< (current-seconds) (rmt:conn-expires conn))) + (< (current-seconds) (conndat-expires conn))) conn #f))) (define (rmt:find-main-server apath dbname) (let* ((pktsdir (get-pkts-dir apath)) @@ -202,69 +202,105 @@ (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) ;; (dbpath (conc apath "/" dbname)) (viable-srvs (get-viable-servers all-srvpkts dbname))) (get-the-server apath viable-srvs))) -;; looks for a connection to main + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; ;; connections for other servers happens by requesting from main ;; ;; TODO: This is unnecessarily re-creating the record in the hash table ;; -(define (rmt:open-main-connection remote apath) - (let* ((dbname (db:run-id->dbname #f)) - (the-srv (rmt:find-main-server apath dbname)) - (start-main-srv (lambda () - ;; srv not ready, delay a little and try again - (api:run-server-process apath dbname) - (thread-sleep! 4) - (rmt:open-main-connection remote apath) ;; TODO: Add limit to number of tries - ))) - (if the-srv ;; yes, we have a server, now try connecting to it - (let* ((srv-addr (server-address the-srv)) - (ipaddr (alist-ref 'ipaddr the-srv)) - (port (alist-ref 'port the-srv)) - (srvkey (alist-ref 'servkey the-srv)) - (fullpath (db:dbname->path apath dbname)) - (srvready (server-ready? ipaddr port srvkey))) - (if srvready - (begin - (hash-table-set! (rmt:remote-conns remote) - dbname ;; fullpath ;; yes, I'd prefer it to be fullpath - FIXME later - (make-rmt:conn - apath: apath - dbname: dbname - fullname: fullpath - hostport: srv-addr - ipaddr: ipaddr - port: port - srvpkt: the-srv - srvkey: srvkey ;; generated by rmt:get-signature on the server side - lastmsg: (current-seconds) - expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping - )) - #t) - (start-main-srv))) - (start-main-srv)))) - -;; NB// remote is a rmt:remote struct +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath "/.db/main.db")) + (conns (remotedat-conns remdat)) + (conn (hash-table-ref/default conns fullpath #f))) ;; TODO - create call for this + (cond + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (if (conndat-socket conn) + (nng-close! (conndat-socket conn))) + (hash-table-set! conns fullpath #f) ;; clean up + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1))) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + socket: (open-nn-connection srv-addr) + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// remdat is a remotedat struct ;; -(define (rmt:general-open-connection remote apath dbname #!key (num-tries 5)) - (let* ((mdbname (db:run-id->dbname #f)) - (mconn (rmt:get-conn remote apath mdbname))) +(define (rmt:general-open-connection remdat apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname (db:run-id->dbname #f)) + (fullname (db:dbname->path apath dbname)) + (conns (remotedat-conns remdat)) + (mconn (rmt:get-conn remdat apath mdbname))) + (if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) (cond ((or (not mconn) ;; no channel open to main? - (< (rmt:conn-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease - (rmt:open-main-connection remote apath) - (rmt:general-open-connection remote apath mdbname)) - ((not (rmt:get-conn remote apath dbname)) ;; no channel open to dbname? - (let* ((res (rmt:send-receive-real remote apath mdbname 'get-server `(,apath ,dbname)))) + (< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease + (if mconn ;; previously opened - clean up NB// consolidate this with the similar code in open main above + (begin + (debug:print-info 0 *default-log-port* "Clearing out connection to main that has expired.") + (nng-close! (conndat-socket mconn)) + (hash-table-set! conns fullname #f))) + (rmt:open-main-connection remdat apath) + (rmt:general-open-connection remdat apath mdbname)) + ((not (rmt:get-conn remdat apath dbname)) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real remdat apath mdbname 'get-server `(,apath ,dbname)))) (case res ((server-started) (if (> num-tries 0) (begin (thread-sleep! 2) - (rmt:general-open-connection remote apath dbname num-tries: (- num-tries 1))) + (rmt:general-open-connection remdat apath dbname num-tries: (- num-tries 1))) (begin (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) (exit 1)))) (else (if (list? res) ;; server has been registered and the info was returned. pass it on. @@ -275,16 +311,17 @@ ;; ".db/1.db") (match res ((host port servkey pid ipaddr apath dbname) (debug:print-info 0 *default-log-port* "got "res) - (hash-table-set! (rmt:remote-conns remote) - dbname - (make-rmt:conn + (hash-table-set! conns + fullname + (make-conndat apath: apath dbname: dbname hostport: (conc host":"port) + socket: (open-nn-connection (conc host":"port)) ipaddr: ipaddr port: port srvkey: servkey lastmsg: (current-seconds) expires: (+ (current-seconds) 60)))) @@ -291,72 +328,78 @@ (else (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) res) (begin (debug:print-info 0 *default-log-port* "Unexpected result: " res) - res)))))) - - - ))) + res))))))) + + #t)) ;;====================================================================== ;; FOR DEBUGGING SET TO #t -(define *localmode* #t) +;; (define *localmode* #t) +(define *localmode* #f) (define *dbstruct* (make-dbr:dbstruct)) ;; Defaults to current area ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) - (if (not *rmt:remote*)(set! *rmt:remote* (make-rmt:remote))) + ;; (if (not *remotedat*)(set! *remotedat* (make-remotedat))) (let* ((apath *toppath*) - (conns *rmt:remote*) + (remdat *remotedat*) + (conns (remotedat-conns remdat)) ;; just checking that remdat is a remotedat (dbname (db:run-id->dbname rid))) (if *localmode* (let* ((dbdat (dbr:dbstruct-get-dbdat *dbstruct* dbname)) (indat `((cmd . ,cmd)(params . ,params)))) (api:process-request *dbstruct* indat) ;; (api:process-request dbdat indat) ) (begin - (rmt:general-open-connection conns apath dbname) - (rmt:send-receive-real conns apath dbname cmd params))))) + (rmt:open-main-connection remdat apath) + (if rid (rmt:general-open-connection remdat apath dbname)) + (rmt:send-receive-real remdat apath dbname cmd params))))) #;(define (rmt:send-receive-setup conn) - (if (not (rmt:conn-inport conn)) - (let-values (((i o) (tcp-connect (rmt:conn-ipaddr conn) - (rmt:conn-port conn)))) - (rmt:conn-inport-set! conn i) - (rmt:conn-outport-set! conn o)))) + (if (not (conndat-inport conn)) + (let-values (((i o) (tcp-connect (conndat-ipaddr conn) + (conndat-port conn)))) + (conndat-inport-set! conn i) + (conndat-outport-set! conn o)))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future ;; -(define (rmt:send-receive-real remote apath dbname cmd params) - (let* ((conn (rmt:get-conn remote apath dbname))) +(define (rmt:send-receive-real remdat apath dbname cmd params) + (let* ((conn (rmt:get-conn remdat apath dbname))) (assert conn "FATAL: rmt:send-receive-real called without the needed channels opened") - (let* ((key #f) - (host (rmt:conn-ipaddr conn)) - (port (rmt:conn-port conn)) + (assert (conndat-socket conn) "FATAL: rmt:send-receive-real called without the channel socket opened.") + (let* ((soc (conndat-socket conn)) + (key #f) + (host (conndat-ipaddr conn)) + (port (conndat-port conn)) (payload `((cmd . ,cmd) - (key . ,(rmt:conn-srvkey conn)) + (key . ,(conndat-srvkey conn)) (params . ,params))) - (res (open-send-receive-nn (conc host":"port) - (sexpr->string payload)))) - (string->sexpr res)))) + (res (send-receive-nn soc ;; (open-send-receive-nn (conc host":"port) + (sexpr->string payload)))) + (if (member res '("#")) ;; TODO - fix this in string->sexpr + #f + (string->sexpr res))))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future. ;; ;; Purpose - call the main.db server and request a server be started ;; for the given area path and dbname ;; -;; (define (rmt:send-receive-server-start remote apath dbname) -;; (let* ((conn (rmt:get-conn remote apath dbname))) +;; (define (rmt:send-receive-server-start remdat apath dbname) +;; (let* ((conn (rmt:get-conn remdat apath dbname))) ;; (assert conn "FATAL: Unable to connect to db "apath"/"dbname) ;; #;(let* ((res (with-input-from-request -;; (rmt:conn->uri conn "api") +;; (conndat->uri conn "api") ;; `((params . (,apath ,dbname))) ;; read-string))) ;; (string->sexpr res)))) (define (rmt:print-db-stats) @@ -406,14 +449,17 @@ ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) + (rmt:send-receive 'kill-server #f (list run-id))) (define (rmt:start-server run-id) - (rmt:send-receive 'start-server 0 (list run-id))) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:get-server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) ;;====================================================================== ;; M I S C ;;====================================================================== @@ -764,10 +810,13 @@ ) ;; ) (define (rmt:get-main-run-stats run-id) (rmt:send-receive 'get-main-run-stats #f (list run-id))) +(define (rmt:log-to-main . params) + (rmt:send-receive 'log-to-main #f params)) + (define (rmt:get-var run-id varname) (rmt:send-receive 'get-var run-id (list run-id varname))) (define (rmt:del-var run-id varname) (rmt:send-receive 'del-var run-id (list run-id varname))) @@ -1471,54 +1520,56 @@ (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) -(define (rmt:server-shutdown) +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) (let ((dbfile (servdat-dbfile *server-info*))) (debug:print-info 0 *default-log-port* "dbfile is "dbfile) (if dbfile (let* ((am-server (args:get-arg "-server")) (dbfile (args:get-arg "-db")) (apath *toppath*) - (dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) - (db (dbr:dbdat-db dbdat)) - (inmem (dbr:dbdat-db dbdat)) - ) - ;; do a final sync here - (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) - (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) - ;; let's finalize here - (debug:print-info 0 *default-log-port* "Finalizing db and inmem") - (sqlite3:finalize! db) - (sqlite3:finalize! inmem) - (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete") - (if am-server + (remdat *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") (if (string-match ".*/main.db$" dbfile) (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) ".pkt"))) (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) (delete-file* pkt-file) - (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile) - (db:with-lock-db (servdat-dbfile *server-info*) - (lambda (dbh dbfile) - (db:release-lock dbh dbfile)))) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *server-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove (let* ((sdat *server-info*) ;; we have a run-id server (host (servdat-host sdat)) (port (servdat-port sdat)) - (uuid (servdat-uuid sdat))) - (if (not (string-match ".db/main.db" (args:get-arg "-db"))) - (let* ((res (rmt:deregister-server *rmt:remote* ;; TODO/BUG: why is this requiring *rmt:remote*? - *toppath* - (servdat-host *server-info*) ;; iface - (servdat-port *server-info*) - (servdat-uuid *server-info*) - (current-process-id) - ))) - (debug:print-info 0 *default-log-port* "deregistered-server, res="res))) - + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server remdat *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) ))))))) (define (std-exit-procedure) ;;(common:telemetry-log-close) @@ -1537,11 +1588,12 @@ (let* ((start-time (current-seconds))) (if (and *server-info* *unclean-shutdown*) (begin (debug:print-info 0 *default-log-port* "Unclean server exit, calling server-shtudown") - (rmt:server-shutdown))) + (rmt:server-shutdown (servdat-host *server-info*) + (servdat-port *server-info*)))) (debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds")) ;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated #;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) @@ -1581,12 +1633,17 @@ ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; -(define (server:ping host port server-id #!key (do-exit #f)) - (server-ready? host port server-id)) +;; conn is a conndat record +;; +(define (server:ping conn #!key (do-exit #f)) + (let* ((req (conndat-socket conn)) + (srvkey (conndat-srvkey conn)) + (msg (sexpr->string '(ping ,srvkey)))) + (send-receive-nn req msg))) ;; (server-ready? host port server-id)) ;;====================================================================== ;; http-transportmod.scm contents moved here ;;====================================================================== @@ -1600,24 +1657,25 @@ ;; ====================================================================== (define (http-get-function fnkey) (hash-table-ref/default *http-functions* fnkey (lambda () "nothing here yet"))) +;; Main entry point to start a server. was start-server (define (rmt:run hostn) ;; ;; Configurations for server ;; (tcp-buffer-size 2048) ;; (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) + ;; (link-tree-path (common:get-linktree)) ;; (tmp-area (common:get-db-tmp-area)) #;(start-file (conc tmp-area "/.server-start"))) (debug:print-info 0 *default-log-port* "portlogger recommended port: " port) (if *server-info* (begin @@ -1638,13 +1696,16 @@ (set! *db-last-access* (current-seconds)) (nng-send rep resdat) (loop (nng-recv rep))))))) (debug:print-info 0 *default-log-port* "After server, should never see this") ;; server exit stuff here - (let* ((portnum (servdat-port *server-info*))) + (let* ((portnum (servdat-port *server-info*)) + (host (servdat-host *server-info*))) (portlogger:open-run-close portlogger:set-port portnum "released") - (rmt:server-shutdown) + (if (not (equal? (get-host-name) host)) + (debug:print-info 0 *default-log-port* "Server shutdown called for host "host", but we are on "(get-host-name)) + (rmt:server-shutdown host portnum)) ;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) ;; (debug:print-info 0 *default-log-port* "Average cached write time " @@ -1676,82 +1737,70 @@ (set! *server-info* (make-servdat host: ipaddrstr port: portnum))) (debug:print-info 0 *default-log-port* "rmt:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum) - (if (is-port-in-use portnum) - (begin - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close - portlogger:find-port))) - (begin - (if (not *server-info*) - (set! *server-info* (make-servdat - host: ipaddrstr - port: portnum))) - (servdat-status-set! *server-info* 'starting) - (servdat-port-set! *server-info* portnum) - (if (not (servdat-rep *server-info*)) - (let ((rep (make-rep-socket))) - (servdat-rep-set! *server-info* rep) - (socket-set! rep 'nng/recvtimeo 2000))) - (let* ((rep (servdat-rep *server-info*))) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) - (nng-listen rep (conc "tcp://*:" portnum)) - rep))))) + (assert (servdat? *server-info*) "FATAL: Must always have *server-info* properly set up by here.") + (servdat-status-set! *server-info* 'starting) + (servdat-port-set! *server-info* portnum) + (if (not (servdat-rep *server-info*)) + (let ((rep (make-rep-socket))) + (servdat-rep-set! *server-info* rep) + (socket-set! rep 'nng/recvtimeo 2000))) + (let* ((rep (servdat-rep *server-info*))) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + ;; (thread-sleep! 0.1) + (rmt:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) + (nng-listen rep (conc "tcp://*:" portnum)) + rep))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== ;; C L I E N T S ;;====================================================================== -(define (http-transport:get-time-to-cleanup) +(define (rmt:get-time-to-cleanup) (let ((res #f)) (mutex-lock! *http-mutex*) (set! res (> (current-seconds) *http-connections-next-cleanup*)) (mutex-unlock! *http-mutex*) res)) -(define (http-transport:inc-requests-count) +(define (rmt:inc-requests-count) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) ;; Use this opportunity to slow things down iff there are too many requests in flight (if (> *http-requests-in-progress* 5) (begin (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") (thread-sleep! 1))) (mutex-unlock! *http-mutex*)) -(define (http-transport:dec-requests-count proc) +(define (rmt:dec-requests-count proc) (mutex-lock! *http-mutex*) (proc) (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) (mutex-unlock! *http-mutex*)) -(define (http-transport:dec-requests-count-and-close-all-connections) +(define (rmt:dec-requests-count-and-close-all-connections) (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds (if (> *http-requests-in-progress* 0) (if (> etime (current-seconds)) (begin @@ -1761,68 +1810,37 @@ "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) #;(close-idle-connections!))) (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) (mutex-unlock! *http-mutex*)) -(define (http-transport:inc-requests-and-prep-to-close-all-connections) +(define (rmt:inc-requests-and-prep-to-close-all-connections) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (debug:print-info 0 *default-log-port* "http-transport:close-connections doesn't do anything now!")) -;; (let* ((runremote (or area-dat *runremote*)) -;; (server-dat (if runremote -;; (remote-conndat runremote) -;; #f))) ;; (hash-table-ref/default *runremote* run-id #f))) -;; (if (vector? server-dat) -;; (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) -;; (handle-exceptions -;; exn -;; (begin -;; (print-call-chain *default-log-port*) -;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) -;; (close-connection! api-dat) -;; ;;(close-idle-connections!) -;; #t)) -;; #f))) - - - -;; initialize servdat for client side, setup needed parameters -;; pass in #f as sdat-in to create sdat -;; -#;(define (servdat-init sdat-in iface port uuid) - (let* ((sdat (or sdat-in (make-servdat)))) - - (assert #f "This is a bad idea.") - - (if uuid (servdat-uuid-set! sdat uuid)) - (servdat-host-set! sdat iface) - (servdat-port-set! sdat port) - (servdat-api-url-set! sdat (conc "http://" iface ":" port "/api")) - (servdat-api-uri-set! sdat (uri-reference (servdat-api-url sdat))) - (servdat-api-req-set! sdat (make-request method: 'POST - uri: (servdat-api-uri sdat))) - ;; set up the http-client parameters - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - (determine-proxy (constantly #f)) - sdat)) - ;;====================================================================== ;; NEW SERVER METHOD ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (get-lock-db sdat dbfile) - (let* ((dbh (db:open-run-db dbfile db:initialize-db)) - (res (db:get-iam-server-lock dbh dbfile))) +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? owner_host owner_port "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) (sqlite3:finalize! dbh) res)) (define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) @@ -1867,47 +1885,18 @@ (define (server-address srv-pkt) (conc (alist-ref 'host srv-pkt) ":" (alist-ref 'port srv-pkt))) (define (server-ready? host port key) ;; server-address is host:port -;; (let-values (((i o)(handle-exceptions -;; exn -;; (values #f #f) -;; (tcp-connect host port)))) -;; (if (and i o) (let* ((data (sexpr->string `((cmd . ping) (key . ,key) (params . ())))) (res (open-send-receive-nn (conc host ":" port) data))) - (string->sexpr res))) - -;; (let ((res (with-input-from-port i -;; read))) -;; (close-output-port o) -;; (close-input-port i) -;; res)) -;; (if (string? res) -;; (string->sexpr res) -;; res))) -;; (begin ;; connection failed -;; (debug:print-info 0 *default-log-port* "Server at "host":"port" is not responding.") -;; #f)))) - -;; (define (loop-test host port data) ;; server-address is host:port -;; ;; ping the server and ask it -;; ;; if it ready -;; ;; (let* ((sdat (servdat-init #f host port #f))) -;; ;; (http-transport:send-receive sdat "abc" 'ping '()))) -;; (let* ((payload (sexpr->string data)) -;; (res (with-input-from-request -;; (conc "http://"host":"port"/loop-test") -;; `((data . ,payload)) -;; read-string))) -;; (string->sexpr res)) -;; #f -;; ) - + (if res + (string->sexpr res) + res))) + ; from the pkts return servers associated with dbpath ;; NOTE: Only one can be alive - have to check on each ;; in the list of pkts returned ;; (define (get-viable-servers serv-pkts dbpath) @@ -1918,10 +1907,26 @@ (let* ((spkt (car tail))) (loop (cdr tail) (if (equal? dbpath (alist-ref 'dbpath spkt)) (cons spkt res) res)))))) + +(define (remove-pkts-if-not-alive serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? host port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) ;; from viable servers get one that is alive and ready ;; (define (get-the-server apath serv-pkts) (let loop ((tail serv-pkts)) @@ -1971,11 +1976,11 @@ ;; END NEW SERVER METHOD ;;====================================================================== ;; if .db/main.db check the pkts ;; -(define (http-transport:wait-for-server pkts-dir db-file server-key) +(define (rmt:wait-for-server pkts-dir db-file server-key) (let* ((sdat *server-info*)) (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (begin ;; let ((sdat #f)) @@ -2005,32 +2010,39 @@ (servdat-host sdat) db-file)) ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key ;; now read pkts and see if we are a contender (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) (viables (get-viable-servers all-pkts db-file)) - (best-srv (get-best-candidate viables db-file)) - (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))) - (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key) + (alive (remove-pkts-if-not-alive viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *server-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) ;; am I the best-srv, compare server-keys to know - (if (equal? best-srv-key server-key) - (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) (begin - (debug:print 0 *default-log-port* "I'm the server!") + (debug:print-info 0 *default-log-port* "I'm the server!") (servdat-dbfile-set! sdat db-file) (servdat-status-set! sdat 'db-locked)) (begin - (debug:print 0 *default-log-port* "I'm not the server, exiting.") + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) (thread-sleep! 0.2) (exit))) (begin - (debug:print 0 *default-log-port* + (debug:print-info 0 *default-log-port* "Keys do not match "best-srv-key", "server-key", exiting.") (bdat-time-to-exit-set! *bdat* #t) - (delete-file* (conc (get-pkts-dir *toppath*) - "/" (servdat-uuid *server-info*) - ".pkt")) ;; remove immediately instead of waiting for on-exit + (delete-pkt) (thread-sleep! 0.2) (exit))) sdat)) (begin ;; sdat not yet contains server info (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) @@ -2041,42 +2053,47 @@ (exit)) (loop start-time (equal? sdat last-sdat) sdat)))))))) -(define (rmt:register-server remote apath iface port server-key dbname) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:register-server remdat apath iface port server-key dbname) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'register-server `(,iface ,port ,server-key ,(current-process-id) ,iface ,apath ,dbname))) -(define (rmt:get-count-servers remote apath) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:get-count-servers remdat apath) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) - 'get-count-servers `(,apath - ))) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) -(define (rmt:deregister-server remote apath iface port server-key dbname) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:deregister-server remdat apath iface port server-key dbname) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'deregister-server `(,iface ,port ,server-key ,(current-process-id) ,iface ,apath ,dbname))) -(define (http-transport:wait-for-stable-interface #!optional (num-tries-allowed 100)) +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) ;; wait until *server-info* stops changing (let* ((stime (current-seconds))) (let loop ((last-host #f) (last-port #f) (tries 0)) @@ -2083,17 +2100,17 @@ (let* ((curr-host (and *server-info* (servdat-host *server-info*))) (curr-port (and *server-info* (servdat-port *server-info*)))) ;; first we verify port and interface, update *server-info* in need be. (cond ((> tries num-tries-allowed) - (debug:print 0 *default-log-port* "http-transport:keep-running, giving up after trying for several minutes.") + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") (exit 1)) ((not *server-info*) (thread-sleep! 0.25) (loop curr-host curr-port (+ tries 1))) ((or (not last-host)(not last-port)) - (debug:print 0 *default-log-port* "http-transport:keep-running, still no interface, tries="tries) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) (thread-sleep! 0.25) (loop curr-host curr-port (+ tries 1))) ((or (not (equal? last-host curr-host)) (not (equal? last-port curr-port))) (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") @@ -2111,36 +2128,46 @@ " AT " (current-seconds) " server signature: " *my-signature* " with "(servdat-trynum *server-info*)" port changes") (flush-output *default-log-port*) #t)))))) -;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; run rmt:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (rmt:keep-running dbname) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((server-start-time (current-seconds)) + (let* ((remdat *remotedat*) + (server-start-time (current-seconds)) (pkts-dir (get-pkts-dir)) (server-key (rmt:get-signature)) ;; This servers key (is-main (equal? (args:get-arg "-db") ".db/main.db")) (last-access 0) - (server-timeout (server:expiration-timeout))) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + (rmt:server-shutdown host port) + (portlogger:open-run-close portlogger:set-port port "released") + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *server-info* (args:get-arg "-db")) ;; main and run db servers have both got wait logic (could/should merge it) (if is-main - (http-transport:wait-for-server pkts-dir dbname server-key) - (http-transport:wait-for-stable-interface)) + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) ;; this is our forever loop (let* ((iface (servdat-host *server-info*)) (port (servdat-port *server-info*))) (let loop ((count 0) (bad-sync-count 0) (start-time (current-milliseconds))) - (if (and (not is-main) (common:low-noise-print 60 "servdat-status")) (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *server-info*))) ;; set up the database handle @@ -2150,18 +2177,28 @@ (debug:print 0 *default-log-port* "SERVER: dbprep") (db:setup dbname) ;; sets *dbstruct-db* as side effect (servdat-status-set! *server-info* 'db-opened) ;; IFF I'm not main, call into main and register self (if (not is-main) - (let ((res (rmt:register-server *rmt:remote* + (let ((res (rmt:register-server remdat *toppath* iface port server-key dbname))) (if res ;; we are the server (servdat-status-set! *server-info* 'have-interface-and-db) - (begin - (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting.") - (exit))))) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:get-server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? host port servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server remdat apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) (debug:print 0 *default-log-port* "SERVER: running, db "dbname" opened, megatest version: " (common:get-full-version)) ;; start the watchdog @@ -2203,26 +2240,28 @@ (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds)) - (if is-main - (> (rmt:get-count-servers *rmt:remote* *toppath*) 1) - #t)) + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers remdat *toppath*) 1) + #f))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (set! *unclean-shutdown* #f) (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - (rmt:server-shutdown) - (portlogger:open-run-close portlogger:set-port port "released") - (exit) + (shutdown-server-sequence (get-host-name) port) #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown (sexpr->string 'quit))) ))))))) @@ -2292,78 +2331,85 @@ ;;start a server, returns the connection ;; (define (start-nn-server portnum ) (let ((rep (make-rep-socket))) ;; (nn-socket 'rep))) (socket-set! rep 'nng/recvtimeo 2000) - (handle-exceptions + (handle-exceptions ;; why have exception handler here? exn (let ((emsg ((condition-property-accessor 'exn 'message) exn))) (print "ERROR: Failed to start server \"" emsg "\"") (exit 1)) (nng-dial #;nn-bind rep (conc "tcp://*:" portnum))) rep)) -;; open connection to server, send message, close connection -;; -(define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds - (let ((req (make-req-socket 'req)) - (uri (conc "tcp://" host-port)) - (res #f) - ;; (contacts (alist-ref 'contact attrib)) - ;; (mode (alist-ref 'mode attrib)) - ) +(define (open-nn-connection host-port) + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port))) + (nng-dial req uri) (socket-set! req 'nng/recvtimeo 2000) - (handle-exceptions - exn - (let ((emsg ((condition-property-accessor 'exn 'message) exn))) - ;; Send notification - (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) - #f) - (nng-dial req uri) - ;; (print "Connected to the server " ) - (nng-send req msg) - ;; (print "Request Sent") - (let* ((th1 (make-thread (lambda () - (let ((resp (nng-recv req))) - (nng-close! req) - (set! res (if (equal? resp "ok") - #t - #f)))) - "recv thread")) - (th2 (make-thread (lambda () - (thread-sleep! timeout) - (thread-terminate! th1)) - "timer thread"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - res)))) - + req)) + +(define (send-receive-nn req msg) + (nng-send req msg) + (nng-recv req)) + +(define (close-nn-connection req) + (nng-close! req)) + +;; ;; open connection to server, send message, close connection +;; ;; +;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket 'req)) +;; (uri (conc "tcp://" host-port)) +;; (res #f) +;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; (mode (alist-ref 'mode attrib)) +;; ) +;; (socket-set! req 'nng/recvtimeo 2000) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; #f) +;; (nng-dial req uri) +;; ;; (print "Connected to the server " ) +;; (nng-send req msg) +;; ;; (print "Request Sent") +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; (set! res (if (equal? resp "ok") +;; #t +;; #f)))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; (define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds (let ((req (make-req-socket)) (uri (conc "tcp://" host-port)) - (res #f) - ;; (contacts (alist-ref 'contact attrib)) - ;; (mode (alist-ref 'mode attrib)) - ) + (res #f)) (handle-exceptions exn (let ((emsg ((condition-property-accessor 'exn 'message) exn))) ;; Send notification (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) #f) (nng-dial req uri) - ;; (print "Connected to the server " ) (nng-send req msg) - ;; (print "Request Sent") - ;; receive code here - ;;(print (nn-recv req)) (let* ((th1 (make-thread (lambda () (let ((resp (nng-recv req))) (nng-close! req) - (print resp) + ;; (print resp) (set! res resp))) "recv thread")) (th2 (make-thread (lambda () (thread-sleep! timeout) (thread-terminate! th1)) @@ -2457,17 +2503,17 @@ ;; '(/ "loop-test")) ;; (send-response body: (alist-ref 'data ($)) ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "json_api")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "runs")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ any)) ;; (send-response body: "hey there!\n" ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) @@ -2474,16 +2520,16 @@ ;; '(/ "hey")) ;; (send-response body: "hey there!\n" ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "jquery3.1.0.js")) -;; (send-response body: ((http-get-function 'http-transport:show-jquery)) +;; (send-response body: ((http-get-function 'rmt:show-jquery)) ;; headers: '((content-type application/javascript)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "test_log")) -;; (send-response body: ((http-get-function 'http-transport:html-test-log) $) +;; (send-response body: ((http-get-function 'rmt:html-test-log) $) ;; headers: '((content-type text/HTML)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "dashboard")) -;; (send-response body: ((http-get-function 'http-transport:html-dboard) $) +;; (send-response body: ((http-get-function 'rmt:html-dboard) $) ;; headers: '((content-type text/HTML)))) ;; (else (continue)))))))) Index: stml2.scm ================================================================== --- stml2.scm +++ stml2.scm @@ -18,8 +18,10 @@ ;;====================================================================== (declare (unit stml2)) (declare (uses cookie)) +(declare (uses dbi)) +(declare (uses autoload)) (include "stml2/stml2.scm") Index: stml2/formdat.scm ================================================================== --- stml2/formdat.scm +++ stml2/formdat.scm @@ -10,12 +10,11 @@ ;; (declare (unit formdat)) (module formdat * -(import chicken scheme data-structures extras srfi-13 ports ) -(use html-filter) +(import chicken scheme data-structures extras srfi-13 ports html-filter) -(use regex) -(require-extension srfi-69) +(import regex) +(import srfi-69) ) Index: stml2/html-filter.scm ================================================================== --- stml2/html-filter.scm +++ stml2/html-filter.scm @@ -11,11 +11,11 @@ (module html-filter * (import chicken scheme data-structures extras srfi-13 ports ) -(use misc-stml) +(import misc-stml) -(require-extension regex) +(import regex) ;; ) Index: stml2/misc-stml.scm ================================================================== --- stml2/misc-stml.scm +++ stml2/misc-stml.scm @@ -16,9 +16,8 @@ (module misc-stml * (import chicken scheme data-structures extras srfi-13 ports posix) -(use regex (prefix dbi dbi:)) -(use (prefix crypt c:)) -(use (prefix dbi dbi:)) +(import regex (prefix dbi dbi:)) +(import (prefix crypt c:)) ) Index: stml2/rollup-pages.scm ================================================================== --- stml2/rollup-pages.scm +++ stml2/rollup-pages.scm @@ -1,6 +1,6 @@ -(use regex posix srfi-69 srfi-1) +(import regex posix srfi-69 srfi-1) (define extract-rx (regexp "pages\\/(.*)_(view|ctrl).scm")) (define (print-page-wrapper lookup page) (print "(define (pages:" page " session db shared)") Index: stml2/session.scm ================================================================== --- stml2/session.scm +++ stml2/session.scm @@ -11,10 +11,9 @@ (module session * (import chicken scheme data-structures extras srfi-13 ports posix files srfi-1) -(use (prefix dbi dbi:) srfi-69) -(require-extension regex) -(use cookie stmlcommon) ;; (declare (uses cookie)) +(import (prefix dbi dbi:) srfi-69 regex) +(import cookie stmlcommon) ;; (declare (uses cookie)) ) Index: stml2/setup.scm ================================================================== --- stml2/setup.scm +++ stml2/setup.scm @@ -9,13 +9,12 @@ (module setup * (import chicken scheme data-structures extras srfi-13 ports posix) -(uses session misc-stml) +(import session misc-stml) ;; (declare (unit setup))se ;; (declare (uses session)) -(require-extension srfi-69) -(require-extension regex) +(import srfi-69 regex) ) Index: stml2/spiffyserver.scm ================================================================== --- stml2/spiffyserver.scm +++ stml2/spiffyserver.scm @@ -1,8 +1,8 @@ ;; This doesn't work yet ;; -(use spiffy cgi-handler) +(import spiffy cgi-handler) (spiffy-debug-mode #t) (spiffy-file-ext-handlers `(("drcdb" . ,(cgi-handler* "/path/to/drcdb")))) Index: stml2/sqlite3.scm ================================================================== --- stml2/sqlite3.scm +++ stml2/sqlite3.scm @@ -9,11 +9,11 @@ ;; ;; I used this to get a simple interactive sqlite editor on the nokia n800 ;; since I couldn't get sqlite3 to install (for reasons I can't remember). -(use sqlite3) +(import sqlite3) (define args (argv)) (define num-args (length args)) (define dbname #f) Index: stml2/stmlcommon.scm ================================================================== --- stml2/stmlcommon.scm +++ stml2/stmlcommon.scm @@ -13,8 +13,8 @@ (module stmlcommon * (import chicken scheme data-structures extras srfi-13 ports posix) -(use (prefix dbi dbi:) regex (prefix crypt c:) srfi-69) +(import (prefix dbi dbi:) regex (prefix crypt c:) srfi-69) ) Index: stml2/stmlrun.scm ================================================================== --- stml2/stmlrun.scm +++ stml2/stmlrun.scm @@ -11,9 +11,9 @@ ;; (require-extension syntax-case) ;; (declare (run-time-macros)) ;; (include "stmlcommon.scm") -(require-library stml) +(import stml) (stml:main #f) Index: stml2/test.scm ================================================================== --- stml2/test.scm +++ stml2/test.scm @@ -1,8 +1,7 @@ -(use test md5) +(import test md5) -(require-extension sqlite3) (import (prefix sqlite3 sqlite3:)) (require-library dbi) ;; (declare (uses stml)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -40,16 +40,16 @@ TARGET = "ubuntu/nfs/none" all : build unit test4 # test1 test2 test3 test4 test5 test6 test7 test8 test9 -unit : basicserver.log server.log +unit : basicserver.log server.log all-rmt.log # all-rmt.log all-api.log # runs.log misc.log tests.log # inter dependencies on the unit tests, I wish these could be "suggestions" -all-rmt.log : all-api.log +# all-rmt.log : all-api.log rel : cd release;dashboard -rows 25 & ## basicserver.log : unittests/basicserver.scm ADDED tests/simplerun/Makefile Index: tests/simplerun/Makefile ================================================================== --- /dev/null +++ tests/simplerun/Makefile @@ -0,0 +1,3 @@ + +cleanup : + killall mtest -v -9;rm -rf .meta .db Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -36,11 +36,11 @@ state start end completed # Job tools are more advanced ways to control how your jobs are launched [jobtools] useshell yes -launcher nbfind +launcher nbfake # You can override environment variables for all your tests here [env-override] EXAMPLE_VAR example value ADDED tests/simplerun/stress-test.scm Index: tests/simplerun/stress-test.scm ================================================================== --- /dev/null +++ tests/simplerun/stress-test.scm @@ -0,0 +1,102 @@ +;;====================================================================== +;; S E R V E R +;;====================================================================== +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + + +;; Run like this: +;; +;; (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) +(import big-chicken + chicken.random + test + srfi-18 + + rmtmod + trace + apimod + dbmod + launchmod + commonmod + ) + +(trace-call-sites #t) +(trace + + ;; db:get-dbdat + ;; rmt:find-main-server + ;; rmt:send-receive-real + ;; rmt:send-receive + ;; sexpr->string + ;; server-ready? + ;; rmt:register-server + ;; rmt:deregister-server + ;; rmt:open-main-connection + ;; rmt:general-open-connection + ;; rmt:get-conn + ;; common:watchdog + ;; rmt:find-main-server + ;; get-all-server-pkts + ;; get-viable-servers + ;; get-best-candidate + ;; api:run-server-process + ;; api:process-request + ;; rmt:run + ;; rmt:try-start-server + ) + + +(define *db* (db:setup ".db/main.db")) + +;; these let me cut and paste from source easily +(define apath *toppath*) +(define run-id (pseudo-random-integer 10)) +(define dbname (conc ".db/"run-id".db")) +(define remote *remotedat*) +(define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) + +(test #f #t (rmt:open-main-connection remote apath)) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f dbname (list-ref (rmt:send-receive-real *remotedat* *toppath* ".db/main.db" 'get-server `(,apath ,dbname)) + 6)) + +(thread-sleep! 2) +(test #f #t (rmt:general-open-connection *remotedat* *toppath* dbname)) + +(let loop ((end-time (+ (current-seconds) 600))) + (test #f #t (list? (rmt:get-servers-info *toppath*))) + + (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) + ;; (print "Got here.") + + (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) + + (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) + ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname + + (test #f #t (number? (rmt:get-count-servers *remotedat* *toppath*))) + + (test #f "run2" (rmt:get-run-name-from-id 2)) + (test #f #f (rmt:send-receive 'get-test-info-by-id 2 '(2 1))) + + (test #f #t (rmt:general-call 'update-cpuload-diskfree 2 1.5 1e6 1)) + (if (< (current-seconds) end-time)(loop end-time))) + +(exit) Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -15,11 +15,18 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(import srfi-18 test) +(import srfi-18 + test + chicken.string + chicken.process-context + chicken.file + chicken.pretty-print + commonmod + ) (define test-work-dir (current-directory)) ;; given list of lists ;; ( ( msg expected param1 param2 ...) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -29,74 +29,76 @@ ;; NTN - no test needed ;; DEP - function is deprecated, no point in testing ;; NED - function nested under others, no test needed. ;; DEF - deferred -(print "start dir: " (current-directory)) - -(define toppath (current-directory)) - -(test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait -(test #f #t (list? (server:get-list toppath))) -(test #f '() (server:get-best '())) -(test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) -(test #f "test.lock" (common:simple-file-release-lock "test.lock")) -(test #f #t (server:get-best-guess-address (get-host-name))) -(test #f #t (string? (common:get-homehost))) - -;; clean out any old running servers -;; -(let ((servers (server:get-list toppath))) - (print "Known servers: " servers) - (if (not (null? servers)) - (begin - (for-each - (lambda (server) - (let ((pid (list-ref server 4))) - (thread-start! - (make-thread - (lambda () - (print "Attempting to kill server: " server) - (print "Attempting to kill pid " pid) - (system (conc "kill " pid)) - (thread-sleep! 2) - (system (conc "kill -9 " pid))) - (conc pid))))) - servers) - (thread-sleep! 2)))) -;; let's start up a server the mechanical way -(system "nbfake megatest -server -") -(thread-sleep! 2) -;; (test #f #t (string? (server:start-and-wait *toppath*))) - -(test "setup for run" #t (begin (launch:setup) - (string? (getenv "MT_RUN_AREA_HOME")))) -(test #f #t (client:setup-http toppath)) -(test #f #t (vector? (client:setup toppath))) - -(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. -(test #f #t (string? (server:check-if-running "."))) -;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) -;; DEF (rmt:kill-server run-id) -;; DEF (rmt:start-server run-id) -(test #f '(#t "successful login")(rmt:login #f)) -;; DEF (rmt:login-no-auto-client-setup connection-info) -(test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) - -;; get-latest-host-load does a lookup in the db, it won't return a useful value unless -;; a test ran recently on host -(test-batch rmt:get-latest-host-load - "rmt:get-latest-host-load" - (list (list "localhost" #t (get-host-name)) - (list "not-a-host" #t "not-a-host" )) - post-proc: pair?) - -(test #f #t (list? (rmt:get-changed-record-ids 0))) - -(test #f #f (begin (runs:update-all-test_meta #f) #f)) - -(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (rmt:get-tests-tags) equal?) string<=)) +(import big-chicken rmtmod apimod runsmod) + +(print "start dir: " (current-directory)) +;; +(define toppath (current-directory)) +;; +;; (test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait +;; (test #f #t (list? (server:get-list toppath))) +;; (test #f '() (server:get-best '())) +;; (test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) +;; (test #f "test.lock" (common:simple-file-release-lock "test.lock")) +;; (test #f #t (server:get-best-guess-address (get-host-name))) +;; (test #f #t (string? (common:get-homehost))) +;; +;; ;; clean out any old running servers +;; ;; +;; (let ((servers (server:get-list toppath))) +;; (print "Known servers: " servers) +;; (if (not (null? servers)) +;; (begin +;; (for-each +;; (lambda (server) +;; (let ((pid (list-ref server 4))) +;; (thread-start! +;; (make-thread +;; (lambda () +;; (print "Attempting to kill server: " server) +;; (print "Attempting to kill pid " pid) +;; (system (conc "kill " pid)) +;; (thread-sleep! 2) +;; (system (conc "kill -9 " pid))) +;; (conc pid))))) +;; servers) +;; (thread-sleep! 2)))) +;; ;; let's start up a server the mechanical way +;; (system "nbfake megatest -server -") +;; (thread-sleep! 2) +;; ;; (test #f #t (string? (server:start-and-wait *toppath*))) +;; +;; (test "setup for run" #t (begin (launch:setup) +;; (string? (getenv "MT_RUN_AREA_HOME")))) +;; (test #f #t (client:setup-http toppath)) +;; (test #f #t (vector? (client:setup toppath))) +;; +;; (test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. +;; (test #f #t (string? (server:check-if-running "."))) +;; ;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) +;; ;; DEF (rmt:kill-server run-id) +;; ;; DEF (rmt:start-server run-id) +;; (test #f '(#t "successful login")(rmt:login #f)) +;; ;; DEF (rmt:login-no-auto-client-setup connection-info) +;; (test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) +;; +;; ;; get-latest-host-load does a lookup in the db, it won't return a useful value unless +;; ;; a test ran recently on host +;; (test-batch rmt:get-latest-host-load +;; "rmt:get-latest-host-load" +;; (list (list "localhost" #t (get-host-name)) +;; (list "not-a-host" #t "not-a-host" )) +;; post-proc: pair?) +;; +;; (test #f #t (list? (rmt:get-changed-record-ids 0))) +;; +(test #f #f (begin (runs:update-all-test_meta #f) #f)) + +(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (rmt:get-tests-tags) equal?) string<=?)) (test #f '() (rmt:get-key-val-pairs 0)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start (test #f '() (rmt:get-key-vals 1)) (test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets)) @@ -131,11 +133,11 @@ (test #f #f (begin (rmt:set-state-status-and-roll-up-items 1 "foo" "" "COMPLETED" "FAIL" "Just yet another message") #f)) (test #f #t (rmt:top-test-set-per-pf-counts 1 "foo")) (test #f '() (rmt:get-raw-run-stats 1)) (test #f #t (vector? (rmt:get-run-info 1))) (test #f 0 (rmt:get-num-runs "%")) -(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) ) +(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234"))) (test #f 1 (rmt:register-run '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) "bar" "NEW" "JUSTFINE" "bobafett" "quick")) (test #f "bar" (rmt:get-run-name-from-id 1)) (test #f #t (begin (rmt:delete-run 2) #t)) ;; delete a non-existant run (test #f #t (begin (rmt:update-run-stats 1 '()) #t)) (test #f #t (begin (rmt:delete-old-deleted-test-records) #t)) @@ -166,50 +168,51 @@ (test #f '(("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1)) (begin (rmt:get-run-stats))) (test #f #t (begin (rmt:set-run-state-status 1 "COMPLETE" "PASS") #t)) (test #f '"COMPLETE" (rmt:get-run-state 1)) (test #f '"PASS" (rmt:get-run-status 1)) -(test #f #t (begin (rmt:set-var "foo" "bar")#t)) -(test #f "bar" (rmt:get-var "foo")) +(test #f #t (begin (rmt:set-var 1 "foo" "bar")#t)) +(test #f "bar" (rmt:get-var 1 "foo")) (test #f #t (begin (rmt:print-db-stats) #t)) -(test #f #t (begin (rmt:del-var "foo") #t)) -(test #f #f (rmt:get-var "foo")) +(test #f #t (begin (rmt:del-var 1 "foo") #t)) +(test #f #f (rmt:get-var 1 "foo")) (test #f (vector #f #f #f #f #f #f #f #f #f #f #f #f) (rmt:get-data-info-by-id 1)) (test #f '() (rmt:get-key-vals 1)) (test #f "ubuntu/v1.234" (rmt:get-target 1)) (print (rmt:get-run-info 1)) (test #f '((runs) (tests) (test_steps) (test_data)) (rmt:get-run-record-ids "ubuntu/v1.234" 1 '("fail_count") "bar")) -;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) -;; (rmt:get-main-run-stats run-id) -;; (rmt:get-var varname) -;; (rmt:set-var varname value) -;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) -;; (rmt:get-previous-test-run-record run-id test-name item-path) -;; (rmt:get-run-stats) -;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) -;; (rmt:get-steps-for-test run-id test-id) -;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) -;; (rmt:testmeta-add-record testname) -;; (rmt:testmeta-get-record testname) -;; (rmt:testmeta-update-field test-name fld val) -;; (rmt:test-data-rollup run-id test-id status) -;; (rmt:csv->test-data run-id test-id csvdata) -;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) -;; (rmt:tasks-add action owner target runname testpatt params) -;; (rmt:tasks-set-state-given-param-key param-key new-state) -;; (rmt:tasks-get-last target runname) -;; (rmt:archive-get-allocations testname itempath dneeded) -;; (rmt:archive-register-block-name bdisk-id archive-path) -;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) -;; (rmt:archive-register-disk bdisk-name bdisk-path df) -;; (rmt:test-set-archive-block-id run-id test-id archive-block-id) -;; (rmt:test-get-archive-block-info archive-block-id) -;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) -;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected -;; DEF (test #f #f (rmt:print-db-stats)) -;; DEF (rmt:get-max-query-average run-id) -;; NED (rmt:general-call stmtname run-id . params) -;; DEP (rmt:sdb-qry qry val run-id) -;; DEF (rmt:runtests user run-id testpatt params) -;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) -;; DEP (rmt:synchash-get run-id proc synckey keynum params) -;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo")) +;; ;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) +;; ;; (rmt:get-main-run-stats run-id) +;; ;; (rmt:get-var varname) +;; ;; (rmt:set-var varname value) +;; ;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) +;; ;; (rmt:get-previous-test-run-record run-id test-name item-path) +;; ;; (rmt:get-run-stats) +;; ;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) +;; ;; (rmt:get-steps-for-test run-id test-id) +;; ;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) +;; ;; (rmt:testmeta-add-record testname) +;; ;; (rmt:testmeta-get-record testname) +;; ;; (rmt:testmeta-update-field test-name fld val) +;; ;; (rmt:test-data-rollup run-id test-id status) +;; ;; (rmt:csv->test-data run-id test-id csvdata) +;; ;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) +;; ;; (rmt:tasks-add action owner target runname testpatt params) +;; ;; (rmt:tasks-set-state-given-param-key param-key new-state) +;; ;; (rmt:tasks-get-last target runname) +;; ;; (rmt:archive-get-allocations testname itempath dneeded) +;; ;; (rmt:archive-register-block-name bdisk-id archive-path) +;; ;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; ;; (rmt:archive-register-disk bdisk-name bdisk-path df) +;; ;; (rmt:test-set-archive-block-id run-id test-id archive-block-id) +;; ;; (rmt:test-get-archive-block-info archive-block-id) +;; ;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) +;; ;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; ;; DEF (test #f #f (rmt:print-db-stats)) +;; ;; DEF (rmt:get-max-query-average run-id) +;; ;; NED (rmt:general-call stmtname run-id . params) +;; ;; DEP (rmt:sdb-qry qry val run-id) +;; ;; DEF (rmt:runtests user run-id testpatt params) +;; ;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) +;; ;; DEP (rmt:synchash-get run-id proc synckey keynum params) +;; ;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo")) +;; Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -21,11 +21,11 @@ ;; Run like this: ;; ;; ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) (import rmtmod trace http-client apimod dbmod - launchmod) + launchmod srfi-69) (trace-call-sites #t) (trace ;; db:get-dbdat ;; rmt:find-main-server @@ -45,20 +45,20 @@ ;; api:run-server-process ;; rmt:run ;; rmt:try-start-server ) -(test #f #t (rmt:remote? (let ((r (make-rmt:remote))) - (set! *rmt:remote* r) - r))) -(test #f #f (rmt:get-conn *rmt:remote* *toppath* ".db/main.db")) +(test #f #t (remotedat? (let ((r (make-remotedat))) + (set! *remotedat* r) + r))) +(test #f #f (rmt:get-conn *remotedat* *toppath* ".db/main.db")) (test #f #f (rmt:find-main-server *toppath* ".db/main.db")) -(test #f #t (rmt:open-main-connection *rmt:remote* *toppath*)) -(pp (hash-table->alist (rmt:remote-conns *rmt:remote*))) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) +(test #f #t (rmt:open-main-connection *remotedat* *toppath*)) +(pp (hash-table->alist (remotedat-conns *remotedat*))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) -(define *main* (rmt:get-conn *rmt:remote* *toppath* ".db/main.db")) +(define *main* (rmt:get-conn *remotedat* *toppath* ".db/main.db")) ;; (for-each (lambda (tdat) ;; (test #f tdat (loop-test (rmt:conn-ipaddr *main*) ;; (rmt:conn-port *main*) tdat))) ;; (list 'a @@ -68,13 +68,13 @@ (define *db* (db:setup ".db/main.db")) ;; these let me cut and paste from source easily (define apath *toppath*) (define dbname ".db/2.db") -(define remote *rmt:remote*) +(define remote *remotedat*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) (test #f '() (string->sexpr "()")) (test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/2.db"))) (set! *dbstruct-db* #f) (exit) Index: tests/unittests/server.scm ================================================================== --- tests/unittests/server.scm +++ tests/unittests/server.scm @@ -20,19 +20,20 @@ ;; Run like this: ;; ;; (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) -(import rmtmod trace http-client apimod dbmod +(import big-chicken rmtmod trace http-client apimod dbmod launchmod) (trace-call-sites #t) (trace + ;; db:get-dbdat ;; rmt:find-main-server -;; rmt:send-receive-real -;; rmt:send-receive + ;; rmt:send-receive-real + ;; rmt:send-receive ;; sexpr->string ;; server-ready? ;; rmt:register-server ;; rmt:deregister-server ;; rmt:open-main-connection @@ -52,31 +53,38 @@ (define *db* (db:setup ".db/main.db")) ;; these let me cut and paste from source easily (define apath *toppath*) (define dbname ".db/2.db") -(define remote *rmt:remote*) +(define remote *remotedat*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) (test #f #t (rmt:open-main-connection remote apath)) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) -(test #f 'server-started (rmt:send-receive-real *rmt:remote* *toppath* ".db/main.db" 'get-server `(,apath ,dbname))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f ".db/2.db" (list-ref (rmt:send-receive-real *remotedat* *toppath* ".db/main.db" 'get-server `(,apath ,dbname)) + 6)) (thread-sleep! 2) -(test #f #t (list? (rmt:general-open-connection *rmt:remote* *toppath* ".db/2.db"))) - -(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) -(test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) -(print "Got here.") - -(test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) - -(test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) -;; (test #f 2 (rmt:deregister-server *rmt:remote* *toppath* iface port server-key dbname - -(test #f 2 (rmt:get-count-servers *rmt:remote* *toppath*)) - -(test #f "run2" (rmt:get-run-name-from-id 2)) - -;; (exit) - +(test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) + +;; (let loop ((end-time (+ (current-seconds) 61))) + (test #f #t (list? (rmt:get-servers-info *toppath*))) + + (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) + ;; (print "Got here.") + + (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) + + (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) + ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname + + (test #f 2 (rmt:get-count-servers *remotedat* *toppath*)) + + (test #f "run2" (rmt:get-run-name-from-id 2)) + (test #f #f (rmt:send-receive 'get-test-info-by-id 2 '(2 1))) + + (test #f #t (rmt:general-call 'update-cpuload-diskfree 2 1.5 1e6 1)) +;; (if (< (current-seconds) end-time)(loop end-time))) + +(exit) Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -26,10 +26,11 @@ (declare (uses itemsmod)) (declare (uses rmtmod)) (declare (uses stml2)) (declare (uses dbmod)) (declare (uses tasksmod)) +(declare (uses dbi)) (module testsmod * (import scheme ADDED ulex/ulex-prev-orig.scm Index: ulex/ulex-prev-orig.scm ================================================================== --- /dev/null +++ ulex/ulex-prev-orig.scm @@ -0,0 +1,2252 @@ +;; ulex: Distributed sqlite3 db +;;; +;; Copyright (C) 2018 Matt Welland +;; Redistribution and use in source and binary forms, with or without +;; modification, is permitted. +;; +;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE +;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +;; DAMAGE. + +;;====================================================================== +;; ABOUT: +;; See README in the distribution at https://www.kiatoa.com/fossils/ulex +;; NOTES: +;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. +;; +;;====================================================================== + +(use mailbox) + +(module ulex + * + +(import scheme posix chicken data-structures ports extras files mailbox) +(import srfi-18 pkts matchable regex + typed-records srfi-69 srfi-1 + srfi-4 regex-case + (prefix sqlite3 sqlite3:) + foreign + tcp6 + ;; ulex-netutil + hostinfo + ) + +;; make it a global? Well, it is local to area module + +(define *captain-pktspec* + `((captain (host . h) + (port . p) + (pid . i) + (ipaddr . a) + ) + #;(data (hostname . h) ;; sender hostname + (port . p) ;; sender port + (ipaddr . a) ;; sender ip + (hostkey . k) ;; sending host key - store info at server under this key + (servkey . s) ;; server key - this needs to match at server end or reject the msg + (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json + (data . d) ;; base64 encoded slln data + ))) + +;; struct for keeping track of our world + +(defstruct udat + ;; captain info + (captain-address #f) + (captain-host #f) + (captain-port #f) + (captain-pid #f) + (captain-lease 0) ;; time (unix epoc) seconds when the lease is up + (ulex-dir (conc (get-environment-variable "HOME") "/.ulex")) + (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts")) + (cpkt-spec *captain-pktspec*) + ;; this processes info + (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain + (my-address #f) + (my-hostname #f) + (my-port #f) + (my-pid (current-process-id)) + (my-dbs '()) + ;; server and handler thread + (serv-listener #f) ;; this processes server info + (handler-thread #f) + (mboxes (make-hash-table)) ;; key => mbox + ;; other servers + (peers (make-hash-table)) ;; host-port => peer record + (dbowners (make-hash-table)) ;; dbfile => host-port + (handlers (make-hash-table)) ;; dbfile => proc + ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn + (work-queue (make-queue)) ;; most stuff goes here + ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping) + (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately + ;; app info + (appname #f) + (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ] + ;; cookies + (cnum 0) ;; cookie num + ) + +;;====================================================================== +;; NEW APPROACH +;;====================================================================== + +;; start-server-find-port ;; gotta have a server port ready from the very begining + +;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN +;; dbpath - full path and filename of the db to talk to or a symbol naming the db? +;; callname - the remote call to execute +;; params - parameters to pass to the remote call +;; +(define (remote-call udata dbpath dbtype callname . params) + (start-server-find-port udata) ;; ensure we have a local server + (find-or-setup-captain udata) + ;; look at connect, process-request, send, send-receive + (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype))) + (send-receive udata host-port callname cookie-key params))) + +;;====================================================================== +;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED +;;====================================================================== + +;; connection setup and management functions + +;; This is the basic setup command. Must always be +;; called before connecting to a db using connect. +;; +;; find or become the captain +;; setup and return a ulex object +;; +(define (find-or-setup-captain udata) + ;; see if we already have a captain and if the lease is ok + (if (and (udat-captain-address udata) + (udat-captain-port udata) + (< (current-seconds) (udat-captain-lease udata))) + udata + (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts + (captn (get-winning-pkt cpkts))) + (if captn + (let* ((port (alist-ref 'port captn)) + (host (alist-ref 'host captn)) + (ipaddr (alist-ref 'ipaddr captn)) + (pid (alist-ref 'pid captn)) + (Z (alist-ref 'Z captn))) + (udat-captain-address-set! udata ipaddr) + (udat-captain-host-set! udata host) + (udat-captain-port-set! udata port) + (udat-captain-pid-set! udata pid) + (udat-captain-lease-set! udata (+ (current-seconds) 10)) + (let-values (((success pingtime)(ping udata (conc ipaddr ":" port)))) + (if success + udata + (begin + (print "Found unreachable captain at " ipaddr ":" port ", removing pkt") + (remove-captain-pkt udata captn) + (find-or-setup-captain udata)))) + (begin + (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread + (find-or-setup-captain udata))))))) + +;; connect to a specific dbfile +;; - if already connected - return the dbowner host-port +;; - ask the captain who to talk to for this db +;; - put the entry in the dbowners hash as dbfile => host-port +;; +(define (connect udata dbfname dbtype) + (or (hash-table-ref/default (udat-dbowners udata) dbfname #f) + (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype))) + (if success + (begin + ;; just clobber the record, this is the new data no matter what + (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port) + dbowner-host-port) + #f)))) + +;; returns: success pingtime +;; +;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns +;; +(define (ping udata host-port) + (let* ((start (current-milliseconds)) + (cookie (make-cookie udata)) + (dbs (udat-my-dbs udata)) + (msg (string-intersperse dbs " ")) + (res (send udata host-port 'ping cookie msg retval: #t)) + (delta (- (current-milliseconds) start))) + (values (equal? res cookie) delta))) + +;; returns: success pingtime +;; +;; NOTE: causes all references to this worker to be wiped out in the +;; callee (ususally the captain) +;; +(define (goodbye-ping udata host-port) + (let* ((start (current-milliseconds)) + (cookie (make-cookie udata)) + (dbs (udat-my-dbs udata)) + (res (send udata host-port 'goodbye cookie "nomsg" retval: #t)) + (delta (- (current-milliseconds) start))) + (values (equal? res cookie) delta))) + +(define (goodbye-captain udata) + (let* ((host-port (udat-captain-host-port udata))) + (if host-port + (goodbye-ping udata host-port) + (values #f -1)))) + +(define (get-db-owner udata dbname dbtype) + (let* ((host-port (udat-captain-host-port udata))) + (if host-port + (let* ((cookie (make-cookie udata)) + (msg #f) ;; (conc dbname " " dbtype)) + (params `(,dbname ,dbtype)) + (res (send udata host-port 'db-owner cookie msg + params: params retval: #t))) + (match (string-split res) + ((retcookie owner-host-port) + (values (equal? retcookie cookie) owner-host-port)))) + (values #f -1)))) + +;; called in ulex-handler to dispatch work, called on the workers side +;; calls (proc params data) +;; returns result with cookie +;; +;; pdat is the info of the caller, used to send the result data +;; prockey is key into udat-handlers hash dereferencing a proc +;; procparam is a first param handed to proc - often to do further derefrencing +;; NOTE: params is intended to be a list of strings, encoding on data +;; is up to the user but data must be a single line +;; +(define (process-request udata pdat dbname cookie prockey procparam data) + (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first + (proc (hash-table-ref udata prockey))) + (let* ((result (proc dbrec procparam data))) + result))) + +;; remote-request - send to remote to process in process-request +;; uconn comes from a call to connect and can be used instead of calling connect again +;; uconn is the host-port to call +;; we send dbname to the worker so they know which file to open +;; data must be a string with no newlines, it will be handed to the proc +;; at the remote site unchanged. It is up to the user to encode/decode it's contents +;; +;; rtype: immediate, read-only, normal, low-priority +;; +(define (remote-request udata uconn rtype dbname prockey procparam data) + (let* ((cookie (make-cookie udata))) + (send-receive udata uconn rtype cookie data `(,prockey procparam)))) + +(define (ulex-open-db udata dbname) + #f) + + +;;====================================================================== +;; Ulex db +;; +;; - track who is captain, lease expire time +;; - track who owns what db, lease +;; +;;====================================================================== + +;; +;; +(define (ulex-dbfname) + (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex"))) + (if (not (file-exists? dbdir)) + (create-directory dbdir #t)) + (conc dbdir "/network.db"))) + +;; always goes in ~/.ulex/network.db +;; role is captain, adjutant, node +;; +(define (ulexdb-setup) + (let* ((dbfname (ulex-dbfname)) + (have-db (file-exists? dbfname)) + (db (sqlite3:open-database dbfname))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (sqlite3:execute db "PRAGMA synchronous = 0;") + (if (not have-db) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (stmt) + (if stmt (sqlite3:execute db stmt))) + `("CREATE TABLE IF NOT EXISTS nodes + (id INTEGER PRIMARY KEY, + role TEXT NOT NULL, + host TEXT NOT NULL, + port TEXT NOT NULL, + ipadr TEXT NOT NULL, + pid INTEGER NOT NULL, + zcard TEXT NOT NULL, + regtime INTEGER DEFAULT (strftime('%s','now')), + lease_thru INTEGER DEFAULT (strftime('%s','now')), + last_update INTEGER DEFAULT (strftime('%s','now')));" + "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes + FOR EACH ROW + BEGIN + UPDATE nodes SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" + "CREATE TABLE IF NOT EXISTS dbs + (id INTEGER PRIMARY KEY, + dbname TEXT NOT NULL, + dbfile TEXT NOT NULL, + dbtype TEXT NOT NULL, + host_port TEXT NOT NULL, + regtime INTEGER DEFAULT (strftime('%s','now')), + lease_thru INTEGER DEFAULT (strftime('%s','now')), + last_update INTEGER DEFAULT (strftime('%s','now')));" + "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs + FOR EACH ROW + BEGIN + UPDATE dbs SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;"))))) + db)) + +(define (get-host-port-lease db dbfname) + (sqlite3:fold-row + (lambda (rem host-port lease-thru) + (list host-port lease-thru)) + #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname)) + +(define (register-captain db host ipadr port pid zcard #!key (lease 20)) + (let* ((dbfname (ulex-dbfname)) + (host-port (conc host ":" port))) + (sqlite3:with-transaction + db + (lambda () + (match (get-host-port-lease db dbfname) + ((host-port lease-thru) + (if (> (current-seconds) lease-thru) + (begin + (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?" + (conc host ":" port) + (+ (current-seconds) lease) + dbfname) + #t) + #f)) + (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)" + "captain" dbfname "captain" host-port (+ (current-seconds) lease))) + (else (print "ERROR: Unrecognised result from fold-row") + (exit 1))))))) + +;;====================================================================== +;; network utilities +;;====================================================================== + +(define (rate-ip ipaddr) + (regex-case ipaddr + ( "^127\\..*" _ 0 ) + ( "^(10\\.0|192\\.168)\\..*" _ 1 ) + ( else 2 ) )) + +;; Change this to bias for addresses with a reasonable broadcast value? +;; +(define (ip-pref-less? a b) + (> (rate-ip a) (rate-ip b))) + + +(define (get-my-best-address) + (let ((all-my-addresses (get-all-ips)) + ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) + ) + (cond + ((null? all-my-addresses) + (get-host-name)) ;; no interfaces? + ((eq? (length all-my-addresses) 1) + (car all-my-addresses)) ;; only one to choose from, just go with it + + (else + (car (sort all-my-addresses ip-pref-less?))) + ;; (else + ;; (ip->string (car (filter (lambda (x) ;; take any but 127. + ;; (not (eq? (u8vector-ref x 0) 127))) + ;; all-my-addresses)))) + + ))) + +(define (get-all-ips-sorted) + (sort (get-all-ips) ip-pref-less?)) + +(define (get-all-ips) + (map ip->string (vector->list + (hostinfo-addresses + (host-information (current-hostname)))))) + +(define (udat-my-host-port udata) + (if (and (udat-my-address udata)(udat-my-port udata)) + (conc (udat-my-address udata) ":" (udat-my-port udata)) + #f)) + +(define (udat-captain-host-port udata) + (if (and (udat-captain-address udata)(udat-captain-port udata)) + (conc (udat-captain-address udata) ":" (udat-captain-port udata)) + #f)) + +(define (udat-get-peer udata host-port) + (hash-table-ref/default (udat-peers udata) host-port #f)) + +;; struct for keeping track of others we are talking to + +(defstruct peer + (addr-port #f) + (hostname #f) + (pid #f) + ;; (inp #f) + ;; (oup #f) + (dbs '()) ;; list of databases this peer is currently handling + ) + +(defstruct work + (peer-dat #f) + (handlerkey #f) + (qrykey #f) + (data #f) + (start (current-milliseconds))) + +#;(defstruct dbowner + (pdat #f) + (last-update (current-seconds))) + +;;====================================================================== +;; Captain functions +;;====================================================================== + +;; NB// This needs to be started in a thread +;; +;; setup to be a captain +;; - local server MUST be started already +;; - create pkt +;; - start server port handler +;; +(define (setup-as-captain udata) + (if (create-captain-pkt udata) + (let* ((my-addr (udat-my-address udata)) + (my-port (udat-my-port udata)) + (th (make-thread (lambda () + (ulex-handler-loop udata)) "Captain handler"))) + (udat-handler-thread-set! udata th) + (udat-captain-address-set! udata my-addr) + (udat-captain-port-set! udata my-port) + (thread-start! th)) + (begin + (print "ERROR: failed to create captain pkt") + #f))) + +;; given a pkts dir read +;; +(define (get-all-captain-pkts udata) + (let* ((pktsdir (let ((d (udat-cpkts-dir udata))) + (if (file-exists? d) + d + (begin + (create-directory d #t) + d)))) + (all-pkt-files (glob (conc pktsdir "/*.pkt"))) + (pkt-spec (udat-cpkt-spec udata))) + (map (lambda (pkt-file) + (read-pkt->alist pkt-file pktspec: pkt-spec)) + all-pkt-files))) + +;; sort by D then Z, return one, choose the oldest then +;; differentiate if needed using the Z key +;;l +(define (get-winning-pkt pkts) + (if (null? pkts) + #f + (car (sort pkts (lambda (a b) + (let ((ad (string->number (alist-ref 'D a))) + (bd (string->number (alist-ref 'D b)))) + (if (eq? a b) + (let ((az (alist-ref 'Z a)) + (bz (alist-ref 'Z b))) + (string>=? az bz)) + (> ad bd)))))))) + +;; put the host, ip, port and pid into a pkt in +;; the captain pkts dir +;; - assumes user has already fired up a server +;; which will be in the udata struct +;; +(define (create-captain-pkt udata) + (if (not (udat-serv-listener udata)) + (begin + (print "ERROR: create-captain-pkt called with out a listener") + #f) + (let* ((pktdat `((port . ,(udat-my-port udata)) + (host . ,(udat-my-hostname udata)) + (ipaddr . ,(udat-my-address udata)) + (pid . ,(udat-my-pid udata)))) + (pktdir (udat-cpkts-dir udata)) + (pktspec (udat-cpkt-spec udata)) + ) + (udat-my-cpkt-key-set! + udata + (write-alist->pkt + pktdir + pktdat + pktspec: pktspec + ptype: 'captain)) + (udat-my-cpkt-key udata)))) + +;; remove pkt associated with captn (the Z key .pkt) +;; +(define (remove-captain-pkt udata captn) + (let ((Z (alist-ref 'Z captn)) + (cpktdir (udat-cpkts-dir udata))) + (delete-file* (conc cpktdir "/" Z ".pkt")))) + +;; call all known peers and tell them to delete their info on the captain +;; thus forcing them to re-read pkts and connect to a new captain +;; call this when the captain needs to exit and if an older captain is +;; detected. Due to delays in sending file meta data in NFS multiple +;; captains can be initiated in a "Storm of Captains", book soon to be +;; on Amazon +;; +(define (drop-captain udata) + (let* ((peers (hash-table-keys (udat-peers udata))) + (cookie (make-cookie udata))) + (for-each + (lambda (host-port) + (send udata host-port 'dropcaptain cookie "nomsg" retval: #t)) + peers))) + +;;====================================================================== +;; server primitives +;;====================================================================== + +(define (make-cookie udata) + (let ((newcnum (+ (udat-cnum udata) 1))) + (udat-cnum-set! udata newcnum) + (conc (udat-my-address udata) ":" + (udat-my-port udata) "-" + (udat-my-pid udata) "-" + newcnum))) + +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (start-server-find-port udata-in #!optional (port 4242)) + (let ((udata (or udata-in (make-udat)))) + (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready? + udata + (handle-exceptions + exn + (if (< port 65535) + (start-server-find-port udata (+ port 1)) + #f) + (connect-server udata port))))) + +(define (connect-server udata port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-my-address-set! udata addr) + (udat-my-port-set! udata port) + (udat-my-hostname-set! udata (get-host-name)) + (udat-serv-listener-set! udata tlsn) + udata)) + +(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f)) + (let* ((pdat (or (udat-get-peer udata host-port) + (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC + exn + #f + (let ((npdat (make-peer addr-port: host-port))) + (if hostname (peer-hostname-set! npdat hostname)) + (if pid (peer-pid-set! npdat pid)) + npdat))))) + pdat)) + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +(define (send udata host-port handler qrykey data + #!key (hostname #f)(pid #f)(params '())(retval #f)) + (let* ((my-host-port (udat-my-host-port udata)) + (isme (equal? host-port my-host-port)) ;; am I calling + ;; myself? + (dat (list + handler ;; " " + my-host-port ;; " " + (udat-my-pid udata) ;; " " + qrykey + params ;;(if (null? params) "" (conc " " + ;;(string-intersperse params " "))) + ))) + ;; (print "send isme is " (if isme "true!" "false!") ", + ;; my-host-port: " my-host-port ", host-port: " host-port) + (if isme + (ulex-handler udata dat data) + (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE + ;; SPECIFIC + exn + #f + (let-values (((inp oup)(tcp-connect host-port))) + ;; + ;; CONTROL LINE: + ;; handlerkey host:port pid qrykey params ... + ;; + (let ((res + (if (and inp oup) + (let* () + (if my-host-port + (begin + (write dat oup) + (write data oup) ;; send as sexpr + ;; (print "Sent dat: " dat " data: " data) + (if retval + (read inp) + #t)) + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)) + ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE! + ;; (there is a listener for handling that) + ) + #f))) ;; #f means failed to connect and send + (close-input-port inp) + (close-output-port oup) + res)))))) + +;; send a request to the given host-port and register a mailbox in udata +;; wait for the mailbox data and return it +;; +(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20)) + (let ((mbox (make-mailbox)) + (mbox-time (current-milliseconds)) + (mboxes (udat-mboxes udata))) + (hash-table-set! mboxes qrykey mbox) + (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params) + (let* ((mbox-timeout-secs timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + (hash-table-delete! mboxes qrykey) + (if (eq? res 'MBOX_TIMEOUT) + #f + res)) + #f))) ;; #f means failed to communicate + +;; +(define (ulex-handler udata controldat data) + (print "controldat: " controldat " data: " data) + (match controldat ;; (string-split controldat) + ((handlerkey host-port pid qrykey params ...) + ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params) + (case handlerkey ;; (string->symbol handlerkey) + ((ack)(print "Got ack!")) + ((ping) ;; special case - return result immediately on the same connection + (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f)) + (val (if proc (proc) "gotping")) + (peer (make-peer addr-port: host-port pid: pid)) + (dbshash (udat-dbowners udata))) + (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger + (for-each (lambda (dbfile) + (hash-table-set! dbshash dbfile host-port)) ;; WRONG? + params) ;; register each db in the dbshash + (if (not (hash-table-exists? (udat-peers udata) host-port)) + (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers + qrykey)) ;; End of ping + ((goodbye) + ;; remove all traces of the caller in db ownership etc. + (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f)) + (dbs (if peer (peer-dbs peer) '())) + (dbshash (udat-dbowners udata))) + (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs) + (hash-table-delete! (udat-peers udata) host-port) + qrykey)) + ((dropcaptain) + ;; remove all traces of the captain + (udat-captain-address-set! udata #f) + (udat-captain-host-set! udata #f) + (udat-captain-port-set! udata #f) + (udat-captain-pid-set! udata #f) + qrykey) + ((rucaptain) ;; remote is asking if I'm the captain + (if (udat-my-cpkt-key udata) "yes" "no")) + ((db-owner) ;; given a db name who do I send my queries to + ;; look up the file in handlers, if have an entry ping them to be sure + ;; they are still alive and then return that host:port. + ;; if no handler found or if the ping fails pick from peers the oldest that + ;; is managing the fewest dbs + (match params + ((dbfile dbtype) + (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f))) + (if owner-host-port + (conc qrykey " " owner-host-port) + (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it! + (make-peer addr-port: host-port pid: pid dbs: `(,dbfile))))) + (hash-table-set! (udat-peers udata) host-port pdat) + (hash-table-set! (udat-dbowners udata) dbfile host-port) + (conc qrykey " " host-port))))) + (else (conc qrykey " BADDATA")))) + ;; for work items: + ;; handler is one of; immediate, read-only, read-write, high-priority + ((immediate read-only normal low-priority) ;; do this work immediately + ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line + ;; data => a single line encoded however you want, or should I build json into it? + (print "handlerkey=" handlerkey) + (let* ((pdat (get-peer-dat udata host-port))) + (match params ;; dbfile prockey procparam + ((dbfile prockey procparam) + (case handlerkey + ((immediate read-only) + (process-request udata pdat dbfile qrykey prockey procparam data)) + ((normal low-priority) ;; split off later and add logic to support low priority + (add-to-work-queue udata pdat dbfile qrykey prockey procparam data)) + (else + #f))) + (else + (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat) + #f)))) + (else + ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data) + #f))) + (else + (print "BAD DATA? controldat=" controldat " data=" data) + #f)));; handles the incoming messages and dispatches to queues + +;; +(define (ulex-handler-loop udata) + (let* ((serv-listener (udat-serv-listener udata))) + ;; data comes as two lines + ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db] + ;; data + (let loop ((state 'start)) + (let-values (((inp oup)(tcp-accept serv-listener))) + (let* ((controldat (read inp)) + (data (read inp)) + (resp (ulex-handler udata controldat data))) + (if resp (write resp oup)) + (close-input-port inp) + (close-output-port oup)) + (loop state))))) + +;; add a proc to the handler list, these are done symetrically (i.e. in all instances) +;; so that the proc can be dereferenced remotely +;; +(define (register-handler udata key proc) + (hash-table-set! (udat-handlers udata) key proc)) + + +;;====================================================================== +;; work queues +;;====================================================================== + +(define (add-to-work-queue udata peer-dat handlerkey qrykey data) + (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data))) + (if (udat-busy udata) + (queue-add! (udat-work-queue udata) wdat) + (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat + )) + +(define (do-work udata wdat) + #f) + +(define (process-work udata #!optional wdat) + (if wdat (do-work udata wdat)) ;; process wdat + (let ((wqueue (udat-work-queue udata))) + (if (not (queue-empty? wqueue)) + (let loop ((wd (queue-remove! wqueue))) + (do-work udata wd) + (if (not (queue-empty? wqueue)) + (loop (queue-remove! wqueue))))))) + +;;====================================================================== +;; Generic db handling +;; setup a inmem db instance +;; open connection to on-disk db +;; sync on-disk db to inmem +;; get lock in on-disk db for dbowner of this db +;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct +;; return the stuct +;;====================================================================== + +(defstruct dbconn + (fname #f) + (inmem #f) + (conn #f) + (sync #f) ;; sync proc + (init #f) ;; init proc + (lastsync (current-seconds)) + ) + +(defstruct dbinfo + (initproc #f) + (syncproc #f)) + +;; open inmem and disk database +;; init with initproc +;; return db struct +;; +;; appname; megatest, ulex or something else. +;; +(define (setup-db-connection udata fname-in appname dbtype) + (let* ((is-ulex (eq? appname 'ulex)) + (dbinf (if is-ulex ;; ulex is a built-in special case + (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync) + (hash-table-ref/default (udat-dbtypes udata) dbtype #f))) + (initproc (dbinfo-initproc dbinf)) + (syncproc (dbinfo-syncproc dbinf)) + (fname (if is-ulex + (conc (udat-ulex-dir udata) "/ulex.db") + fname-in)) + (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf))) + (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf)))) + (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc))) + +;; dest='inmem or 'disk +;; +(define (open-and-initdb udata filename dest init-proc) + (let* ((inmem (eq? dest 'inmem)) + (dbfile (if inmem + ":INMEM:" + filename)) + (dbexists (if inmem #t (file-exists? dbfile))) + (db (sqlite3:open-database dbfile))) + (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) + (if (not dbexists) + (init-proc db)) + db)) + + +;;====================================================================== +;; Previous Ulex db stuff +;;====================================================================== + +(define (ulexdb-init db inmem) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (stmt) + (if stmt (sqlite3:execute db stmt))) + `("CREATE TABLE IF NOT EXISTS processes + (id INTEGER PRIMARY KEY, + host TEXT NOT NULL, + ipadr TEXT NOT NULL, + port INTEGER NOT NULL, + pid INTEGER NOT NULL, + regtime INTEGER DEFAULT (strftime('%s','now')), + last_update INTEGER DEFAULT (strftime('%s','now')));" + (if inmem + "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes + FOR EACH ROW + BEGIN + UPDATE processes SET last_update=(strftime('%s','now')) + WHERE id=old.id; + END;" + #f)))))) + +;; open databases, do initial sync +(define (ulexdb-sync dbconndat udata) + #f) + + +) ;; END OF ULEX + + +;;; ;;====================================================================== +;;; ;; D E B U G H E L P E R S +;;; ;;====================================================================== +;;; +;;; (define (dbg> . args) +;;; (with-output-to-port (current-error-port) +;;; (lambda () +;;; (apply print "dbg> " args)))) +;;; +;;; (define (debug-pp . args) +;;; (if (get-environment-variable "ULEX_DEBUG") +;;; (with-output-to-port (current-error-port) +;;; (lambda () +;;; (apply pp args))))) +;;; +;;; (define *default-debug-port* (current-error-port)) +;;; +;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message) +;;; (if (get-environment-variable "ULEX_DEBUG") +;;; (with-output-to-port *default-debug-port* +;;; (lambda () +;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. " +;;; (if start-time +;;; (conc "total time " (- (current-milliseconds) start-time) +;;; " ms.") +;;; "") +;;; message +;;; ))))) + +;;====================================================================== +;; M A C R O S +;;====================================================================== +;; iup callbacks are not dumping the stack, this is a work-around +;; + +;; Some of these routines use: +;; +;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html +;; +;; Syntax for defining macros in a simple style similar to function definiton, +;; when there is a single pattern for the argument list and there are no keywords. +;; +;; (define-simple-syntax (name arg ...) body ...) +;; +;; +;; (define-syntax define-simple-syntax +;; (syntax-rules () +;; ((_ (name arg ...) body ...) +;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) +;; +;; (define-simple-syntax (catch-and-dump proc procname) +;; (handle-exceptions +;; exn +;; (begin +;; (print-call-chain (current-error-port)) +;; (with-output-to-port (current-error-port) +;; (lambda () +;; (print ((condition-property-accessor 'exn 'message) exn)) +;; (print "Callback error in " procname) +;; (print "Full condition info:\n" (condition->list exn))))) +;; (proc))) +;; +;; +;;====================================================================== +;; R E C O R D S +;;====================================================================== + +;;; ;; information about me as a server +;;; ;; +;;; (defstruct area +;;; ;; about this area +;;; (useportlogger #f) +;;; (lowport 32768) +;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all) +;;; (conn #f) +;;; (port #f) +;;; (myaddr (get-my-best-address)) +;;; pktid ;; get pkt from hosts table if needed +;;; pktfile +;;; pktsdir +;;; dbdir +;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one? +;;; (mutex (make-mutex)) +;;; (rtable (make-hash-table)) ;; registration table of available actions +;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve +;;; ;; about other servers +;;; (hosts (make-hash-table)) ;; key => hostdat +;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime ) +;;; (reqs (make-hash-table)) ;; uri => queue +;;; ;; work queues +;;; (wqueues (make-hash-table)) ;; fname => qdat +;;; (stats (make-hash-table)) ;; fname => totalqueries +;;; (last-srvup (current-seconds)) ;; last time we updated the known servers +;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call +;;; (ready #f) +;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping +;;; ) +;;; +;;; ;; host stats +;;; ;; +;;; (defstruct hostdat +;;; (pkt #f) +;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min +;;; (hostload #f) ;; normalized load ( 5min load / numcpus ) +;;; ) +;;; +;;; ;; dbdat +;;; ;; +;;; (defstruct dbdat +;;; (dbh #f) +;;; (fname #f) +;;; (write-access #f) +;;; (sths (make-hash-table)) ;; hash mapping query strings to handles +;;; ) +;;; +;;; ;; qdat +;;; ;; +;;; (defstruct qdat +;;; (writeq (make-queue)) +;;; (readq (make-queue)) +;;; (rwq (make-queue)) +;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging +;;; (osshort (make-queue)) +;;; (oslong (make-queue)) +;;; (misc (make-queue)) ;; used for things like ping-full +;;; ) +;;; +;;; ;; calldat +;;; ;; +;;; (defstruct calldat +;;; (ctype 'dbwrite) +;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc. +;;; (rtime (current-milliseconds))) +;;; +;;; ;; make it a global? Well, it is local to area module +;;; +;;; (define *pktspec* +;;; `((server (hostname . h) +;;; (port . p) +;;; (pid . i) +;;; (ipaddr . a) +;;; ) +;;; (data (hostname . h) ;; sender hostname +;;; (port . p) ;; sender port +;;; (ipaddr . a) ;; sender ip +;;; (hostkey . k) ;; sending host key - store info at server under this key +;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg +;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json +;;; (data . d) ;; base64 encoded slln data +;;; ))) +;;; +;;; ;; work item +;;; ;; +;;; (defstruct witem +;;; (rhost #f) ;; return host +;;; (ripaddr #f) ;; return ipaddr +;;; (rport #f) ;; return port +;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message +;;; (rdat #f) ;; the request - usually an sql query, type is rdat +;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort +;;; (cookie #f) ;; cookie id for response +;;; (data #f) ;; the data payload, i.e. parameters +;;; (result #f) ;; the result from processing the data +;;; (caller #f)) ;; the calling peer according to rpc itself +;;; +;;; (define (trim-pktid pktid) +;;; (if (string? pktid) +;;; (substring pktid 0 4) +;;; "nopkt")) +;;; +;;; (define (any->number num) +;;; (cond +;;; ((number? num) num) +;;; ((string? num) (string->number num)) +;;; (else num))) +;;; +;;; (use trace) +;;; (trace-call-sites #t) +;;; +;;; ;;====================================================================== +;;; ;; D A T A B A S E H A N D L I N G +;;; ;;====================================================================== +;;; +;;; ;; look in dbhandles for a db, return it, else return #f +;;; ;; +;;; (define (get-dbh acfg fname) +;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '()))) +;;; (if (null? dbh-lst) +;;; (begin +;;; ;; (print "opening db for " fname) +;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls +;;; (let ((rem-lst (cdr dbh-lst))) +;;; ;; (print "re-using saved connection for " fname) +;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst) +;;; (car dbh-lst))))) +;;; +;;; (define (save-dbh acfg fname dbdat) +;;; ;; (print "saving dbh for " fname) +;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '())))) +;;; +;;; ;; open the database, if never before opened init it. put the handle in the +;;; ;; open db's hash table +;;; ;; returns: the dbdat +;;; ;; +;;; (define (open-db acfg fname) +;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname)) +;;; (exists (file-exists? fullname)) +;;; (write-access (if exists +;;; (file-write-access? fullname) +;;; (file-write-access? (area-dbdir acfg)))) +;;; (db (sqlite3:open-database fullname)) +;;; (handler (sqlite3:make-busy-timeout 136000)) +;;; ) +;;; (sqlite3:set-busy-handler! db handler) +;;; (sqlite3:execute db "PRAGMA synchronous = 0;") +;;; (if (not exists) ;; need to init the db +;;; (if write-access +;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements +;;; ;; (sqlite3:with-transaction +;;; ;; db +;;; ;; (lambda () +;;; (if isql +;;; (for-each +;;; (lambda (sql) +;;; (sqlite3:execute db sql)) +;;; isql))) +;;; (print "ERROR: no write access to " (area-dbdir acfg)))) +;;; (make-dbdat dbh: db fname: fname write-access: write-access))) +;;; +;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment +;;; ;; you must extract the db handle +;;; ;; +;;; (define (get-sth db cache stmt) +;;; (if (hash-table-exists? cache stmt) +;;; (begin +;;; ;; (print "Reusing cached stmt for " stmt) +;;; (hash-table-ref/default cache stmt #f)) +;;; (let ((sth (sqlite3:prepare db stmt))) +;;; (hash-table-set! cache stmt sth) +;;; ;; (print "prepared stmt for " stmt) +;;; sth))) +;;; +;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already +;;; ;; have dbdat and db sitting around +;;; ;; +;;; (define (full-get-sth acfg fname stmt) +;;; (let* ((dbdat (get-dbh acfg fname)) +;;; (db (dbdat-dbh dbdat)) +;;; (sths (dbdat-sths dbdat))) +;;; (get-sth db sths stmt))) +;;; +;;; ;; write to a db +;;; ;; acfg: area data +;;; ;; rdat: request data +;;; ;; hdat: (host . port) +;;; ;; +;;; ;; (define (dbwrite acfg rdat hdat data-in) +;;; ;; (let* ((dbname (car data-in)) +;;; ;; (dbdat (get-dbh acfg dbname)) +;;; ;; (db (dbdat-dbh dbdat)) +;;; ;; (sths (dbdat-sths dbdat)) +;;; ;; (stmt (calldat-obj rdat)) +;;; ;; (sth (get-sth db sths stmt)) +;;; ;; (data (cdr data-in))) +;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data) +;;; ;; (print "dbdat: " (dbdat->alist dbdat)) +;;; ;; (apply sqlite3:execute sth data) +;;; ;; (save-dbh acfg dbname dbdat) +;;; ;; #t +;;; ;; )) +;;; +;;; (define (finalize-all-db-handles acfg) +;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat +;;; (num 0)) +;;; (for-each +;;; (lambda (area-name) +;;; (print "Closing handles for " area-name) +;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '()))) +;;; (for-each +;;; (lambda (dbdat) +;;; ;; first close all statement handles +;;; (for-each +;;; (lambda (sth) +;;; (sqlite3:finalize! sth) +;;; (set! num (+ num 1))) +;;; (hash-table-values (dbdat-sths dbdat))) +;;; ;; now close the dbh +;;; (set! num (+ num 1)) +;;; (sqlite3:finalize! (dbdat-dbh dbdat))) +;;; dbdats))) +;;; (hash-table-keys dbhandles)) +;;; (print "FINALIZED " num " dbhandles"))) +;;; +;;; ;;====================================================================== +;;; ;; W O R K Q U E U E H A N D L I N G +;;; ;;====================================================================== +;;; +;;; (define (register-db-as-mine acfg dbname) +;;; (let ((ht (area-dbs acfg))) +;;; (if (not (hash-table-ref/default ht dbname #f)) +;;; (hash-table-set! ht dbname (random 10000))))) +;;; +;;; (define (work-queue-add acfg fname witem) +;;; (let* ((work-queue-start (current-milliseconds)) +;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions +;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f) +;;; (let ((newqdat (make-qdat))) +;;; (hash-table-set! (area-wqueues acfg) fname newqdat) +;;; newqdat))) +;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))) +;;; (if rdat +;;; (queue-add! +;;; (case (calldat-ctype rdat) +;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat)) +;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat)) +;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat)) +;;; ((oslong) (qdat-oslong qdat)) +;;; ((osshort) (qdat-osshort qdat)) +;;; ((full-ping) (qdat-misc qdat)) +;;; (else +;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.") +;;; (qdat-writeq qdat))) +;;; witem) +;;; (case action +;;; ((full-ping)(qdat-misc qdat)) +;;; (else +;;; (print "ERROR: No action " action " was registered")))) +;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f) +;;; #t)) ;; for now, simply return #t to indicate request got to the queue +;;; +;;; (define (doqueue acfg q fname dbdat dbh) +;;; ;; (print "doqueue: " fname) +;;; (let* ((start-time (current-milliseconds)) +;;; (qlen (queue-length q))) +;;; (if (> qlen 1) +;;; (print "Processing queue of length " qlen)) +;;; (let loop ((count 0) +;;; (responses '())) +;;; (let ((delta (- (current-milliseconds) start-time))) +;;; (if (or (queue-empty? q) +;;; (> delta 400)) ;; stop working on this queue after 400ms have passed +;;; (list count delta responses) ;; return count, delta and responses list +;;; (let* ((witem (queue-remove! q)) +;;; (action (witem-action witem)) +;;; (rdat (witem-rdat witem)) +;;; (stmt (calldat-obj rdat)) +;;; (sth (full-get-sth acfg fname stmt)) +;;; (ctype (calldat-ctype rdat)) +;;; (data (witem-data witem)) +;;; (cookie (witem-cookie witem))) +;;; ;; do the processing and save the result in witem-result +;;; (witem-result-set! +;;; witem +;;; (case ctype ;; action +;;; ((noblockwrite) ;; blind write, no ack of success returned +;;; (apply sqlite3:execute sth data) +;;; (sqlite3:last-insert-rowid dbh)) +;;; ((dbwrite) ;; blocking write +;;; (apply sqlite3:execute sth data) +;;; #t) +;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query +;;; (apply sqlite3:map-row (lambda x x) sth data)) +;;; ((full-ping) 'full-ping) +;;; (else (print "Not ready for action " action) #f))) +;;; (loop (add1 count) +;;; (if cookie +;;; (cons witem responses) +;;; responses)))))))) +;;; +;;; ;; do up to 400ms of processing on each queue +;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded +;;; ;; +;;; (define (process-db-queries acfg fname) +;;; (if (hash-table-exists? (area-wqueues acfg) fname) +;;; (let* ((process-db-queries-start-time (current-milliseconds)) +;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f)) +;;; (queue-sym->queue (lambda (queue-sym) +;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol) +;;; ((wqueue) (qdat-writeq qdat)) +;;; ((rqueue) (qdat-readq qdat)) +;;; ((rwqueue) (qdat-rwq qdat)) +;;; ((misc) (qdat-misc qdat)) +;;; (else #f)))) +;;; (dbdat (get-dbh acfg fname)) +;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f)) +;;; (nowtime (current-seconds))) +;;; ;; handle the queues that require a transaction +;;; ;; +;;; (map ;; +;;; (lambda (queue-sym) +;;; ;; (print "processing queue " queue-sym) +;;; (let* ((queue (queue-sym->queue queue-sym))) +;;; (if (not (queue-empty? queue)) +;;; (let ((responses +;;; (sqlite3:with-transaction ;; todo - catch exceptions... +;;; dbh +;;; (lambda () +;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work! +;;; ;; (print "res=" res) +;;; (match res +;;; ((count delta responses) +;;; (update-stats acfg fname queue-sym delta count) +;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f) +;;; responses) ;; return responses +;;; (else +;;; (print "ERROR: bad return data from doqueue " res))) +;;; ))))) +;;; ;; having completed the transaction, send the responses. +;;; ;; (print "INFO: sending " (length responses) " responses.") +;;; (let loop ((responses-left responses)) +;;; (cond +;;; ((null? responses-left) #t) +;;; (else +;;; (let* ((witem (car responses-left)) +;;; (response (cdr responses-left))) +;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem) +;;; (witem-cookie witem)(witem-result witem))) +;;; (loop (cdr responses-left)))))) +;;; ))) +;;; '(wqueue rwqueue rqueue)) +;;; +;;; ;; handle misc queue +;;; ;; +;;; ;; (print "processing misc queue") +;;; (let ((queue (queue-sym->queue 'misc))) +;;; (doqueue acfg queue fname dbdat dbh)) +;;; ;; .... +;;; (save-dbh acfg fname dbdat) +;;; #t ;; just to let the tests know we got here +;;; ) +;;; #f ;; nothing processed +;;; )) +;;; +;;; ;; run all queues in parallel per db but sequentially per queue for that db. +;;; ;; - process the queues every 500 or so ms +;;; ;; - allow for long running queries to continue but all other activities for that +;;; ;; db will be blocked. +;;; ;; +;;; (define (work-queue-processor acfg) +;;; (let* ((threads (make-hash-table))) ;; fname => thread +;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg))) +;;; (target-time (+ (current-milliseconds) 50))) +;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames)) +;;; (for-each +;;; (lambda (fname) +;;; ;; (print "processing for " fname) +;;; ;;(process-db-queries acfg fname)) +;;; (let ((th (hash-table-ref/default threads fname #f))) +;;; (if (and th (not (member (thread-state th) '(dead terminated)))) +;;; (begin +;;; (print "WARNING: worker thread for " fname " is taking a long time.") +;;; (print "Thread is in state " (thread-state th))) +;;; (let ((th1 (make-thread (lambda () +;;; ;; (catch-and-dump +;;; ;; (lambda () +;;; ;; (print "Process queries for " fname) +;;; (let ((start-time (current-milliseconds))) +;;; (process-db-queries acfg fname) +;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time +;;; (hash-table-delete! threads fname)) ;; no mutexes? +;;; fname) +;;; "th1"))) ;; )) +;;; (hash-table-set! threads fname th1) +;;; (thread-start! th1))))) +;;; fnames) +;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests +;;; ;; burn time until 400ms is up +;;; (let ((now-time (current-milliseconds))) +;;; (if (< now-time target-time) +;;; (let ((delta (- target-time now-time))) +;;; (thread-sleep! (/ delta 1000))))) +;;; (loop (hash-table-keys (area-wqueues acfg)) +;;; (+ (current-milliseconds) 50))))) +;;; +;;; ;;====================================================================== +;;; ;; S T A T S G A T H E R I N G +;;; ;;====================================================================== +;;; +;;; (defstruct stat +;;; (qcount-avg 0) ;; coarse running average +;;; (qtime-avg 0) ;; coarse running average +;;; (qcount 0) ;; total +;;; (qtime 0) ;; total +;;; (last-qcount 0) ;; last +;;; (last-qtime 0) ;; last +;;; (dbs '()) ;; list of db files handled by this node +;;; (when 0)) ;; when the last query happened - seconds +;;; +;;; +;;; (define (update-stats acfg fname bucket duration numqueries) +;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough +;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f) +;;; (let ((newstats (make-stat))) +;;; (hash-table-set! (area-stats acfg) key newstats) +;;; newstats)))) +;;; ;; when the last query happended (used to remove the fname from the active list) +;;; (stat-when-set! stats (current-seconds)) +;;; ;; last values +;;; (stat-last-qcount-set! stats numqueries) +;;; (stat-last-qtime-set! stats duration) +;;; ;; total over process lifetime +;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries)) +;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration)) +;;; ;; coarse average +;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2)) +;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2)) +;;; +;;; ;; here is where we add the stats for a given dbfile +;;; (if (not (member fname (stat-dbs stats))) +;;; (stat-dbs-set! stats (cons fname (stat-dbs stats)))) +;;; +;;; )) +;;; +;;; ;;====================================================================== +;;; ;; S E R V E R S T U F F +;;; ;;====================================================================== +;;; +;;; ;; this does NOT return! +;;; ;; +;;; (define (find-free-port-and-open acfg) +;;; (let ((port (or (area-port acfg) 3200))) +;;; (handle-exceptions +;;; exn +;;; (begin +;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port") +;;; (area-port-set! acfg (+ port 1)) +;;; (find-free-port-and-open acfg)) +;;; (rpc:default-server-port port) +;;; (area-port-set! acfg port) +;;; (tcp-read-timeout 120000) +;;; ;; ((rpc:make-server (tcp-listen port)) #t) +;;; (tcp-listen (rpc:default-server-port) +;;; )))) +;;; +;;; ;; register this node by putting a packet into the pkts dir. +;;; ;; look for other servers +;;; ;; contact other servers and compile list of servers +;;; ;; there are two types of server +;;; ;; main servers - dashboards, runners and dedicated servers - need pkt +;;; ;; passive servers - test executers, step calls, list-runs - no pkt +;;; ;; +;;; (define (register-node acfg hostip port-num) +;;; ;;(mutex-lock! (area-mutex acfg)) +;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created) +;;; (best-ip (or hostip (get-my-best-address))) +;;; (mtdir (area-dbdir acfg)) +;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts"))) +;;; (print "Registering node " best-ip ":" port-num) +;;; (if (not mtdir) ;; require a home for this node to put or find databases +;;; #f +;;; (begin +;;; (if (not (directory? pktdir))(create-directory pktdir)) +;;; ;; server is started, now create pkt if needed +;;; (print "Starting server in " server-type " mode with port " port-num) +;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt +;;; (begin +;;; (area-pktid-set! acfg +;;; (write-alist->pkt +;;; pktdir +;;; `((hostname . ,(get-host-name)) +;;; (ipaddr . ,best-ip) +;;; (port . ,port-num) +;;; (pid . ,(current-process-id))) +;;; pktspec: *pktspec* +;;; ptype: 'server)) +;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt")))) +;;; (area-port-set! acfg port-num) +;;; #;(mutex-unlock! (area-mutex acfg)))))) +;;; +;;; (define *cookie-seqnum* 0) +;;; (define (make-cookie key) +;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*)) +;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*) +;;; (conc key "-" *cookie-seqnum*) +;;; ) +;;; +;;; ;; dispatch locally if possible +;;; ;; +;;; (define (call-deliver-response acfg ipaddr port cookie data) +;;; (if (and (equal? (area-myaddr acfg) ipaddr) +;;; (equal? (area-port acfg) port)) +;;; (deliver-response acfg cookie data) +;;; ((rpc:procedure 'response ipaddr port) cookie data))) +;;; +;;; (define (deliver-response acfg cookie data) +;;; (let ((deliver-response-start (current-milliseconds))) +;;; (thread-start! (make-thread +;;; (lambda () +;;; (let loop ((tries-left 5)) +;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left) +;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg))) +;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f))) +;;; (cond +;;; ((eq? 0 tries-left) +;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie) +;;; ) +;;; (mbox +;;; ;;(print "got mbox="mbox" got data="data" send.") +;;; (mailbox-send! mbox data)) +;;; (else +;;; ;;(print "no mbox yet. look for "cookie) +;;; (thread-sleep! (/ (- 6 tries-left) 10)) +;;; (loop (sub1 tries-left)))))) +;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data)) +;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie) +;;; ) +;;; (conc "deliver-response thread for cookie="cookie)))) +;;; #t) +;;; +;;; ;; action: +;;; ;; immediate - quick actions, no need to put in queues +;;; ;; dbwrite - put in dbwrite queue +;;; ;; dbread - put in dbread queue +;;; ;; oslong - os actions, e.g. du, that could take a long time +;;; ;; osshort - os actions that should be quick, e.g. df +;;; ;; +;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler +;;; ;; NOTE: Use rpc:current-peer for getting return address +;;; (let* ((std-peer-handler-start (current-milliseconds)) +;;; ;; (raw-data (alist-ref 'data dat)) +;;; (rdat (hash-table-ref/default +;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action +;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host +;;; rport: from-port action: action +;;; rdat: rdat cookie: cookie +;;; servkey: servkey data: params ;; TODO - rename data to params +;;; caller: (rpc:current-peer)))) +;;; (if (not (equal? servkey (area-pktid acfg))) +;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this +;;; (let* ((ctype (if rdat +;;; (calldat-ctype rdat) ;; is this necessary? these should be identical +;;; action))) +;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f) +;;; (case ctype +;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data))) +;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie)) +;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params))) +;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie)) +;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie )) +;;; ((dbrw) `(#t "db read/write submitted" ,cookie)) +;;; ((osshort) `(#t "os short submitted" ,cookie)) +;;; ((oslong) `(#t "os long submitted" ,cookie)) +;;; (else `(#f "unrecognised action" ,ctype))))))) +;;; +;;; ;; Call this to start the actual server +;;; ;; +;;; ;; start_server +;;; ;; +;;; ;; mode: ' +;;; ;; handler: proc which takes pktrecieved as argument +;;; ;; +;;; +;;; (define (start-server acfg) +;;; (let* ((conn (find-free-port-and-open acfg)) +;;; (port (area-port acfg))) +;;; (rpc:publish-procedure! +;;; 'delist-db +;;; (lambda (fname) +;;; (hash-table-delete! (area-dbs acfg) fname))) +;;; (rpc:publish-procedure! +;;; 'calling-addr +;;; (lambda () +;;; (rpc:current-peer))) +;;; (rpc:publish-procedure! +;;; 'ping +;;; (lambda ()(real-ping acfg))) +;;; (rpc:publish-procedure! +;;; 'request +;;; (lambda (from-addr from-port servkey action cookie dbname params) +;;; (request acfg from-addr from-port servkey action cookie dbname params))) +;;; (rpc:publish-procedure! +;;; 'response +;;; (lambda (cookie res-dat) +;;; (deliver-response acfg cookie res-dat))) +;;; (area-ready-set! acfg #t) +;;; (area-conn-set! acfg conn) +;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t) +;;; +;;; +;;; (define (launch acfg) ;; #!optional (proc std-peer-handler)) +;;; (print "starting launch") +;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) +;;; #;(let ((original-handler (current-exception-handler))) ;; is th +;;; (lambda (exception) +;;; (server-exit-procedure) +;;; (original-handler exception))) +;;; (on-exit (lambda () +;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg))) +;;; ;; set up the rpc handler +;;; (let* ((th1 (make-thread +;;; (lambda ()(start-server acfg)) +;;; "server thread")) +;;; (th2 (make-thread +;;; (lambda () +;;; (print "th2 starting") +;;; (let loop () +;;; (work-queue-processor acfg) +;;; (print "work-queue-processor crashed!") +;;; (loop))) +;;; "work queue thread"))) +;;; (thread-start! th1) +;;; (thread-start! th2) +;;; (let loop () +;;; (thread-sleep! 0.025) +;;; (if (area-ready acfg) +;;; #t +;;; (loop))) +;;; ;; attempt to fix my address +;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)? +;;; (let loop ((rem-addrs all-addr)) +;;; (if (null? rem-addrs) +;;; (begin +;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.") +;;; (exit 1)) ;; BUG Changeme to raising an exception +;;; +;;; (let* ((addr (car rem-addrs)) +;;; (good-addr (handle-exceptions +;;; exn +;;; #f +;;; ((rpc:procedure 'calling-addr addr (area-port acfg)))))) +;;; (if good-addr +;;; (begin +;;; (print "Got good-addr of " good-addr) +;;; (area-myaddr-set! acfg good-addr)) +;;; (loop (cdr rem-addrs))))))) +;;; (register-node acfg (area-myaddr acfg)(area-port acfg)) +;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg)) +;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) +;;; )) +;;; +;;; (define (clear-server-pkt acfg) +;;; (let ((pktf (area-pktfile acfg))) +;;; (if pktf (delete-file* pktf)))) +;;; +;;; (define (shutdown acfg) +;;; (let (;;(conn (area-conn acfg)) +;;; (pktf (area-pktfile acfg)) +;;; (port (area-port acfg))) +;;; (if pktf (delete-file* pktf)) +;;; (send-all "imshuttingdown") +;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed +;;; (finalize-all-db-handles acfg))) +;;; +;;; (define (send-all msg) +;;; #f) +;;; +;;; ;; given a area record look up all the packets +;;; ;; +;;; (define (get-all-server-pkts acfg) +;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt")))) +;;; (map (lambda (pkt-file) +;;; (read-pkt->alist pkt-file pktspec: *pktspec*)) +;;; all-pkt-files))) +;;; +;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9") +;;; (port . "34827") +;;; (pid . "28748") +;;; (hostname . "zeus") +;;; (T . "server") +;;; (D . "1549427032.0")) +;;; +;;; #;(define (get-my-best-address) +;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))) +;;; (cond +;;; ((null? all-my-addresses) +;;; (get-host-name)) ;; no interfaces? +;;; ((eq? (length all-my-addresses) 1) +;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it +;;; (else +;;; (ip->string (car (filter (lambda (x) ;; take any but 127. +;;; (not (eq? (u8vector-ref x 0) 127))) +;;; all-my-addresses))))))) +;;; +;;; ;; whoami? I am my pkt +;;; ;; +;;; (define (whoami? acfg) +;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f)) +;;; +;;; ;;====================================================================== +;;; ;; "Client side" operations +;;; ;;====================================================================== +;;; +;;; (define (safe-call call-key host port . params) +;;; (handle-exceptions +;;; exn +;;; (begin +;;; (print "Call " call-key " to " host ":" port " failed") +;;; #f) +;;; (apply (rpc:procedure call-key host port) params))) +;;; +;;; ;; ;; convert to/from string / sexpr +;;; ;; +;;; ;; (define (string->sexpr str) +;;; ;; (if (string? str) +;;; ;; (with-input-from-string str read) +;;; ;; str)) +;;; ;; +;;; ;; (define (sexpr->string s) +;;; ;; (with-output-to-string (lambda ()(write s)))) +;;; +;;; ;; is the server alive? +;;; ;; +;;; (define (ping acfg host port) +;;; (let* ((myaddr (area-myaddr acfg)) +;;; (myport (area-port acfg)) +;;; (start-time (current-milliseconds)) +;;; (res (if (and (equal? myaddr host) +;;; (equal? myport port)) +;;; (real-ping acfg) +;;; ((rpc:procedure 'ping host port))))) +;;; (cons (- (current-milliseconds) start-time) +;;; res))) +;;; +;;; ;; returns ( ipaddr port alist-fname=>randnum ) +;;; (define (real-ping acfg) +;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg))) +;;; +;;; ;; is the server alive AND the queues processing? +;;; ;; +;;; #;(define (full-ping acfg servpkt) +;;; (let* ((start-time (current-milliseconds)) +;;; (res (send-message acfg servpkt '(full-ping) 'full-ping))) +;;; (cons (- (current-milliseconds) start-time) +;;; res))) ;; (equal? res "got ping")))) +;;; +;;; +;;; ;; look up all pkts and get the server id (the hash), port, host/ip +;;; ;; store this info in acfg +;;; ;; return the number of responsive servers found +;;; ;; +;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself +;;; ;; +;;; (define (update-known-servers acfg) +;;; ;; readll all pkts +;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt +;;; (let* ((start-time (current-milliseconds)) +;;; (all-pkts (delete-duplicates +;;; (append (get-all-server-pkts acfg) +;;; (hash-table-values (area-hosts acfg))))) +;;; (hostshash (area-hosts acfg)) +;;; (my-id (area-pktid acfg)) +;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers +;;; (numsrvs 0) +;;; (delpkt (lambda (pktsdir sid) +;;; (print "clearing out server " sid) +;;; (delete-file* (conc pktsdir "/" sid ".pkt")) +;;; (hash-table-delete! hostshash sid)))) +;;; (area-last-srvup-set! acfg (current-seconds)) +;;; (for-each +;;; (lambda (servpkt) +;;; (if (list? servpkt) +;;; ;; (pp servpkt) +;;; (let* ((shost (alist-ref 'ipaddr servpkt)) +;;; (sport (any->number (alist-ref 'port servpkt))) +;;; (res (handle-exceptions +;;; exn +;;; (begin +;;; ;; (print "INFO: bad server on " shost ":" sport) +;;; #f) +;;; (ping acfg shost sport))) +;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server +;;; (url (conc shost ":" sport)) +;;; ) +;;; #;(if (or (not res) +;;; (null? res)) +;;; (begin +;;; (print "STRANGE: ping of " url " gave " res))) +;;; +;;; ;; (print "Got " res " from " shost ":" sport) +;;; (match res +;;; ((qduration . payload) +;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt) +;;; ;; (if payload +;;; ;; "Success" "Fail")) +;;; (match payload +;;; ((host port stats) +;;; ;; (print "From " host ":" port " got stats: " stats) +;;; (if (and host port stats) +;;; (let ((url (conc host ":" port))) +;;; (hash-table-set! hostshash sid servpkt) +;;; ;; store based on host:port +;;; (hash-table-set! (area-hoststats acfg) sid stats)) +;;; (print "missing data from the server, not sure what that means!")) +;;; (set! numsrvs (+ numsrvs 1))) +;;; (#f +;;; (print "Removing pkt " sid " due to #f from server or failed ping") +;;; (delpkt pktsdir sid)) +;;; (else +;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)"))) +;;; (else +;;; ;; here we delete the pkt - can't reach the server, remove it +;;; ;; however this logic is inadequate. we should mark the server as checked +;;; ;; and not good, if it happens a second time - then remove the pkt +;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead +;;; ;; could be it is simply too busy to reply +;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0))) +;;; (if (> bad-pings 1) ;; two bad pings - remove pkt +;;; (begin +;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid) +;;; (delpkt pktsdir sid)) +;;; (begin +;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet") +;;; (hash-table-set! (area-health acfg) +;;; url +;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1)) +;;; )) +;;; )))) +;;; ;; servpkt is not actually a pkt? +;;; (begin +;;; (print "Bad pkt " servpkt)))) +;;; all-pkts) +;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs +;;; " servers, pkts: " (map (lambda (p) +;;; (alist-ref 'Z p)) +;;; all-pkts)) +;;; numsrvs)) +;;; +;;; (defstruct srvstat +;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at +;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration +;;; (pkt #f)) ;; the server pkt +;;; +;;; ;;(define (srv->srvstat srvpkt) +;;; +;;; ;; Get the server best for given dbname and key +;;; ;; +;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries. +;;; ;; +;;; (define (get-best-server acfg dbname key) +;;; (let* (;; (servers (hash-table-values (area-hosts acfg))) +;;; (servers (area-hosts acfg)) +;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing +;;; (start-time (current-milliseconds)) +;;; (srvstats (make-hash-table)) ;; srvid => srvstat +;;; (url (conc (area-myaddr acfg) ":" (area-port acfg)))) +;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys)) +;;; (if (null? skeys) +;;; (if (> (update-known-servers acfg) 0) +;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter +;;; (begin +;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen +;;; #f)) +;;; (begin +;;; ;; (print "in get-best-server with skeys=" skeys) +;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10) +;;; (begin +;;; (update-known-servers acfg) +;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f))) +;;; +;;; ;; for each server look at the list of dbfiles, total number of dbs being handled +;;; ;; and the rand number, save the best host +;;; ;; also do a delist-db for each server dbfile not used +;;; (let* ((best-server #f) +;;; (servers-to-delist (make-hash-table))) +;;; (for-each +;;; (lambda (srvid) +;;; (let* ((server (hash-table-ref/default servers srvid #f)) +;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(())))) +;;; ;; (print "stats: " stats) +;;; (if server +;;; (let* ((dbweights (car stats)) +;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights))) +;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore +;;; (randnum (if dbrec +;;; dbrec ;; (cdr dbrec) +;;; 0))) +;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server)))))) +;;; skeys) +;;; +;;; (let* ((sorted (sort (hash-table-values srvstats) +;;; (lambda (a b) +;;; (let ((numfiles-a (srvstat-numfiles a)) +;;; (numfiles-b (srvstat-numfiles b)) +;;; (randnum-a (srvstat-randnum a)) +;;; (randnum-b (srvstat-randnum b))) +;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less +;;; #t +;;; (if (and (equal? numfiles-a numfiles-b) +;;; (< randnum-a randnum-b)) +;;; #t +;;; #f)))))) +;;; (best (if (null? sorted) +;;; (begin +;;; (print "ERROR: should never be null due to self as server.") +;;; #f) +;;; (srvstat-pkt (car sorted))))) +;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv) +;;; (let ((p (srvstat-pkt srv))) +;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p) +;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")"))) +;;; sorted)) +;;; best)))))) +;;; +;;; ;; send out an "I'm about to exit notice to all known servers" +;;; ;; +;;; (define (death-imminent acfg) +;;; '()) +;;; +;;; ;;====================================================================== +;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! ! +;;; ;;====================================================================== +;;; +;;; ;; register a handler +;;; ;; NOTES: +;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db +;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql +;;; ;; +;;; (define (register acfg key obj #!optional (ctype 'dbwrite)) +;;; (let ((ht (area-rtable acfg))) +;;; (if (hash-table-exists? ht key) +;;; (print "WARNING: redefinition of entry " key)) +;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype)))) +;;; +;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... ) +;;; ;; NB// obj is often an sql query +;;; ;; +;;; (define (register-batch acfg ctype data) +;;; (let ((ht (area-rtable acfg))) +;;; (map (lambda (dat) +;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype))) +;;; data))) +;;; +;;; (define (initialize-area-calls-from-specfile area specfile) +;;; (let* ((callspec (with-input-from-file specfile read ))) +;;; (for-each (lambda (group) +;;; (register-batch +;;; area +;;; (car group) +;;; (cdr group))) +;;; callspec))) +;;; +;;; ;; get-rentry +;;; ;; +;;; (define (get-rentry acfg key) +;;; (hash-table-ref/default (area-rtable acfg) key #f)) +;;; +;;; (define (get-rsql acfg key) +;;; (let ((cdat (get-rentry acfg key))) +;;; (if cdat +;;; (calldat-obj cdat) +;;; #f))) +;;; +;;; +;;; +;;; ;; blocking call: +;;; ;; client server +;;; ;; ------ ------ +;;; ;; call() +;;; ;; send-message() +;;; ;; nmsg-send() +;;; ;; nmsg-receive() +;;; ;; nmsg-respond(ack,cookie) +;;; ;; ack, cookie +;;; ;; mbox-thread-wait(cookie) +;;; ;; nmsg-send(client,cookie,result) +;;; ;; nmsg-respond(ack) +;;; ;; return result +;;; ;; +;;; ;; reserved action: +;;; ;; 'immediate +;;; ;; 'dbinitsql +;;; ;; +;;; (define (call acfg dbname action params #!optional (count 0)) +;;; (let* ((call-start-time (current-milliseconds)) +;;; (srv (get-best-server acfg dbname action)) +;;; (post-get-start-time (current-milliseconds)) +;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)) +;;; (myid (trim-pktid (area-pktid acfg))) +;;; (srvid (trim-pktid (alist-ref 'Z srv))) +;;; (cookie (make-cookie myid))) +;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat) +;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname) +;;; (if (and srv rdat) ;; need both to dispatch a request +;;; (let* ((ripaddr (alist-ref 'ipaddr srv)) +;;; (rsrvid (alist-ref 'Z srv)) +;;; (rport (any->number (alist-ref 'port srv))) +;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg)) +;;; (equal? rport (area-port acfg))) +;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params) +;;; (safe-call 'request ripaddr rport +;;; (area-myaddr acfg) +;;; (area-port acfg) +;;; #;(area-pktid acfg) +;;; rsrvid +;;; action cookie dbname params)))) +;;; ;; (print "res-full: " res-full) +;;; (match res-full +;;; ((response-ok response-msg rem ...) +;;; (let* ((send-message-time (current-milliseconds)) +;;; ;; (match res-full +;;; ;; ((response-ok response-msg) +;;; ;; (response-ok (car res-full)) +;;; ;; (response-msg (cadr res-full) +;;; ) +;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG +;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params) +;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time) +;;; (cond +;;; ((not response-ok) #f) +;;; ((member response-msg '("db read submitted" "db write submitted")) +;;; (let* ((cookie-id (cadddr res-full)) +;;; (mbox (make-mailbox)) +;;; (mbox-time (current-milliseconds))) +;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox) +;;; (let* ((mbox-timeout-secs 20) +;;; (mbox-timeout-result 'MBOX_TIMEOUT) +;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) +;;; (mbox-receive-time (current-milliseconds))) +;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id) +;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname) +;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params) +;;; res))) +;;; (else +;;; (print "Unhandled response \""response-msg"\"") +;;; #f)) +;;; ;; depending on what action (i.e. ctype) is we will block here waiting for +;;; ;; all the data (mechanism to be determined) +;;; ;; +;;; ;; if res is a "working on it" then wait +;;; ;; wait for result +;;; ;; mailbox thread wait on +;;; +;;; ;; if res is a "can't help you" then try a different server +;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res +;;; )) +;;; (else +;;; (if (< count 10) +;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv)))) +;;; (thread-sleep! 1) +;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.") +;;; (call acfg dbname action params (+ count 1))) +;;; (begin +;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full))))))) +;;; (begin +;;; (if (not rdat) +;;; (print "ERROR: action " action " not registered.") +;;; (if (< count 10) +;;; (begin +;;; (thread-sleep! 1) +;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts +;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds") +;;; (call acfg dbname action params (+ count 1))) +;;; (begin +;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up.")) +;;; #;(error "No server available")))))))) +;;; +;;; +;;; ;;====================================================================== +;;; ;; U T I L I T I E S +;;; ;;====================================================================== +;;; +;;; ;; get a signature for identifing this process +;;; ;; +;;; (define (get-process-signature) +;;; (cons (get-host-name)(current-process-id))) +;;; +;;; ;;====================================================================== +;;; ;; S Y S T E M S T U F F +;;; ;;====================================================================== +;;; +;;; ;; get normalized cpu load by reading from /proc/loadavg and +;;; ;; /proc/cpuinfo return all three values and the number of real cpus +;;; ;; and the number of threads returns alist '((adj-cpu-load +;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load, +;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load +;;; ;; +;;; (define (get-normalized-cpu-load) +;;; (let ((res (get-normalized-cpu-load-raw)) +;;; (default `((adj-proc-load . 2) ;; there is no right answer +;;; (adj-core-load . 2) +;;; (1m-load . 2) +;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong +;;; (15m-load . 0) +;;; (proc . 1) +;;; (core . 1) +;;; (phys . 1) +;;; (error . #t)))) +;;; (cond +;;; ((and (list? res) +;;; (> (length res) 2)) +;;; res) +;;; ((eq? res #f) default) ;; add messages? +;;; ((eq? res #f) default) ;; this would be the #eof +;;; (else default)))) +;;; +;;; (define (get-normalized-cpu-load-raw) +;;; (let* ((actual-host (get-host-name))) ;; #f is localhost +;;; (let ((data (append +;;; (with-input-from-file "/proc/loadavg" read-lines) +;;; (with-input-from-file "/proc/cpuinfo" read-lines) +;;; (list "end"))) +;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) +;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) +;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) +;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) +;;; (max-num (lambda (p n)(max (string->number p) n)))) +;;; ;; (print "data=" data) +;;; (if (null? data) ;; something went wrong +;;; #f +;;; (let loop ((hed (car data)) +;;; (tal (cdr data)) +;;; (loads #f) +;;; (proc-num 0) ;; processor includes threads +;;; (phys-num 0) ;; physical chip on motherboard +;;; (core-num 0)) ;; core +;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) +;;; (if (null? tal) ;; have all our data, calculate normalized load and return result +;;; (let* ((act-proc (+ proc-num 1)) +;;; (act-phys (+ phys-num 1)) +;;; (act-core (+ core-num 1)) +;;; (adj-proc-load (/ (car loads) act-proc)) +;;; (adj-core-load (/ (car loads) act-core)) +;;; (result +;;; (append (list (cons 'adj-proc-load adj-proc-load) +;;; (cons 'adj-core-load adj-core-load)) +;;; (list (cons '1m-load (car loads)) +;;; (cons '5m-load (cadr loads)) +;;; (cons '15m-load (caddr loads))) +;;; (list (cons 'proc act-proc) +;;; (cons 'core act-core) +;;; (cons 'phys act-phys))))) +;;; result) +;;; (regex-case +;;; hed +;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) +;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) +;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) +;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) +;;; (else +;;; (begin +;;; ;; (print "NO MATCH: " hed) +;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))) +;;; +;;; (define (get-host-stats acfg) +;;; (let ((stats-hash (area-stats acfg))) +;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while +;;; (for-each +;;; (lambda (dbname) +;;; (let* ((stats (hash-table-ref stats-hash dbname)) +;;; (last-access (stat-when stats))) +;;; (if (and (> last-access 0) ;; if zero then there has been no access +;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds +;;; (begin +;;; (print "Removing " dbname " from stats list") +;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash +;;; (stat-dbs-set! stats (hash-table-keys stats)))))) +;;; (hash-table-keys stats-hash)) +;;; +;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum +;;; ,(map (lambda (dbname) ;; dbname is the db name +;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname)))) +;;; (hash-table-keys stats-hash)) +;;; (cpuload . ,(get-normalized-cpu-load))))) +;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data +;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k)))) +;;; (hash-table-keys (area-stats acfg)))) +;;; +;;; #;(trace +;;; ;; assv +;;; ;; cdr +;;; ;; caar +;;; ;; ;; cdr +;;; ;; call +;;; ;; finalize-all-db-handles +;;; ;; get-all-server-pkts +;;; ;; get-normalized-cpu-load +;;; ;; get-normalized-cpu-load-raw +;;; ;; launch +;;; ;; nmsg-send +;;; ;; process-db-queries +;;; ;; receive-message +;;; ;; std-peer-handler +;;; ;; update-known-servers +;;; ;; work-queue-processor +;;; ) +;;; +;;; ;;====================================================================== +;;; ;; netutil +;;; ;; move this back to ulex-netutil.scm someday? +;;; ;;====================================================================== +;;; +;;; ;; #include +;;; ;; #include +;;; ;; #include +;;; ;; #include +;;; +;;; (foreign-declare "#include \"sys/types.h\"") +;;; (foreign-declare "#include \"sys/socket.h\"") +;;; (foreign-declare "#include \"ifaddrs.h\"") +;;; (foreign-declare "#include \"arpa/inet.h\"") +;;; +;;; ;; get IP addresses from ALL interfaces +;;; (define get-all-ips +;;; (foreign-safe-lambda* scheme-object () +;;; " +;;; +;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address : +;;; +;;; +;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a; +;;; // struct ifaddrs *ifa, *i; +;;; // struct sockaddr *sa; +;;; +;;; struct ifaddrs * ifAddrStruct = NULL; +;;; struct ifaddrs * ifa = NULL; +;;; void * tmpAddrPtr = NULL; +;;; +;;; if ( getifaddrs(&ifAddrStruct) != 0) +;;; C_return(C_SCHEME_FALSE); +;;; +;;; // for (i = ifa; i != NULL; i = i->ifa_next) { +;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) { +;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is +;;; // a valid IPv4 address +;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; +;;; char addressBuffer[INET_ADDRSTRLEN]; +;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN); +;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); +;;; len = strlen(addressBuffer); +;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); +;;; str = C_string(&a, len, addressBuffer); +;;; lst = C_a_pair(&a, str, lst); +;;; } +;;; +;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is +;;; // // a valid IPv6 address +;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; +;;; // char addressBuffer[INET6_ADDRSTRLEN]; +;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN); +;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); +;;; // len = strlen(addressBuffer); +;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); +;;; // str = C_string(&a, len, addressBuffer); +;;; // lst = C_a_pair(&a, str, lst); +;;; // } +;;; +;;; // else { +;;; // printf(\" not an IPv4 address\\n\"); +;;; // } +;;; +;;; } +;;; +;;; freeifaddrs(ifa); +;;; C_return(lst); +;;; +;;; ")) +;;; +;;; ;; Change this to bias for addresses with a reasonable broadcast value? +;;; ;; +;;; (define (ip-pref-less? a b) +;;; (let* ((rate (lambda (ipstr) +;;; (regex-case ipstr +;;; ( "^127\\." _ 0 ) +;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 ) +;;; ( else 2 ) )))) +;;; (< (rate a) (rate b)))) +;;; +;;; +;;; (define (get-my-best-address) +;;; (let ((all-my-addresses (get-all-ips)) +;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) +;;; ) +;;; (cond +;;; ((null? all-my-addresses) +;;; (get-host-name)) ;; no interfaces? +;;; ((eq? (length all-my-addresses) 1) +;;; (car all-my-addresses)) ;; only one to choose from, just go with it +;;; +;;; (else +;;; (car (sort all-my-addresses ip-pref-less?))) +;;; ;; (else +;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127. +;;; ;; (not (eq? (u8vector-ref x 0) 127))) +;;; ;; all-my-addresses)))) +;;; +;;; ))) +;;; +;;; (define (get-all-ips-sorted) +;;; (sort (get-all-ips) ip-pref-less?)) +;;; +;;; + Index: ulex/ulex.scm ================================================================== --- ulex/ulex.scm +++ ulex/ulex.scm @@ -23,330 +23,330 @@ ;; NOTES: ;; Why sql-de-lite and not say, dbi? - performance mostly, then simplicity. ;; ;;====================================================================== -(use mailbox) - -(module ulex - * - -(import scheme posix chicken data-structures ports extras files mailbox) -(import srfi-18 pkts matchable regex - typed-records srfi-69 srfi-1 - srfi-4 regex-case - (prefix sqlite3 sqlite3:) - foreign - tcp6 - ;; ulex-netutil - hostinfo - ) - -;; make it a global? Well, it is local to area module - -(define *captain-pktspec* - `((captain (host . h) - (port . p) - (pid . i) - (ipaddr . a) - ) - #;(data (hostname . h) ;; sender hostname - (port . p) ;; sender port - (ipaddr . a) ;; sender ip - (hostkey . k) ;; sending host key - store info at server under this key - (servkey . s) ;; server key - this needs to match at server end or reject the msg - (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json - (data . d) ;; base64 encoded slln data - ))) - -;; struct for keeping track of our world - -(defstruct udat - ;; captain info - (captain-address #f) - (captain-host #f) - (captain-port #f) - (captain-pid #f) - (captain-lease 0) ;; time (unix epoc) seconds when the lease is up - (ulex-dir (conc (get-environment-variable "HOME") "/.ulex")) - (cpkts-dir (conc (get-environment-variable "HOME") "/.ulex/pkts")) - (cpkt-spec *captain-pktspec*) - ;; this processes info - (my-cpkt-key #f) ;; put Z card here when I create a pkt for myself as captain - (my-address #f) - (my-hostname #f) - (my-port #f) - (my-pid (current-process-id)) - (my-dbs '()) - ;; server and handler thread - (serv-listener #f) ;; this processes server info - (handler-thread #f) - (mboxes (make-hash-table)) ;; key => mbox - ;; other servers - (peers (make-hash-table)) ;; host-port => peer record - (dbowners (make-hash-table)) ;; dbfile => host-port - (handlers (make-hash-table)) ;; dbfile => proc - ;; (outgoing-conns (make-hash-table)) ;; host:port -> conn - (work-queue (make-queue)) ;; most stuff goes here - ;; (fast-queue (make-queue)) ;; super quick stuff goes here (e.g. ping) - (busy #f) ;; is either of the queues busy, use to switch between queuing tasks or doing immediately - ;; app info - (appname #f) - (dbtypes (make-hash-table)) ;; this should be an alist but hash is easier. dbtype => [ initproc syncproc ] - ;; cookies - (cnum 0) ;; cookie num - ) - -;;====================================================================== -;; NEW APPROACH -;;====================================================================== - -;; start-server-find-port ;; gotta have a server port ready from the very begining - -;; udata - all the connection info, captain, server, ulex db etc. MUST BE PASSED IN -;; dbpath - full path and filename of the db to talk to or a symbol naming the db? -;; callname - the remote call to execute -;; params - parameters to pass to the remote call -;; -(define (remote-call udata dbpath dbtype callname . params) - (start-server-find-port udata) ;; ensure we have a local server - (find-or-setup-captain udata) - ;; look at connect, process-request, send, send-receive - (let-values (((cookie-key host-port)(get-db-owner udata dbpath dbtype))) - (send-receive udata host-port callname cookie-key params))) - -;;====================================================================== -;; KEY FUNCTIONS - THESE ARE TOO BE EXPOSED AND USED -;;====================================================================== - -;; connection setup and management functions - -;; This is the basic setup command. Must always be -;; called before connecting to a db using connect. -;; -;; find or become the captain -;; setup and return a ulex object -;; -(define (find-or-setup-captain udata) - ;; see if we already have a captain and if the lease is ok - (if (and (udat-captain-address udata) - (udat-captain-port udata) - (< (current-seconds) (udat-captain-lease udata))) - udata - (let* ((cpkts (get-all-captain-pkts udata)) ;; read captain pkts - (captn (get-winning-pkt cpkts))) - (if captn - (let* ((port (alist-ref 'port captn)) - (host (alist-ref 'host captn)) - (ipaddr (alist-ref 'ipaddr captn)) - (pid (alist-ref 'pid captn)) - (Z (alist-ref 'Z captn))) - (udat-captain-address-set! udata ipaddr) - (udat-captain-host-set! udata host) - (udat-captain-port-set! udata port) - (udat-captain-pid-set! udata pid) - (udat-captain-lease-set! udata (+ (current-seconds) 10)) - (let-values (((success pingtime)(ping udata (conc ipaddr ":" port)))) - (if success - udata - (begin - (print "Found unreachable captain at " ipaddr ":" port ", removing pkt") - (remove-captain-pkt udata captn) - (find-or-setup-captain udata)))) - (begin - (setup-as-captain udata) ;; this saves the thread to captain-thread and starts the thread - (find-or-setup-captain udata))))))) - -;; connect to a specific dbfile -;; - if already connected - return the dbowner host-port -;; - ask the captain who to talk to for this db -;; - put the entry in the dbowners hash as dbfile => host-port -;; -(define (connect udata dbfname dbtype) - (or (hash-table-ref/default (udat-dbowners udata) dbfname #f) - (let-values (((success dbowner-host-port)(get-db-owner udata dbfname dbtype))) - (if success - (begin - ;; just clobber the record, this is the new data no matter what - (hash-table-set! (udat-dbowners udata) dbfname dbowner-host-port) - dbowner-host-port) - #f)))) - -;; returns: success pingtime -;; -;; NOTE: causes the callee to store the info on this host along with the dbs this host currently owns -;; -(define (ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (msg (string-intersperse dbs " ")) - (res (send udata host-port 'ping cookie msg retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -;; returns: success pingtime -;; -;; NOTE: causes all references to this worker to be wiped out in the -;; callee (ususally the captain) -;; -(define (goodbye-ping udata host-port) - (let* ((start (current-milliseconds)) - (cookie (make-cookie udata)) - (dbs (udat-my-dbs udata)) - (res (send udata host-port 'goodbye cookie "nomsg" retval: #t)) - (delta (- (current-milliseconds) start))) - (values (equal? res cookie) delta))) - -(define (goodbye-captain udata) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (goodbye-ping udata host-port) - (values #f -1)))) - -(define (get-db-owner udata dbname dbtype) - (let* ((host-port (udat-captain-host-port udata))) - (if host-port - (let* ((cookie (make-cookie udata)) - (msg #f) ;; (conc dbname " " dbtype)) - (params `(,dbname ,dbtype)) - (res (send udata host-port 'db-owner cookie msg - params: params retval: #t))) - (match (string-split res) - ((retcookie owner-host-port) - (values (equal? retcookie cookie) owner-host-port)))) - (values #f -1)))) - -;; called in ulex-handler to dispatch work, called on the workers side -;; calls (proc params data) -;; returns result with cookie -;; -;; pdat is the info of the caller, used to send the result data -;; prockey is key into udat-handlers hash dereferencing a proc -;; procparam is a first param handed to proc - often to do further derefrencing -;; NOTE: params is intended to be a list of strings, encoding on data -;; is up to the user but data must be a single line -;; -(define (process-request udata pdat dbname cookie prockey procparam data) - (let* ((dbrec (ulex-open-db udata dbname)) ;; this will be a dbconn record, looks for in udata first - (proc (hash-table-ref udata prockey))) - (let* ((result (proc dbrec procparam data))) - result))) - -;; remote-request - send to remote to process in process-request -;; uconn comes from a call to connect and can be used instead of calling connect again -;; uconn is the host-port to call -;; we send dbname to the worker so they know which file to open -;; data must be a string with no newlines, it will be handed to the proc -;; at the remote site unchanged. It is up to the user to encode/decode it's contents -;; -;; rtype: immediate, read-only, normal, low-priority -;; -(define (remote-request udata uconn rtype dbname prockey procparam data) - (let* ((cookie (make-cookie udata))) - (send-receive udata uconn rtype cookie data `(,prockey procparam)))) - -(define (ulex-open-db udata dbname) - #f) - - -;;====================================================================== -;; Ulex db -;; -;; - track who is captain, lease expire time -;; - track who owns what db, lease -;; -;;====================================================================== - -;; -;; -(define (ulex-dbfname) - (let ((dbdir (conc (get-environment-variable "HOME") "/.ulex"))) - (if (not (file-exists? dbdir)) - (create-directory dbdir #t)) - (conc dbdir "/network.db"))) - -;; always goes in ~/.ulex/network.db -;; role is captain, adjutant, node -;; -(define (ulexdb-setup) - (let* ((dbfname (ulex-dbfname)) - (have-db (file-exists? dbfname)) - (db (sqlite3:open-database dbfname))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (sqlite3:execute db "PRAGMA synchronous = 0;") - (if (not have-db) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS nodes - (id INTEGER PRIMARY KEY, - role TEXT NOT NULL, - host TEXT NOT NULL, - port TEXT NOT NULL, - ipadr TEXT NOT NULL, - pid INTEGER NOT NULL, - zcard TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_nodes_trigger AFTER UPDATE ON nodes - FOR EACH ROW - BEGIN - UPDATE nodes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - "CREATE TABLE IF NOT EXISTS dbs - (id INTEGER PRIMARY KEY, - dbname TEXT NOT NULL, - dbfile TEXT NOT NULL, - dbtype TEXT NOT NULL, - host_port TEXT NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - lease_thru INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - "CREATE TRIGGER IF NOT EXISTS update_dbs_trigger AFTER UPDATE ON dbs - FOR EACH ROW - BEGIN - UPDATE dbs SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;"))))) - db)) - -(define (get-host-port-lease db dbfname) - (sqlite3:fold-row - (lambda (rem host-port lease-thru) - (list host-port lease-thru)) - #f db "SELECT host_port,lease_thru FROM dbs WHERE dbfile = ?" dbfname)) - -(define (register-captain db host ipadr port pid zcard #!key (lease 20)) - (let* ((dbfname (ulex-dbfname)) - (host-port (conc host ":" port))) - (sqlite3:with-transaction - db - (lambda () - (match (get-host-port-lease db dbfname) - ((host-port lease-thru) - (if (> (current-seconds) lease-thru) - (begin - (sqlite3:execute db "UPDATE dbs SET host_port=?,lease_thru=? WHERE dbname=?" - (conc host ":" port) - (+ (current-seconds) lease) - dbfname) - #t) - #f)) - (#f (sqlite3:execute db "INSERT INTO dbs (dbname,dbfile,dbtype,host_port,lease_thru) VALUES (?,?,?,?,?)" - "captain" dbfname "captain" host-port (+ (current-seconds) lease))) - (else (print "ERROR: Unrecognised result from fold-row") - (exit 1))))))) - -;;====================================================================== -;; network utilities -;;====================================================================== +(module ulex + * + +(import scheme + chicken.base + chicken.file + chicken.time + chicken.condition + chicken.string + chicken.sort + chicken.pretty-print + + address-info + mailbox + matchable + queues + regex + regex-case + srfi-1 + srfi-18 + srfi-4 + srfi-69 + system-information + tcp6 + typed-records + ) + +;; udat struct, used by both caller and callee +;; instantiated as uconn by convention +;; +(defstruct udat + ;; the listener side + (port #f) + (host-port #f) + (socket #f) + ;; the peers + (peers (make-hash-table)) ;; host:port->peer + ;; work handling + (work-queue (make-queue)) + (work-proc #f) ;; set by user + (cnum 0) ;; cookie number + (mboxes (make-hash-table)) + (avail-cmboxes '()) ;; list of ( . ) for re-use + ) + +;; struct for keeping track of others we are talking to +;; +(defstruct pdat + (host-port #f) + (conns '()) ;; list of pcon structs, pop one off when calling the peer + ) + +;; struct for peer connections, keep track of expiration etc. +;; +(defstruct pcon + (inp #f) + (oup #f) + (exp (+ (current-seconds) 59)) ;; expires at this time, set to (+ (current-seconds) 59) + (lifetime (+ (current-seconds) 600)) ;; throw away and create new after five minutes + ) + +;;====================================================================== +;; listener +;;====================================================================== +;; create a tcp listener and return a populated udat struct with +;; my port, address, hostname, pid etc. +;; return #f if fail to find a port to allocate. +;; +;; if udata-in is #f create the record +;; if there is already a serv-listener return the udata +;; +(define (setup-listener uconn #!optional (port 4242)) + (handle-exceptions + exn + (if (< port 65535) + (setup-listener uconn (+ port 1)) + #f) + (connect-listener uconn port))) + +(define (connect-listener uconn port) + ;; (tcp-listener-socket LISTENER)(socket-name so) + ;; sockaddr-address, sockaddr-port, sockaddr->string + (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) + (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) + (udat-port-set! uconn port) + (udat-host-port-set! uconn (conc addr":"port)) + (udat-socket-set! uconn tlsn) + uconn)) + +;;====================================================================== +;; peers and connections +;;====================================================================== + +;; send structured data to recipient +;; +;; NOTE: qrykey is what was called the "cookie" previously +;; +;; retval tells send to expect and wait for return data (one line) and return it or time out +;; this is for ping where we don't want to necessarily have set up our own server yet. +;; +;; NOTE: see below for beginnings of code to allow re-use of tcp connections +;; - I believe (without substantial evidence) that re-using connections will +;; be beneficial ... +;; +(define (send udata host-port qrykey cmd params) + (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this + (isme #f #;(equal? host-port my-host-port)) ;; calling myself? + ;; dat is a self-contained work block that can be sent or handled locally + (dat (list my-host-port qrykey cmd params)) + ) + (if isme + (ulex-handler udata dat) ;; no transmission needed + (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? + exn + #f + (let-values (((inp oup)(tcp-connect host-port))) + (let ((res (if (and inp oup) + (begin + (write dat oup) + (read inp)) ;; yes, we always want an ack + (begin + (print "ERROR: send called but no receiver has been setup. Please call setup first!") + #f)))) + (close-input-port inp) + (close-output-port oup) + res)))))) ;; res will always be 'ack + +;; send a request to the given host-port and register a mailbox in udata +;; wait for the mailbox data and return it +;; +(define (send-receive uconn host-port cmd data) + (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? + (qrykey (car cmbox)) + (mbox (cdr cmbox)) + (mbox-time (current-milliseconds))) + (if (eq? (send uconn host-port qrykey cmd data) 'ack) + (let* ((mbox-timeout-secs 120) ;; timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + (if (eq? res 'MBOX_TIMEOUT) + #f ;; convert to raising exception? + res)) + (begin + (print "ERROR: Communication failed?") + #f)))) ;; #f means failed to communicate + +;;====================================================================== +;; responder side +;;====================================================================== + +;; take a request, rdata, and if not immediate put it in the work queue +;; +;; Reserved cmds; ack ping goodbye response +;; +(define (ulex-handler uconn rdata) + (match rdata ;; (string-split controldat) + ((rem-host-port qrykey cmd params) + ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) + (let ((mbox (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) + (case cmd + ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) + ((ping) + ;; (print "Got Ping!") + (add-to-work-queue uconn rdata) + 'ack) + ((goodbye) + ;; just clear out references to the caller + (add-to-work-queue uconn rdata) + 'ack) + ((response) ;; this is a result from remote processing, send it as mail ... + (if mbox + (begin + (mailbox-send! mbox params) ;; params here is our result + 'ack) + (begin + (print "ERROR: received result but no associated mbox for cookie "qrykey) + #f))) + (else + ;; (print "Got generic request: "cmd) + (add-to-work-queue uconn rdata) + 'ack)))) + (else + (print "BAD DATA? controldat=" rdata) + 'ack) ;; send ack anyway? + )) + +;; given an already set up uconn start the cmd-loop +;; +(define (ulex-cmd-loop uconn) + (let* ((serv-listener (udat-socket uconn))) + (let loop ((state 'start)) + (let-values (((inp oup)(tcp-accept serv-listener))) + (let* ((rdat (read inp)) + (resp (ulex-handler uconn rdat))) + (if resp (write resp oup)) + (close-input-port inp) + (close-output-port oup)) + (loop state))))) + +;; add a proc to the cmd list, these are done symetrically (i.e. in all instances) +;; so that the proc can be dereferenced remotely +;; +(define (set-work-handler uconn proc) + (udat-work-proc-set! uconn proc)) + +;; run-listener does all the work of starting a listener in a thread +;; it then returns control +;; +(define (run-listener handler-proc) + (let* ((uconn (make-udat))) + (udat-work-proc-set! uconn handler-proc) + (if (setup-listener uconn) + (let* ((th1 (make-thread (lambda ()(ulex-cmd-loop uconn)) "Ulex command loop")) + (th2 (make-thread (lambda ()(process-work-queue uconn)) "Ulex work queue processor"))) + (thread-start! th1) + (thread-start! th2) + (print "cmd loop and process workers started") + uconn) + (begin + (print "ERROR: run-listener called without proper setup.") + (exit))))) + +;;====================================================================== +;; work queues - this is all happening on the listener side +;;====================================================================== + +;; rdata is (rem-host-port qrykey cmd params) + +(define (add-to-work-queue uconn rdata) + (queue-add! (udat-work-queue uconn) rdata)) + +(define (do-work uconn rdata) + (let* ((proc (udat-work-proc uconn))) ;; get it each time - conceivebly it could change + ;; put this following into a do-work procedure + (match rdata + ((rem-host-port qrykey cmd params) + (let* ((result (proc rem-host-port qrykey cmd params))) + ;; send 'response as cmd and result as params + (send uconn rem-host-port qrykey 'response result))) ;; could check for ack + (else + (print "ERROR: rdata "rdata", did not match rem-host-port qrykey cmd params"))))) + + +(define (process-work-queue uconn) + (let ((wqueue (udat-work-queue uconn)) + (proc (udat-work-proc uconn))) + (let loop () + (if (queue-empty? wqueue) + (thread-sleep! 0.1) + (let ((rdata (queue-remove! wqueue))) + (do-work uconn rdata))) + (loop)))) + +;; below was to enable re-use of connections. This seems non-trivial so for +;; now lets open on each call +;; +;; ;; given host-port get or create peer struct +;; ;; +;; (define (udat-get-peer uconn host-port) +;; (or (hash-table-ref/default (udat-peers uconn) host-port #f) +;; ;; no peer, so create pdat and init it +;; +;; ;; NEED stack of connections, pop and use; inp, oup, +;; ;; creation_time (remove and create new if over 24hrs old +;; ;; +;; (let ((pdat (make-pdat host-port: host-port))) +;; (hash-table-set! (udat-peers uconn) host-port pdat) +;; pdat))) +;; +;; ;; is pcon alive +;; +;; ;; given host-port and pdat get a pcon +;; ;; +;; (define (pdat-get-pcon pdat host-port) +;; (let loop ((conns (pdat-conns pdat))) +;; (if (null? conns) ;; none? make and return - do NOT add - it will be pushed back on list later +;; (init-pcon (make-pcon)) +;; (let* ((conn (pop conns))) +;; +;; ;; given host-port get a pcon struct +;; ;; +;; (define (udat-get-pcon + +;;====================================================================== +;; misc utils +;;====================================================================== + +(define (make-cookie uconn) + (let ((newcnum (+ (udat-cnum uconn) 1))) + (udat-cnum-set! uconn newcnum) + (conc (udat-host-port uconn) ":" + newcnum))) + +;; cookie/mboxes + +;; we store each mbox with a cookie ( . ) +;; +(define (get-cmbox uconn) + (if (null? (udat-avail-cmboxes uconn)) + (let ((cookie (make-cookie uconn)) + (mbox (make-mailbox))) + (hash-table-set! (udat-mboxes uconn) cookie mbox) + `(,cookie . ,mbox)) + (let ((cmbox (car (udat-avail-cmboxes uconn)))) + (udat-avail-cmboxes-set! uconn (cdr (udat-avail-cmboxes uconn))) + cmbox))) + +(define (put-cmbox uconn cmbox) + (udat-avail-cmboxes-set! uconn (cons cmbox (udat-avail-cmboxes uconn)))) + +(define (pp-uconn uconn) + (pp (udat->alist uconn))) + + +;;====================================================================== +;; network utilities +;;====================================================================== + +;; NOTE: Look at address-info egg as alternative to some of this (define (rate-ip ipaddr) (regex-case ipaddr ( "^127\\..*" _ 0 ) ( "^(10\\.0|192\\.168)\\..*" _ 1 ) @@ -354,1899 +354,70 @@ ;; Change this to bias for addresses with a reasonable broadcast value? ;; (define (ip-pref-less? a b) (> (rate-ip a) (rate-ip b))) - (define (get-my-best-address) - (let ((all-my-addresses (get-all-ips)) - ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) - ) + (let ((all-my-addresses (get-all-ips))) (cond ((null? all-my-addresses) (get-host-name)) ;; no interfaces? ((eq? (length all-my-addresses) 1) (car all-my-addresses)) ;; only one to choose from, just go with it - (else - (car (sort all-my-addresses ip-pref-less?))) - ;; (else - ;; (ip->string (car (filter (lambda (x) ;; take any but 127. - ;; (not (eq? (u8vector-ref x 0) 127))) - ;; all-my-addresses)))) - - ))) + (car (sort all-my-addresses ip-pref-less?)))))) (define (get-all-ips-sorted) (sort (get-all-ips) ip-pref-less?)) (define (get-all-ips) - (map ip->string (vector->list - (hostinfo-addresses - (host-information (current-hostname)))))) - -(define (udat-my-host-port udata) - (if (and (udat-my-address udata)(udat-my-port udata)) - (conc (udat-my-address udata) ":" (udat-my-port udata)) - #f)) - -(define (udat-captain-host-port udata) - (if (and (udat-captain-address udata)(udat-captain-port udata)) - (conc (udat-captain-address udata) ":" (udat-captain-port udata)) - #f)) - -(define (udat-get-peer udata host-port) - (hash-table-ref/default (udat-peers udata) host-port #f)) - -;; struct for keeping track of others we are talking to - -(defstruct peer - (addr-port #f) - (hostname #f) - (pid #f) - ;; (inp #f) - ;; (oup #f) - (dbs '()) ;; list of databases this peer is currently handling - ) - -(defstruct work - (peer-dat #f) - (handlerkey #f) - (qrykey #f) - (data #f) - (start (current-milliseconds))) - -#;(defstruct dbowner - (pdat #f) - (last-update (current-seconds))) - -;;====================================================================== -;; Captain functions -;;====================================================================== - -;; NB// This needs to be started in a thread -;; -;; setup to be a captain -;; - local server MUST be started already -;; - create pkt -;; - start server port handler -;; -(define (setup-as-captain udata) - (if (create-captain-pkt udata) - (let* ((my-addr (udat-my-address udata)) - (my-port (udat-my-port udata)) - (th (make-thread (lambda () - (ulex-handler-loop udata)) "Captain handler"))) - (udat-handler-thread-set! udata th) - (udat-captain-address-set! udata my-addr) - (udat-captain-port-set! udata my-port) - (thread-start! th)) - (begin - (print "ERROR: failed to create captain pkt") - #f))) - -;; given a pkts dir read -;; -(define (get-all-captain-pkts udata) - (let* ((pktsdir (let ((d (udat-cpkts-dir udata))) - (if (file-exists? d) - d - (begin - (create-directory d #t) - d)))) - (all-pkt-files (glob (conc pktsdir "/*.pkt"))) - (pkt-spec (udat-cpkt-spec udata))) - (map (lambda (pkt-file) - (read-pkt->alist pkt-file pktspec: pkt-spec)) - all-pkt-files))) - -;; sort by D then Z, return one, choose the oldest then -;; differentiate if needed using the Z key -;;l -(define (get-winning-pkt pkts) - (if (null? pkts) - #f - (car (sort pkts (lambda (a b) - (let ((ad (string->number (alist-ref 'D a))) - (bd (string->number (alist-ref 'D b)))) - (if (eq? a b) - (let ((az (alist-ref 'Z a)) - (bz (alist-ref 'Z b))) - (string>=? az bz)) - (> ad bd)))))))) - -;; put the host, ip, port and pid into a pkt in -;; the captain pkts dir -;; - assumes user has already fired up a server -;; which will be in the udata struct -;; -(define (create-captain-pkt udata) - (if (not (udat-serv-listener udata)) - (begin - (print "ERROR: create-captain-pkt called with out a listener") - #f) - (let* ((pktdat `((port . ,(udat-my-port udata)) - (host . ,(udat-my-hostname udata)) - (ipaddr . ,(udat-my-address udata)) - (pid . ,(udat-my-pid udata)))) - (pktdir (udat-cpkts-dir udata)) - (pktspec (udat-cpkt-spec udata)) - ) - (udat-my-cpkt-key-set! - udata - (write-alist->pkt - pktdir - pktdat - pktspec: pktspec - ptype: 'captain)) - (udat-my-cpkt-key udata)))) - -;; remove pkt associated with captn (the Z key .pkt) -;; -(define (remove-captain-pkt udata captn) - (let ((Z (alist-ref 'Z captn)) - (cpktdir (udat-cpkts-dir udata))) - (delete-file* (conc cpktdir "/" Z ".pkt")))) - -;; call all known peers and tell them to delete their info on the captain -;; thus forcing them to re-read pkts and connect to a new captain -;; call this when the captain needs to exit and if an older captain is -;; detected. Due to delays in sending file meta data in NFS multiple -;; captains can be initiated in a "Storm of Captains", book soon to be -;; on Amazon -;; -(define (drop-captain udata) - (let* ((peers (hash-table-keys (udat-peers udata))) - (cookie (make-cookie udata))) - (for-each - (lambda (host-port) - (send udata host-port 'dropcaptain cookie "nomsg" retval: #t)) - peers))) - -;;====================================================================== -;; server primitives -;;====================================================================== - -(define (make-cookie udata) - (let ((newcnum (+ (udat-cnum udata) 1))) - (udat-cnum-set! udata newcnum) - (conc (udat-my-address udata) ":" - (udat-my-port udata) "-" - (udat-my-pid udata) "-" - newcnum))) - -;; create a tcp listener and return a populated udat struct with -;; my port, address, hostname, pid etc. -;; return #f if fail to find a port to allocate. -;; -;; if udata-in is #f create the record -;; if there is already a serv-listener return the udata -;; -(define (start-server-find-port udata-in #!optional (port 4242)) - (let ((udata (or udata-in (make-udat)))) - (if (udat-serv-listener udata) ;; TODO - add check that the listener is alive and ready? - udata - (handle-exceptions - exn - (if (< port 65535) - (start-server-find-port udata (+ port 1)) - #f) - (connect-server udata port))))) - -(define (connect-server udata port) - ;; (tcp-listener-socket LISTENER)(socket-name so) - ;; sockaddr-address, sockaddr-port, sockaddr->string - (let* ((tlsn (tcp-listen port 1000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]]) - (addr (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname))) - (udat-my-address-set! udata addr) - (udat-my-port-set! udata port) - (udat-my-hostname-set! udata (get-host-name)) - (udat-serv-listener-set! udata tlsn) - udata)) - -(define (get-peer-dat udata host-port #!optional (hostname #f)(pid #f)) - (let* ((pdat (or (udat-get-peer udata host-port) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE SPECIFIC - exn - #f - (let ((npdat (make-peer addr-port: host-port))) - (if hostname (peer-hostname-set! npdat hostname)) - (if pid (peer-pid-set! npdat pid)) - npdat))))) - pdat)) - -;; send structured data to recipient -;; -;; NOTE: qrykey is what was called the "cookie" previously -;; -;; retval tells send to expect and wait for return data (one line) and return it or time out -;; this is for ping where we don't want to necessarily have set up our own server yet. -;; -(define (send udata host-port handler qrykey data - #!key (hostname #f)(pid #f)(params '())(retval #f)) - (let* ((my-host-port (udat-my-host-port udata)) - (isme (equal? host-port my-host-port)) ;; am I calling - ;; myself? - (dat (list - handler ;; " " - my-host-port ;; " " - (udat-my-pid udata) ;; " " - qrykey - params ;;(if (null? params) "" (conc " " - ;;(string-intersperse params " "))) - ))) - ;; (print "send isme is " (if isme "true!" "false!") ", - ;; my-host-port: " my-host-port ", host-port: " host-port) - (if isme - (ulex-handler udata dat data) - (handle-exceptions ;; ERROR - MAKE THIS EXCEPTION HANDLER MORE - ;; SPECIFIC - exn - #f - (let-values (((inp oup)(tcp-connect host-port))) - ;; - ;; CONTROL LINE: - ;; handlerkey host:port pid qrykey params ... - ;; - (let ((res - (if (and inp oup) - (let* () - (if my-host-port - (begin - (write dat oup) - (write data oup) ;; send as sexpr - ;; (print "Sent dat: " dat " data: " data) - (if retval - (read inp) - #t)) - (begin - (print "ERROR: send called but no receiver has been setup. Please call setup first!") - #f)) - ;; NOTE: DO NOT BE TEMPTED TO LOOK AT ANY DATA ON INP HERE! - ;; (there is a listener for handling that) - ) - #f))) ;; #f means failed to connect and send - (close-input-port inp) - (close-output-port oup) - res)))))) - -;; send a request to the given host-port and register a mailbox in udata -;; wait for the mailbox data and return it -;; -(define (send-receive udata host-port handler qrykey data #!key (hostname #f)(pid #f)(params '())(timeout 20)) - (let ((mbox (make-mailbox)) - (mbox-time (current-milliseconds)) - (mboxes (udat-mboxes udata))) - (hash-table-set! mboxes qrykey mbox) - (if (send udata host-port handler qrykey data hostname: hostname pid: pid params: params) - (let* ((mbox-timeout-secs timeout) - (mbox-timeout-result 'MBOX_TIMEOUT) - (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) - (mbox-receive-time (current-milliseconds))) - (hash-table-delete! mboxes qrykey) - (if (eq? res 'MBOX_TIMEOUT) - #f - res)) - #f))) ;; #f means failed to communicate - -;; -(define (ulex-handler udata controldat data) - (print "controldat: " controldat " data: " data) - (match controldat ;; (string-split controldat) - ((handlerkey host-port pid qrykey params ...) - ;; (print "handlerkey: " handlerkey " host-port: " host-port " pid: " pid " qrykey: " qrykey " params: " params) - (case handlerkey ;; (string->symbol handlerkey) - ((ack)(print "Got ack!")) - ((ping) ;; special case - return result immediately on the same connection - (let* ((proc (hash-table-ref/default (udat-handlers udata) 'ping #f)) - (val (if proc (proc) "gotping")) - (peer (make-peer addr-port: host-port pid: pid)) - (dbshash (udat-dbowners udata))) - (peer-dbs-set! peer params) ;; params for ping is list of dbs owned by pinger - (for-each (lambda (dbfile) - (hash-table-set! dbshash dbfile host-port)) ;; WRONG? - params) ;; register each db in the dbshash - (if (not (hash-table-exists? (udat-peers udata) host-port)) - (hash-table-set! (udat-peers udata) host-port peer)) ;; save the details of this caller in peers - qrykey)) ;; End of ping - ((goodbye) - ;; remove all traces of the caller in db ownership etc. - (let* ((peer (hash-table-ref/default (udat-peers udata) host-port #f)) - (dbs (if peer (peer-dbs peer) '())) - (dbshash (udat-dbowners udata))) - (for-each (lambda (dbfile)(hash-table-delete! dbshash dbfile)) dbs) - (hash-table-delete! (udat-peers udata) host-port) - qrykey)) - ((dropcaptain) - ;; remove all traces of the captain - (udat-captain-address-set! udata #f) - (udat-captain-host-set! udata #f) - (udat-captain-port-set! udata #f) - (udat-captain-pid-set! udata #f) - qrykey) - ((rucaptain) ;; remote is asking if I'm the captain - (if (udat-my-cpkt-key udata) "yes" "no")) - ((db-owner) ;; given a db name who do I send my queries to - ;; look up the file in handlers, if have an entry ping them to be sure - ;; they are still alive and then return that host:port. - ;; if no handler found or if the ping fails pick from peers the oldest that - ;; is managing the fewest dbs - (match params - ((dbfile dbtype) - (let* ((owner-host-port (hash-table-ref/default (udat-dbowners udata) dbfile #f))) - (if owner-host-port - (conc qrykey " " owner-host-port) - (let* ((pdat (or (hash-table-ref/default (udat-peers udata) host-port #f) ;; no owner - caller gets to own it! - (make-peer addr-port: host-port pid: pid dbs: `(,dbfile))))) - (hash-table-set! (udat-peers udata) host-port pdat) - (hash-table-set! (udat-dbowners udata) dbfile host-port) - (conc qrykey " " host-port))))) - (else (conc qrykey " BADDATA")))) - ;; for work items: - ;; handler is one of; immediate, read-only, read-write, high-priority - ((immediate read-only normal low-priority) ;; do this work immediately - ;; host-port (caller), pid (caller), qrykey (cookie), params <= all from first line - ;; data => a single line encoded however you want, or should I build json into it? - (print "handlerkey=" handlerkey) - (let* ((pdat (get-peer-dat udata host-port))) - (match params ;; dbfile prockey procparam - ((dbfile prockey procparam) - (case handlerkey - ((immediate read-only) - (process-request udata pdat dbfile qrykey prockey procparam data)) - ((normal low-priority) ;; split off later and add logic to support low priority - (add-to-work-queue udata pdat dbfile qrykey prockey procparam data)) - (else - #f))) - (else - (print "INFO: params=" params " handlerkey=" handlerkey " controldat=" controldat) - #f)))) - (else - ;; (add-to-work-queue udata (get-peer-dat udata host-port) handlerkey qrykey data) - #f))) - (else - (print "BAD DATA? controldat=" controldat " data=" data) - #f)));; handles the incoming messages and dispatches to queues - -;; -(define (ulex-handler-loop udata) - (let* ((serv-listener (udat-serv-listener udata))) - ;; data comes as two lines - ;; handlerkey resp-addr:resp-port hostname pid qrykey [dbpath/dbfile.db] - ;; data - (let loop ((state 'start)) - (let-values (((inp oup)(tcp-accept serv-listener))) - (let* ((controldat (read inp)) - (data (read inp)) - (resp (ulex-handler udata controldat data))) - (if resp (write resp oup)) - (close-input-port inp) - (close-output-port oup)) - (loop state))))) - -;; add a proc to the handler list, these are done symetrically (i.e. in all instances) -;; so that the proc can be dereferenced remotely -;; -(define (register-handler udata key proc) - (hash-table-set! (udat-handlers udata) key proc)) - - -;;====================================================================== -;; work queues -;;====================================================================== - -(define (add-to-work-queue udata peer-dat handlerkey qrykey data) - (let ((wdat (make-work peer-dat: peer-dat handlerkey: handlerkey qrykey: qrykey data: data))) - (if (udat-busy udata) - (queue-add! (udat-work-queue udata) wdat) - (process-work udata wdat)) ;; passing in wdat tells process-work to first process the passed in wdat - )) - -(define (do-work udata wdat) - #f) - -(define (process-work udata #!optional wdat) - (if wdat (do-work udata wdat)) ;; process wdat - (let ((wqueue (udat-work-queue udata))) - (if (not (queue-empty? wqueue)) - (let loop ((wd (queue-remove! wqueue))) - (do-work udata wd) - (if (not (queue-empty? wqueue)) - (loop (queue-remove! wqueue))))))) - -;;====================================================================== -;; Generic db handling -;; setup a inmem db instance -;; open connection to on-disk db -;; sync on-disk db to inmem -;; get lock in on-disk db for dbowner of this db -;; put sync-proc, init-proc, on-disk handle, inmem handle in dbconn stuct -;; return the stuct -;;====================================================================== - -(defstruct dbconn - (fname #f) - (inmem #f) - (conn #f) - (sync #f) ;; sync proc - (init #f) ;; init proc - (lastsync (current-seconds)) - ) - -(defstruct dbinfo - (initproc #f) - (syncproc #f)) - -;; open inmem and disk database -;; init with initproc -;; return db struct -;; -;; appname; megatest, ulex or something else. -;; -(define (setup-db-connection udata fname-in appname dbtype) - (let* ((is-ulex (eq? appname 'ulex)) - (dbinf (if is-ulex ;; ulex is a built-in special case - (make-dbinfo initproc: ulexdb-init syncproc: ulexdb-sync) - (hash-table-ref/default (udat-dbtypes udata) dbtype #f))) - (initproc (dbinfo-initproc dbinf)) - (syncproc (dbinfo-syncproc dbinf)) - (fname (if is-ulex - (conc (udat-ulex-dir udata) "/ulex.db") - fname-in)) - (inmem-db (open-and-initdb udata #f 'inmem (dbinfo-initproc dbinf))) - (disk-db (open-and-initdb udata fname 'disk (dbinfo-initproc dbinf)))) - (make-dbconn inmem: inmem-db conn: disk-db sync: syncproc init: initproc))) - -;; dest='inmem or 'disk -;; -(define (open-and-initdb udata filename dest init-proc) - (let* ((inmem (eq? dest 'inmem)) - (dbfile (if inmem - ":INMEM:" - filename)) - (dbexists (if inmem #t (file-exists? dbfile))) - (db (sqlite3:open-database dbfile))) - (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) - (if (not dbexists) - (init-proc db)) - db)) - - -;;====================================================================== -;; Previous Ulex db stuff -;;====================================================================== - -(define (ulexdb-init db inmem) - (sqlite3:with-transaction - db - (lambda () - (for-each - (lambda (stmt) - (if stmt (sqlite3:execute db stmt))) - `("CREATE TABLE IF NOT EXISTS processes - (id INTEGER PRIMARY KEY, - host TEXT NOT NULL, - ipadr TEXT NOT NULL, - port INTEGER NOT NULL, - pid INTEGER NOT NULL, - regtime INTEGER DEFAULT (strftime('%s','now')), - last_update INTEGER DEFAULT (strftime('%s','now')));" - (if inmem - "CREATE TRIGGER IF NOT EXISTS update_proces_trigger AFTER UPDATE ON processes - FOR EACH ROW - BEGIN - UPDATE processes SET last_update=(strftime('%s','now')) - WHERE id=old.id; - END;" - #f)))))) - -;; open databases, do initial sync -(define (ulexdb-sync dbconndat udata) - #f) - - -) ;; END OF ULEX - - -;;; ;;====================================================================== -;;; ;; D E B U G H E L P E R S -;;; ;;====================================================================== -;;; -;;; (define (dbg> . args) -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply print "dbg> " args)))) -;;; -;;; (define (debug-pp . args) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port (current-error-port) -;;; (lambda () -;;; (apply pp args))))) -;;; -;;; (define *default-debug-port* (current-error-port)) -;;; -;;; (define (sdbg> fn stage-name stage-start stage-end start-time . message) -;;; (if (get-environment-variable "ULEX_DEBUG") -;;; (with-output-to-port *default-debug-port* -;;; (lambda () -;;; (apply print "ulex:" fn " " stage-name " took " (- (if stage-end stage-end (current-milliseconds)) stage-start) " ms. " -;;; (if start-time -;;; (conc "total time " (- (current-milliseconds) start-time) -;;; " ms.") -;;; "") -;;; message -;;; ))))) - -;;====================================================================== -;; M A C R O S -;;====================================================================== -;; iup callbacks are not dumping the stack, this is a work-around -;; - -;; Some of these routines use: -;; -;; http://www.cs.toronto.edu/~gfb/scheme/simple-macros.html -;; -;; Syntax for defining macros in a simple style similar to function definiton, -;; when there is a single pattern for the argument list and there are no keywords. -;; -;; (define-simple-syntax (name arg ...) body ...) -;; -;; -;; (define-syntax define-simple-syntax -;; (syntax-rules () -;; ((_ (name arg ...) body ...) -;; (define-syntax name (syntax-rules () ((name arg ...) (begin body ...))))))) -;; -;; (define-simple-syntax (catch-and-dump proc procname) -;; (handle-exceptions -;; exn -;; (begin -;; (print-call-chain (current-error-port)) -;; (with-output-to-port (current-error-port) -;; (lambda () -;; (print ((condition-property-accessor 'exn 'message) exn)) -;; (print "Callback error in " procname) -;; (print "Full condition info:\n" (condition->list exn))))) -;; (proc))) -;; -;; -;;====================================================================== -;; R E C O R D S -;;====================================================================== - -;;; ;; information about me as a server -;;; ;; -;;; (defstruct area -;;; ;; about this area -;;; (useportlogger #f) -;;; (lowport 32768) -;;; (server-type 'auto) ;; auto=create up to five servers/pkts, main=create pkts, passive=no pkt (unless there are no pkts at all) -;;; (conn #f) -;;; (port #f) -;;; (myaddr (get-my-best-address)) -;;; pktid ;; get pkt from hosts table if needed -;;; pktfile -;;; pktsdir -;;; dbdir -;;; (dbhandles (make-hash-table)) ;; fname => list-of-dbh, NOTE: Should really never need more than one? -;;; (mutex (make-mutex)) -;;; (rtable (make-hash-table)) ;; registration table of available actions -;;; (dbs (make-hash-table)) ;; filename => random number, used for choosing what dbs I serve -;;; ;; about other servers -;;; (hosts (make-hash-table)) ;; key => hostdat -;;; (hoststats (make-hash-table)) ;; key => alist of fname => ( qcount . qtime ) -;;; (reqs (make-hash-table)) ;; uri => queue -;;; ;; work queues -;;; (wqueues (make-hash-table)) ;; fname => qdat -;;; (stats (make-hash-table)) ;; fname => totalqueries -;;; (last-srvup (current-seconds)) ;; last time we updated the known servers -;;; (cookie2mbox (make-hash-table)) ;; map cookie for outstanding request to mailbox of awaiting call -;;; (ready #f) -;;; (health (make-hash-table)) ;; ipaddr:port => num failed pings since last good ping -;;; ) -;;; -;;; ;; host stats -;;; ;; -;;; (defstruct hostdat -;;; (pkt #f) -;;; (dbload (make-hash-table)) ;; "dbfile.db" => queries/min -;;; (hostload #f) ;; normalized load ( 5min load / numcpus ) -;;; ) -;;; -;;; ;; dbdat -;;; ;; -;;; (defstruct dbdat -;;; (dbh #f) -;;; (fname #f) -;;; (write-access #f) -;;; (sths (make-hash-table)) ;; hash mapping query strings to handles -;;; ) -;;; -;;; ;; qdat -;;; ;; -;;; (defstruct qdat -;;; (writeq (make-queue)) -;;; (readq (make-queue)) -;;; (rwq (make-queue)) -;;; (logq (make-queue)) ;; do we need a queue for logging? yes, if we use sqlite3 db for logging -;;; (osshort (make-queue)) -;;; (oslong (make-queue)) -;;; (misc (make-queue)) ;; used for things like ping-full -;;; ) -;;; -;;; ;; calldat -;;; ;; -;;; (defstruct calldat -;;; (ctype 'dbwrite) -;;; (obj #f) ;; this would normally be an SQL statement e.g. SELECT, INSERT etc. -;;; (rtime (current-milliseconds))) -;;; -;;; ;; make it a global? Well, it is local to area module -;;; -;;; (define *pktspec* -;;; `((server (hostname . h) -;;; (port . p) -;;; (pid . i) -;;; (ipaddr . a) -;;; ) -;;; (data (hostname . h) ;; sender hostname -;;; (port . p) ;; sender port -;;; (ipaddr . a) ;; sender ip -;;; (hostkey . k) ;; sending host key - store info at server under this key -;;; (servkey . s) ;; server key - this needs to match at server end or reject the msg -;;; (format . f) ;; sb=serialized-base64, t=text, sx=sexpr, j=json -;;; (data . d) ;; base64 encoded slln data -;;; ))) -;;; -;;; ;; work item -;;; ;; -;;; (defstruct witem -;;; (rhost #f) ;; return host -;;; (ripaddr #f) ;; return ipaddr -;;; (rport #f) ;; return port -;;; (servkey #f) ;; the packet representing the client of this workitem, used by final send-message -;;; (rdat #f) ;; the request - usually an sql query, type is rdat -;;; (action #f) ;; the action: immediate, dbwrite, dbread,oslong, osshort -;;; (cookie #f) ;; cookie id for response -;;; (data #f) ;; the data payload, i.e. parameters -;;; (result #f) ;; the result from processing the data -;;; (caller #f)) ;; the calling peer according to rpc itself -;;; -;;; (define (trim-pktid pktid) -;;; (if (string? pktid) -;;; (substring pktid 0 4) -;;; "nopkt")) -;;; -;;; (define (any->number num) -;;; (cond -;;; ((number? num) num) -;;; ((string? num) (string->number num)) -;;; (else num))) -;;; -;;; (use trace) -;;; (trace-call-sites #t) -;;; -;;; ;;====================================================================== -;;; ;; D A T A B A S E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; ;; look in dbhandles for a db, return it, else return #f -;;; ;; -;;; (define (get-dbh acfg fname) -;;; (let ((dbh-lst (hash-table-ref/default (area-dbhandles acfg) fname '()))) -;;; (if (null? dbh-lst) -;;; (begin -;;; ;; (print "opening db for " fname) -;;; (open-db acfg fname)) ;; Note that the handles get put back in the queue in the save-dbh calls -;;; (let ((rem-lst (cdr dbh-lst))) -;;; ;; (print "re-using saved connection for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname rem-lst) -;;; (car dbh-lst))))) -;;; -;;; (define (save-dbh acfg fname dbdat) -;;; ;; (print "saving dbh for " fname) -;;; (hash-table-set! (area-dbhandles acfg) fname (cons dbdat (hash-table-ref/default (area-dbhandles acfg) fname '())))) -;;; -;;; ;; open the database, if never before opened init it. put the handle in the -;;; ;; open db's hash table -;;; ;; returns: the dbdat -;;; ;; -;;; (define (open-db acfg fname) -;;; (let* ((fullname (conc (area-dbdir acfg) "/" fname)) -;;; (exists (file-exists? fullname)) -;;; (write-access (if exists -;;; (file-write-access? fullname) -;;; (file-write-access? (area-dbdir acfg)))) -;;; (db (sqlite3:open-database fullname)) -;;; (handler (sqlite3:make-busy-timeout 136000)) -;;; ) -;;; (sqlite3:set-busy-handler! db handler) -;;; (sqlite3:execute db "PRAGMA synchronous = 0;") -;;; (if (not exists) ;; need to init the db -;;; (if write-access -;;; (let ((isql (get-rsql acfg 'dbinitsql))) ;; get the init sql statements -;;; ;; (sqlite3:with-transaction -;;; ;; db -;;; ;; (lambda () -;;; (if isql -;;; (for-each -;;; (lambda (sql) -;;; (sqlite3:execute db sql)) -;;; isql))) -;;; (print "ERROR: no write access to " (area-dbdir acfg)))) -;;; (make-dbdat dbh: db fname: fname write-access: write-access))) -;;; -;;; ;; This is a low-level command to retrieve or to prepare, save and return a prepared statment -;;; ;; you must extract the db handle -;;; ;; -;;; (define (get-sth db cache stmt) -;;; (if (hash-table-exists? cache stmt) -;;; (begin -;;; ;; (print "Reusing cached stmt for " stmt) -;;; (hash-table-ref/default cache stmt #f)) -;;; (let ((sth (sqlite3:prepare db stmt))) -;;; (hash-table-set! cache stmt sth) -;;; ;; (print "prepared stmt for " stmt) -;;; sth))) -;;; -;;; ;; a little more expensive but does all the tedious deferencing - only use if you don't already -;;; ;; have dbdat and db sitting around -;;; ;; -;;; (define (full-get-sth acfg fname stmt) -;;; (let* ((dbdat (get-dbh acfg fname)) -;;; (db (dbdat-dbh dbdat)) -;;; (sths (dbdat-sths dbdat))) -;;; (get-sth db sths stmt))) -;;; -;;; ;; write to a db -;;; ;; acfg: area data -;;; ;; rdat: request data -;;; ;; hdat: (host . port) -;;; ;; -;;; ;; (define (dbwrite acfg rdat hdat data-in) -;;; ;; (let* ((dbname (car data-in)) -;;; ;; (dbdat (get-dbh acfg dbname)) -;;; ;; (db (dbdat-dbh dbdat)) -;;; ;; (sths (dbdat-sths dbdat)) -;;; ;; (stmt (calldat-obj rdat)) -;;; ;; (sth (get-sth db sths stmt)) -;;; ;; (data (cdr data-in))) -;;; ;; (print "dbname: " dbname " acfg: " acfg " rdat: " (calldat->alist rdat) " hdat: " hdat " data: " data) -;;; ;; (print "dbdat: " (dbdat->alist dbdat)) -;;; ;; (apply sqlite3:execute sth data) -;;; ;; (save-dbh acfg dbname dbdat) -;;; ;; #t -;;; ;; )) -;;; -;;; (define (finalize-all-db-handles acfg) -;;; (let* ((dbhandles (area-dbhandles acfg)) ;; dbhandles is hash of fname ==> dbdat -;;; (num 0)) -;;; (for-each -;;; (lambda (area-name) -;;; (print "Closing handles for " area-name) -;;; (let ((dbdats (hash-table-ref/default dbhandles area-name '()))) -;;; (for-each -;;; (lambda (dbdat) -;;; ;; first close all statement handles -;;; (for-each -;;; (lambda (sth) -;;; (sqlite3:finalize! sth) -;;; (set! num (+ num 1))) -;;; (hash-table-values (dbdat-sths dbdat))) -;;; ;; now close the dbh -;;; (set! num (+ num 1)) -;;; (sqlite3:finalize! (dbdat-dbh dbdat))) -;;; dbdats))) -;;; (hash-table-keys dbhandles)) -;;; (print "FINALIZED " num " dbhandles"))) -;;; -;;; ;;====================================================================== -;;; ;; W O R K Q U E U E H A N D L I N G -;;; ;;====================================================================== -;;; -;;; (define (register-db-as-mine acfg dbname) -;;; (let ((ht (area-dbs acfg))) -;;; (if (not (hash-table-ref/default ht dbname #f)) -;;; (hash-table-set! ht dbname (random 10000))))) -;;; -;;; (define (work-queue-add acfg fname witem) -;;; (let* ((work-queue-start (current-milliseconds)) -;;; (action (witem-action witem)) ;; NB the action is the index into the rdat actions -;;; (qdat (or (hash-table-ref/default (area-wqueues acfg) fname #f) -;;; (let ((newqdat (make-qdat))) -;;; (hash-table-set! (area-wqueues acfg) fname newqdat) -;;; newqdat))) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f))) -;;; (if rdat -;;; (queue-add! -;;; (case (calldat-ctype rdat) -;;; ((dbwrite) (register-db-as-mine acfg fname)(qdat-writeq qdat)) -;;; ((dbread) (register-db-as-mine acfg fname)(qdat-readq qdat)) -;;; ((dbrw) (register-db-as-mine acfg fname)(qdat-rwq qdat)) -;;; ((oslong) (qdat-oslong qdat)) -;;; ((osshort) (qdat-osshort qdat)) -;;; ((full-ping) (qdat-misc qdat)) -;;; (else -;;; (print "ERROR: no queue for " action ". Adding to dbwrite queue.") -;;; (qdat-writeq qdat))) -;;; witem) -;;; (case action -;;; ((full-ping)(qdat-misc qdat)) -;;; (else -;;; (print "ERROR: No action " action " was registered")))) -;;; (sdbg> "work-queue-add" "queue-add" work-queue-start #f #f) -;;; #t)) ;; for now, simply return #t to indicate request got to the queue -;;; -;;; (define (doqueue acfg q fname dbdat dbh) -;;; ;; (print "doqueue: " fname) -;;; (let* ((start-time (current-milliseconds)) -;;; (qlen (queue-length q))) -;;; (if (> qlen 1) -;;; (print "Processing queue of length " qlen)) -;;; (let loop ((count 0) -;;; (responses '())) -;;; (let ((delta (- (current-milliseconds) start-time))) -;;; (if (or (queue-empty? q) -;;; (> delta 400)) ;; stop working on this queue after 400ms have passed -;;; (list count delta responses) ;; return count, delta and responses list -;;; (let* ((witem (queue-remove! q)) -;;; (action (witem-action witem)) -;;; (rdat (witem-rdat witem)) -;;; (stmt (calldat-obj rdat)) -;;; (sth (full-get-sth acfg fname stmt)) -;;; (ctype (calldat-ctype rdat)) -;;; (data (witem-data witem)) -;;; (cookie (witem-cookie witem))) -;;; ;; do the processing and save the result in witem-result -;;; (witem-result-set! -;;; witem -;;; (case ctype ;; action -;;; ((noblockwrite) ;; blind write, no ack of success returned -;;; (apply sqlite3:execute sth data) -;;; (sqlite3:last-insert-rowid dbh)) -;;; ((dbwrite) ;; blocking write -;;; (apply sqlite3:execute sth data) -;;; #t) -;;; ((dbread) ;; TODO: consider breaking this up and shipping in pieces for large query -;;; (apply sqlite3:map-row (lambda x x) sth data)) -;;; ((full-ping) 'full-ping) -;;; (else (print "Not ready for action " action) #f))) -;;; (loop (add1 count) -;;; (if cookie -;;; (cons witem responses) -;;; responses)))))))) -;;; -;;; ;; do up to 400ms of processing on each queue -;;; ;; - the work-queue-processor will allow the max 1200ms of work to complete but it will flag as overloaded -;;; ;; -;;; (define (process-db-queries acfg fname) -;;; (if (hash-table-exists? (area-wqueues acfg) fname) -;;; (let* ((process-db-queries-start-time (current-milliseconds)) -;;; (qdat (hash-table-ref/default (area-wqueues acfg) fname #f)) -;;; (queue-sym->queue (lambda (queue-sym) -;;; (case queue-sym ;; lookup the queue from qdat given a name (symbol) -;;; ((wqueue) (qdat-writeq qdat)) -;;; ((rqueue) (qdat-readq qdat)) -;;; ((rwqueue) (qdat-rwq qdat)) -;;; ((misc) (qdat-misc qdat)) -;;; (else #f)))) -;;; (dbdat (get-dbh acfg fname)) -;;; (dbh (if (dbdat? dbdat)(dbdat-dbh dbdat) #f)) -;;; (nowtime (current-seconds))) -;;; ;; handle the queues that require a transaction -;;; ;; -;;; (map ;; -;;; (lambda (queue-sym) -;;; ;; (print "processing queue " queue-sym) -;;; (let* ((queue (queue-sym->queue queue-sym))) -;;; (if (not (queue-empty? queue)) -;;; (let ((responses -;;; (sqlite3:with-transaction ;; todo - catch exceptions... -;;; dbh -;;; (lambda () -;;; (let* ((res (doqueue acfg queue fname dbdat dbh))) ;; this does the work! -;;; ;; (print "res=" res) -;;; (match res -;;; ((count delta responses) -;;; (update-stats acfg fname queue-sym delta count) -;;; (sdbg> "process-db-queries" "sqlite3-transaction" process-db-queries-start-time #f #f) -;;; responses) ;; return responses -;;; (else -;;; (print "ERROR: bad return data from doqueue " res))) -;;; ))))) -;;; ;; having completed the transaction, send the responses. -;;; ;; (print "INFO: sending " (length responses) " responses.") -;;; (let loop ((responses-left responses)) -;;; (cond -;;; ((null? responses-left) #t) -;;; (else -;;; (let* ((witem (car responses-left)) -;;; (response (cdr responses-left))) -;;; (call-deliver-response acfg (witem-ripaddr witem)(witem-rport witem) -;;; (witem-cookie witem)(witem-result witem))) -;;; (loop (cdr responses-left)))))) -;;; ))) -;;; '(wqueue rwqueue rqueue)) -;;; -;;; ;; handle misc queue -;;; ;; -;;; ;; (print "processing misc queue") -;;; (let ((queue (queue-sym->queue 'misc))) -;;; (doqueue acfg queue fname dbdat dbh)) -;;; ;; .... -;;; (save-dbh acfg fname dbdat) -;;; #t ;; just to let the tests know we got here -;;; ) -;;; #f ;; nothing processed -;;; )) -;;; -;;; ;; run all queues in parallel per db but sequentially per queue for that db. -;;; ;; - process the queues every 500 or so ms -;;; ;; - allow for long running queries to continue but all other activities for that -;;; ;; db will be blocked. -;;; ;; -;;; (define (work-queue-processor acfg) -;;; (let* ((threads (make-hash-table))) ;; fname => thread -;;; (let loop ((fnames (hash-table-keys (area-wqueues acfg))) -;;; (target-time (+ (current-milliseconds) 50))) -;;; ;;(if (not (null? fnames))(print "Processing for these databases: " fnames)) -;;; (for-each -;;; (lambda (fname) -;;; ;; (print "processing for " fname) -;;; ;;(process-db-queries acfg fname)) -;;; (let ((th (hash-table-ref/default threads fname #f))) -;;; (if (and th (not (member (thread-state th) '(dead terminated)))) -;;; (begin -;;; (print "WARNING: worker thread for " fname " is taking a long time.") -;;; (print "Thread is in state " (thread-state th))) -;;; (let ((th1 (make-thread (lambda () -;;; ;; (catch-and-dump -;;; ;; (lambda () -;;; ;; (print "Process queries for " fname) -;;; (let ((start-time (current-milliseconds))) -;;; (process-db-queries acfg fname) -;;; ;; (thread-sleep! 0.01) ;; need the thread to take at least some time -;;; (hash-table-delete! threads fname)) ;; no mutexes? -;;; fname) -;;; "th1"))) ;; )) -;;; (hash-table-set! threads fname th1) -;;; (thread-start! th1))))) -;;; fnames) -;;; ;; (thread-sleep! 0.1) ;; give the threads some time to process requests -;;; ;; burn time until 400ms is up -;;; (let ((now-time (current-milliseconds))) -;;; (if (< now-time target-time) -;;; (let ((delta (- target-time now-time))) -;;; (thread-sleep! (/ delta 1000))))) -;;; (loop (hash-table-keys (area-wqueues acfg)) -;;; (+ (current-milliseconds) 50))))) -;;; -;;; ;;====================================================================== -;;; ;; S T A T S G A T H E R I N G -;;; ;;====================================================================== -;;; -;;; (defstruct stat -;;; (qcount-avg 0) ;; coarse running average -;;; (qtime-avg 0) ;; coarse running average -;;; (qcount 0) ;; total -;;; (qtime 0) ;; total -;;; (last-qcount 0) ;; last -;;; (last-qtime 0) ;; last -;;; (dbs '()) ;; list of db files handled by this node -;;; (when 0)) ;; when the last query happened - seconds -;;; -;;; -;;; (define (update-stats acfg fname bucket duration numqueries) -;;; (let* ((key fname) ;; for now do not use bucket. Was: (conc fname "-" bucket)) ;; lazy but good enough -;;; (stats (or (hash-table-ref/default (area-stats acfg) key #f) -;;; (let ((newstats (make-stat))) -;;; (hash-table-set! (area-stats acfg) key newstats) -;;; newstats)))) -;;; ;; when the last query happended (used to remove the fname from the active list) -;;; (stat-when-set! stats (current-seconds)) -;;; ;; last values -;;; (stat-last-qcount-set! stats numqueries) -;;; (stat-last-qtime-set! stats duration) -;;; ;; total over process lifetime -;;; (stat-qcount-set! stats (+ (stat-qcount stats) numqueries)) -;;; (stat-qtime-set! stats (+ (stat-qtime stats) duration)) -;;; ;; coarse average -;;; (stat-qcount-avg-set! stats (/ (+ (stat-qcount-avg stats) numqueries) 2)) -;;; (stat-qtime-avg-set! stats (/ (+ (stat-qtime-avg stats) duration) 2)) -;;; -;;; ;; here is where we add the stats for a given dbfile -;;; (if (not (member fname (stat-dbs stats))) -;;; (stat-dbs-set! stats (cons fname (stat-dbs stats)))) -;;; -;;; )) -;;; -;;; ;;====================================================================== -;;; ;; S E R V E R S T U F F -;;; ;;====================================================================== -;;; -;;; ;; this does NOT return! -;;; ;; -;;; (define (find-free-port-and-open acfg) -;;; (let ((port (or (area-port acfg) 3200))) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "INFO: cannot bind to port " (rpc:default-server-port) ", trying next port") -;;; (area-port-set! acfg (+ port 1)) -;;; (find-free-port-and-open acfg)) -;;; (rpc:default-server-port port) -;;; (area-port-set! acfg port) -;;; (tcp-read-timeout 120000) -;;; ;; ((rpc:make-server (tcp-listen port)) #t) -;;; (tcp-listen (rpc:default-server-port) -;;; )))) -;;; -;;; ;; register this node by putting a packet into the pkts dir. -;;; ;; look for other servers -;;; ;; contact other servers and compile list of servers -;;; ;; there are two types of server -;;; ;; main servers - dashboards, runners and dedicated servers - need pkt -;;; ;; passive servers - test executers, step calls, list-runs - no pkt -;;; ;; -;;; (define (register-node acfg hostip port-num) -;;; ;;(mutex-lock! (area-mutex acfg)) -;;; (let* ((server-type (area-server-type acfg)) ;; auto, main, passive (no pkt created) -;;; (best-ip (or hostip (get-my-best-address))) -;;; (mtdir (area-dbdir acfg)) -;;; (pktdir (area-pktsdir acfg))) ;; conc mtdir "/.server-pkts"))) -;;; (print "Registering node " best-ip ":" port-num) -;;; (if (not mtdir) ;; require a home for this node to put or find databases -;;; #f -;;; (begin -;;; (if (not (directory? pktdir))(create-directory pktdir)) -;;; ;; server is started, now create pkt if needed -;;; (print "Starting server in " server-type " mode with port " port-num) -;;; (if (member server-type '(auto main)) ;; TODO: if auto, count number of servers registers, if > 3 then don't put out a pkt -;;; (begin -;;; (area-pktid-set! acfg -;;; (write-alist->pkt -;;; pktdir -;;; `((hostname . ,(get-host-name)) -;;; (ipaddr . ,best-ip) -;;; (port . ,port-num) -;;; (pid . ,(current-process-id))) -;;; pktspec: *pktspec* -;;; ptype: 'server)) -;;; (area-pktfile-set! acfg (conc pktdir "/" (area-pktid acfg) ".pkt")))) -;;; (area-port-set! acfg port-num) -;;; #;(mutex-unlock! (area-mutex acfg)))))) -;;; -;;; (define *cookie-seqnum* 0) -;;; (define (make-cookie key) -;;; (set! *cookie-seqnum* (add1 *cookie-seqnum*)) -;;; ;;(print "MAKE COOKIE CALLED -- on "servkey"-"*cookie-seqnum*) -;;; (conc key "-" *cookie-seqnum*) -;;; ) -;;; -;;; ;; dispatch locally if possible -;;; ;; -;;; (define (call-deliver-response acfg ipaddr port cookie data) -;;; (if (and (equal? (area-myaddr acfg) ipaddr) -;;; (equal? (area-port acfg) port)) -;;; (deliver-response acfg cookie data) -;;; ((rpc:procedure 'response ipaddr port) cookie data))) -;;; -;;; (define (deliver-response acfg cookie data) -;;; (let ((deliver-response-start (current-milliseconds))) -;;; (thread-start! (make-thread -;;; (lambda () -;;; (let loop ((tries-left 5)) -;;; ;;(print "TOP OF DELIVER_RESPONSE LOOP; triesleft="tries-left) -;;; ;;(pp (hash-table->alist (area-cookie2mbox acfg))) -;;; (let* ((mbox (hash-table-ref/default (area-cookie2mbox acfg) cookie #f))) -;;; (cond -;;; ((eq? 0 tries-left) -;;; (print "ulex:deliver-response: I give up. Mailbox never appeared. cookie="cookie) -;;; ) -;;; (mbox -;;; ;;(print "got mbox="mbox" got data="data" send.") -;;; (mailbox-send! mbox data)) -;;; (else -;;; ;;(print "no mbox yet. look for "cookie) -;;; (thread-sleep! (/ (- 6 tries-left) 10)) -;;; (loop (sub1 tries-left)))))) -;;; ;; (debug-pp (list (conc "ulex:deliver-response took " (- (current-milliseconds) deliver-response-start) " ms, cookie=" cookie " data=") data)) -;;; (sdbg> "deliver-response" "mailbox-send" deliver-response-start #f #f cookie) -;;; ) -;;; (conc "deliver-response thread for cookie="cookie)))) -;;; #t) -;;; -;;; ;; action: -;;; ;; immediate - quick actions, no need to put in queues -;;; ;; dbwrite - put in dbwrite queue -;;; ;; dbread - put in dbread queue -;;; ;; oslong - os actions, e.g. du, that could take a long time -;;; ;; osshort - os actions that should be quick, e.g. df -;;; ;; -;;; (define (request acfg from-ipaddr from-port servkey action cookie fname params) ;; std-peer-handler -;;; ;; NOTE: Use rpc:current-peer for getting return address -;;; (let* ((std-peer-handler-start (current-milliseconds)) -;;; ;; (raw-data (alist-ref 'data dat)) -;;; (rdat (hash-table-ref/default -;;; (area-rtable acfg) action #f)) ;; this looks up the sql query or other details indexed by the action -;;; (witem (make-witem ripaddr: from-ipaddr ;; rhost: from-host -;;; rport: from-port action: action -;;; rdat: rdat cookie: cookie -;;; servkey: servkey data: params ;; TODO - rename data to params -;;; caller: (rpc:current-peer)))) -;;; (if (not (equal? servkey (area-pktid acfg))) -;;; `(#f . ,(conc "I don't know you servkey=" servkey ", pktid=" (area-pktid acfg))) ;; immediately return this -;;; (let* ((ctype (if rdat -;;; (calldat-ctype rdat) ;; is this necessary? these should be identical -;;; action))) -;;; (sdbg> "std-peer-handler" "immediate" std-peer-handler-start #f #f) -;;; (case ctype -;;; ;; (dbwrite acfg rdat (cons from-ipaddr from-port) data))) -;;; ((full-ping) `(#t "ack to full ping" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((response) `(#t "ack from requestor" ,(deliver-response acfg fname params))) -;;; ((dbwrite) `(#t "db write submitted" ,(work-queue-add acfg fname witem) ,cookie)) -;;; ((dbread) `(#t "db read submitted" ,(work-queue-add acfg fname witem) ,cookie )) -;;; ((dbrw) `(#t "db read/write submitted" ,cookie)) -;;; ((osshort) `(#t "os short submitted" ,cookie)) -;;; ((oslong) `(#t "os long submitted" ,cookie)) -;;; (else `(#f "unrecognised action" ,ctype))))))) -;;; -;;; ;; Call this to start the actual server -;;; ;; -;;; ;; start_server -;;; ;; -;;; ;; mode: ' -;;; ;; handler: proc which takes pktrecieved as argument -;;; ;; -;;; -;;; (define (start-server acfg) -;;; (let* ((conn (find-free-port-and-open acfg)) -;;; (port (area-port acfg))) -;;; (rpc:publish-procedure! -;;; 'delist-db -;;; (lambda (fname) -;;; (hash-table-delete! (area-dbs acfg) fname))) -;;; (rpc:publish-procedure! -;;; 'calling-addr -;;; (lambda () -;;; (rpc:current-peer))) -;;; (rpc:publish-procedure! -;;; 'ping -;;; (lambda ()(real-ping acfg))) -;;; (rpc:publish-procedure! -;;; 'request -;;; (lambda (from-addr from-port servkey action cookie dbname params) -;;; (request acfg from-addr from-port servkey action cookie dbname params))) -;;; (rpc:publish-procedure! -;;; 'response -;;; (lambda (cookie res-dat) -;;; (deliver-response acfg cookie res-dat))) -;;; (area-ready-set! acfg #t) -;;; (area-conn-set! acfg conn) -;;; ((rpc:make-server conn) #f)));; ((tcp-listen (rpc:default-server-port)) #t) -;;; -;;; -;;; (define (launch acfg) ;; #!optional (proc std-peer-handler)) -;;; (print "starting launch") -;;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; #;(let ((original-handler (current-exception-handler))) ;; is th -;;; (lambda (exception) -;;; (server-exit-procedure) -;;; (original-handler exception))) -;;; (on-exit (lambda () -;;; (shutdown acfg))) ;; (finalize-all-db-handles acfg))) -;;; ;; set up the rpc handler -;;; (let* ((th1 (make-thread -;;; (lambda ()(start-server acfg)) -;;; "server thread")) -;;; (th2 (make-thread -;;; (lambda () -;;; (print "th2 starting") -;;; (let loop () -;;; (work-queue-processor acfg) -;;; (print "work-queue-processor crashed!") -;;; (loop))) -;;; "work queue thread"))) -;;; (thread-start! th1) -;;; (thread-start! th2) -;;; (let loop () -;;; (thread-sleep! 0.025) -;;; (if (area-ready acfg) -;;; #t -;;; (loop))) -;;; ;; attempt to fix my address -;;; (let* ((all-addr (get-all-ips-sorted))) ;; could use (tcp-addresses conn)? -;;; (let loop ((rem-addrs all-addr)) -;;; (if (null? rem-addrs) -;;; (begin -;;; (print "ERROR: Failed to figure out the ip address of myself as a server. Giving up.") -;;; (exit 1)) ;; BUG Changeme to raising an exception -;;; -;;; (let* ((addr (car rem-addrs)) -;;; (good-addr (handle-exceptions -;;; exn -;;; #f -;;; ((rpc:procedure 'calling-addr addr (area-port acfg)))))) -;;; (if good-addr -;;; (begin -;;; (print "Got good-addr of " good-addr) -;;; (area-myaddr-set! acfg good-addr)) -;;; (loop (cdr rem-addrs))))))) -;;; (register-node acfg (area-myaddr acfg)(area-port acfg)) -;;; (print "INFO: Server started on " (area-myaddr acfg) ":" (area-port acfg)) -;;; ;; (update-known-servers acfg) ;; gotta do this on every start (thus why limit number of publicised servers) -;;; )) -;;; -;;; (define (clear-server-pkt acfg) -;;; (let ((pktf (area-pktfile acfg))) -;;; (if pktf (delete-file* pktf)))) -;;; -;;; (define (shutdown acfg) -;;; (let (;;(conn (area-conn acfg)) -;;; (pktf (area-pktfile acfg)) -;;; (port (area-port acfg))) -;;; (if pktf (delete-file* pktf)) -;;; (send-all "imshuttingdown") -;;; ;; (rpc:close-all-connections!) ;; don't know if this is actually needed -;;; (finalize-all-db-handles acfg))) -;;; -;;; (define (send-all msg) -;;; #f) -;;; -;;; ;; given a area record look up all the packets -;;; ;; -;;; (define (get-all-server-pkts acfg) -;;; (let ((all-pkt-files (glob (conc (area-pktsdir acfg) "/*.pkt")))) -;;; (map (lambda (pkt-file) -;;; (read-pkt->alist pkt-file pktspec: *pktspec*)) -;;; all-pkt-files))) -;;; -;;; #;((Z . "9a0212302295a19610d5796fce0370fa130758e9") -;;; (port . "34827") -;;; (pid . "28748") -;;; (hostname . "zeus") -;;; (T . "server") -;;; (D . "1549427032.0")) -;;; -;;; #;(define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips))) ;; (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name)))))) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (ip->string (car all-my-addresses))) ;; only one to choose from, just go with it -;;; (else -;;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; (not (eq? (u8vector-ref x 0) 127))) -;;; all-my-addresses))))))) -;;; -;;; ;; whoami? I am my pkt -;;; ;; -;;; (define (whoami? acfg) -;;; (hash-table-ref/default (area-hosts acfg)(area-pktid acfg) #f)) -;;; -;;; ;;====================================================================== -;;; ;; "Client side" operations -;;; ;;====================================================================== -;;; -;;; (define (safe-call call-key host port . params) -;;; (handle-exceptions -;;; exn -;;; (begin -;;; (print "Call " call-key " to " host ":" port " failed") -;;; #f) -;;; (apply (rpc:procedure call-key host port) params))) -;;; -;;; ;; ;; convert to/from string / sexpr -;;; ;; -;;; ;; (define (string->sexpr str) -;;; ;; (if (string? str) -;;; ;; (with-input-from-string str read) -;;; ;; str)) -;;; ;; -;;; ;; (define (sexpr->string s) -;;; ;; (with-output-to-string (lambda ()(write s)))) -;;; -;;; ;; is the server alive? -;;; ;; -;;; (define (ping acfg host port) -;;; (let* ((myaddr (area-myaddr acfg)) -;;; (myport (area-port acfg)) -;;; (start-time (current-milliseconds)) -;;; (res (if (and (equal? myaddr host) -;;; (equal? myport port)) -;;; (real-ping acfg) -;;; ((rpc:procedure 'ping host port))))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) -;;; -;;; ;; returns ( ipaddr port alist-fname=>randnum ) -;;; (define (real-ping acfg) -;;; `(,(area-myaddr acfg) ,(area-port acfg) ,(get-host-stats acfg))) -;;; -;;; ;; is the server alive AND the queues processing? -;;; ;; -;;; #;(define (full-ping acfg servpkt) -;;; (let* ((start-time (current-milliseconds)) -;;; (res (send-message acfg servpkt '(full-ping) 'full-ping))) -;;; (cons (- (current-milliseconds) start-time) -;;; res))) ;; (equal? res "got ping")))) -;;; -;;; -;;; ;; look up all pkts and get the server id (the hash), port, host/ip -;;; ;; store this info in acfg -;;; ;; return the number of responsive servers found -;;; ;; -;;; ;; DO NOT VERIFY THAT THE SERVER IS ALIVE HERE. This is called at times where the current server is not yet alive and cannot ping itself -;;; ;; -;;; (define (update-known-servers acfg) -;;; ;; readll all pkts -;;; ;; foreach pkt; if it isn't me ping the server; if alive, add to hosts hash, else rm the pkt -;;; (let* ((start-time (current-milliseconds)) -;;; (all-pkts (delete-duplicates -;;; (append (get-all-server-pkts acfg) -;;; (hash-table-values (area-hosts acfg))))) -;;; (hostshash (area-hosts acfg)) -;;; (my-id (area-pktid acfg)) -;;; (pktsdir (area-pktsdir acfg)) ;; needed to remove pkts from non-responsive servers -;;; (numsrvs 0) -;;; (delpkt (lambda (pktsdir sid) -;;; (print "clearing out server " sid) -;;; (delete-file* (conc pktsdir "/" sid ".pkt")) -;;; (hash-table-delete! hostshash sid)))) -;;; (area-last-srvup-set! acfg (current-seconds)) -;;; (for-each -;;; (lambda (servpkt) -;;; (if (list? servpkt) -;;; ;; (pp servpkt) -;;; (let* ((shost (alist-ref 'ipaddr servpkt)) -;;; (sport (any->number (alist-ref 'port servpkt))) -;;; (res (handle-exceptions -;;; exn -;;; (begin -;;; ;; (print "INFO: bad server on " shost ":" sport) -;;; #f) -;;; (ping acfg shost sport))) -;;; (sid (alist-ref 'Z servpkt)) ;; Z code is our name for the server -;;; (url (conc shost ":" sport)) -;;; ) -;;; #;(if (or (not res) -;;; (null? res)) -;;; (begin -;;; (print "STRANGE: ping of " url " gave " res))) -;;; -;;; ;; (print "Got " res " from " shost ":" sport) -;;; (match res -;;; ((qduration . payload) -;;; ;; (print "Server pkt:" (alist-ref 'ipaddr servpkt) ":" (alist-ref 'port servpkt) -;;; ;; (if payload -;;; ;; "Success" "Fail")) -;;; (match payload -;;; ((host port stats) -;;; ;; (print "From " host ":" port " got stats: " stats) -;;; (if (and host port stats) -;;; (let ((url (conc host ":" port))) -;;; (hash-table-set! hostshash sid servpkt) -;;; ;; store based on host:port -;;; (hash-table-set! (area-hoststats acfg) sid stats)) -;;; (print "missing data from the server, not sure what that means!")) -;;; (set! numsrvs (+ numsrvs 1))) -;;; (#f -;;; (print "Removing pkt " sid " due to #f from server or failed ping") -;;; (delpkt pktsdir sid)) -;;; (else -;;; (print "Got ")(pp res)(print " from server ")(pp servpkt) " but response did not match (#f/#t . msg)"))) -;;; (else -;;; ;; here we delete the pkt - can't reach the server, remove it -;;; ;; however this logic is inadequate. we should mark the server as checked -;;; ;; and not good, if it happens a second time - then remove the pkt -;;; ;; or something similar. I.e. don't be too quick to assume the server is wedged or dead -;;; ;; could be it is simply too busy to reply -;;; (let ((bad-pings (hash-table-ref/default (area-health acfg) url 0))) -;;; (if (> bad-pings 1) ;; two bad pings - remove pkt -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " url ", deleting pkt " sid) -;;; (delpkt pktsdir sid)) -;;; (begin -;;; (print "INFO: " bad-pings " bad responses from " shost ":" sport " not deleting pkt yet") -;;; (hash-table-set! (area-health acfg) -;;; url -;;; (+ (hash-table-ref/default (area-health acfg) url 0) 1)) -;;; )) -;;; )))) -;;; ;; servpkt is not actually a pkt? -;;; (begin -;;; (print "Bad pkt " servpkt)))) -;;; all-pkts) -;;; (sdbg> "update-known-servers" "end" start-time #f #f " found " numsrvs -;;; " servers, pkts: " (map (lambda (p) -;;; (alist-ref 'Z p)) -;;; all-pkts)) -;;; numsrvs)) -;;; -;;; (defstruct srvstat -;;; (numfiles 0) ;; number of db files handled by this server - subtract 1 for the db being currently looked at -;;; (randnum #f) ;; tie breaker number assigned to by the server itself - applies only to the db under consideration -;;; (pkt #f)) ;; the server pkt -;;; -;;; ;;(define (srv->srvstat srvpkt) -;;; -;;; ;; Get the server best for given dbname and key -;;; ;; -;;; ;; NOTE: key is not currently used. The key points to the kind of query, this may be useful for directing read-only queries. -;;; ;; -;;; (define (get-best-server acfg dbname key) -;;; (let* (;; (servers (hash-table-values (area-hosts acfg))) -;;; (servers (area-hosts acfg)) -;;; (skeys (sort (hash-table-keys servers) string>=?)) ;; a stable listing -;;; (start-time (current-milliseconds)) -;;; (srvstats (make-hash-table)) ;; srvid => srvstat -;;; (url (conc (area-myaddr acfg) ":" (area-port acfg)))) -;;; ;; (print "scores for " dbname ": " (map (lambda (k)(cons k (calc-server-score acfg dbname k))) skeys)) -;;; (if (null? skeys) -;;; (if (> (update-known-servers acfg) 0) -;;; (get-best-server acfg dbname key) ;; some risk of infinite loop here, TODO add try counter -;;; (begin -;;; (print "ERROR: no server found!") ;; since this process is also a server this should never happen -;;; #f)) -;;; (begin -;;; ;; (print "in get-best-server with skeys=" skeys) -;;; (if (> (- (current-seconds) (area-last-srvup acfg)) 10) -;;; (begin -;;; (update-known-servers acfg) -;;; (sdbg> "get-best-server" "update-known-servers" start-time #f #f))) -;;; -;;; ;; for each server look at the list of dbfiles, total number of dbs being handled -;;; ;; and the rand number, save the best host -;;; ;; also do a delist-db for each server dbfile not used -;;; (let* ((best-server #f) -;;; (servers-to-delist (make-hash-table))) -;;; (for-each -;;; (lambda (srvid) -;;; (let* ((server (hash-table-ref/default servers srvid #f)) -;;; (stats (hash-table-ref/default (area-hoststats acfg) srvid '(())))) -;;; ;; (print "stats: " stats) -;;; (if server -;;; (let* ((dbweights (car stats)) -;;; (srvload (length (filter (lambda (x)(not (equal? dbname (car x)))) dbweights))) -;;; (dbrec (alist-ref dbname dbweights equal?)) ;; get the pair with fname . randscore -;;; (randnum (if dbrec -;;; dbrec ;; (cdr dbrec) -;;; 0))) -;;; (hash-table-set! srvstats srvid (make-srvstat numfiles: srvload randnum: randnum pkt: server)))))) -;;; skeys) -;;; -;;; (let* ((sorted (sort (hash-table-values srvstats) -;;; (lambda (a b) -;;; (let ((numfiles-a (srvstat-numfiles a)) -;;; (numfiles-b (srvstat-numfiles b)) -;;; (randnum-a (srvstat-randnum a)) -;;; (randnum-b (srvstat-randnum b))) -;;; (if (< numfiles-a numfiles-b) ;; Note, I don't think adding an offset works here. Goal was only move file handling to a different server if it has 2 less -;;; #t -;;; (if (and (equal? numfiles-a numfiles-b) -;;; (< randnum-a randnum-b)) -;;; #t -;;; #f)))))) -;;; (best (if (null? sorted) -;;; (begin -;;; (print "ERROR: should never be null due to self as server.") -;;; #f) -;;; (srvstat-pkt (car sorted))))) -;;; #;(print "SERVER(" url "): " dbname ": " (map (lambda (srv) -;;; (let ((p (srvstat-pkt srv))) -;;; (conc (alist-ref 'ipaddr p) ":" (alist-ref 'port p) -;;; "(" (srvstat-numfiles srv)","(srvstat-randnum srv)")"))) -;;; sorted)) -;;; best)))))) -;;; -;;; ;; send out an "I'm about to exit notice to all known servers" -;;; ;; -;;; (define (death-imminent acfg) -;;; '()) -;;; -;;; ;;====================================================================== -;;; ;; U L E X - T H E I N T E R E S T I N G S T U F F ! ! -;;; ;;====================================================================== -;;; -;;; ;; register a handler -;;; ;; NOTES: -;;; ;; dbinitsql is reserved for a list of sql statements for initializing the db -;;; ;; dbinitfn is reserved for a db init function, if exists called after dbinitsql -;;; ;; -;;; (define (register acfg key obj #!optional (ctype 'dbwrite)) -;;; (let ((ht (area-rtable acfg))) -;;; (if (hash-table-exists? ht key) -;;; (print "WARNING: redefinition of entry " key)) -;;; (hash-table-set! ht key (make-calldat obj: obj ctype: ctype)))) -;;; -;;; ;; usage: register-batch acfg '((key1 . sql1) (key2 . sql2) ... ) -;;; ;; NB// obj is often an sql query -;;; ;; -;;; (define (register-batch acfg ctype data) -;;; (let ((ht (area-rtable acfg))) -;;; (map (lambda (dat) -;;; (hash-table-set! ht (car dat)(make-calldat obj: (cdr dat) ctype: ctype))) -;;; data))) -;;; -;;; (define (initialize-area-calls-from-specfile area specfile) -;;; (let* ((callspec (with-input-from-file specfile read ))) -;;; (for-each (lambda (group) -;;; (register-batch -;;; area -;;; (car group) -;;; (cdr group))) -;;; callspec))) -;;; -;;; ;; get-rentry -;;; ;; -;;; (define (get-rentry acfg key) -;;; (hash-table-ref/default (area-rtable acfg) key #f)) -;;; -;;; (define (get-rsql acfg key) -;;; (let ((cdat (get-rentry acfg key))) -;;; (if cdat -;;; (calldat-obj cdat) -;;; #f))) -;;; -;;; -;;; -;;; ;; blocking call: -;;; ;; client server -;;; ;; ------ ------ -;;; ;; call() -;;; ;; send-message() -;;; ;; nmsg-send() -;;; ;; nmsg-receive() -;;; ;; nmsg-respond(ack,cookie) -;;; ;; ack, cookie -;;; ;; mbox-thread-wait(cookie) -;;; ;; nmsg-send(client,cookie,result) -;;; ;; nmsg-respond(ack) -;;; ;; return result -;;; ;; -;;; ;; reserved action: -;;; ;; 'immediate -;;; ;; 'dbinitsql -;;; ;; -;;; (define (call acfg dbname action params #!optional (count 0)) -;;; (let* ((call-start-time (current-milliseconds)) -;;; (srv (get-best-server acfg dbname action)) -;;; (post-get-start-time (current-milliseconds)) -;;; (rdat (hash-table-ref/default (area-rtable acfg) action #f)) -;;; (myid (trim-pktid (area-pktid acfg))) -;;; (srvid (trim-pktid (alist-ref 'Z srv))) -;;; (cookie (make-cookie myid))) -;;; (sdbg> "call" "get-best-server" call-start-time #f call-start-time " from: " myid " to server: " srvid " for " dbname " action: " action " params: " params " rdat: " rdat) -;;; (print "INFO: call to " (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv) " from " (area-myaddr acfg) ":" (area-port acfg) " for " dbname) -;;; (if (and srv rdat) ;; need both to dispatch a request -;;; (let* ((ripaddr (alist-ref 'ipaddr srv)) -;;; (rsrvid (alist-ref 'Z srv)) -;;; (rport (any->number (alist-ref 'port srv))) -;;; (res-full (if (and (equal? ripaddr (area-myaddr acfg)) -;;; (equal? rport (area-port acfg))) -;;; (request acfg ripaddr rport (area-pktid acfg) action cookie dbname params) -;;; (safe-call 'request ripaddr rport -;;; (area-myaddr acfg) -;;; (area-port acfg) -;;; #;(area-pktid acfg) -;;; rsrvid -;;; action cookie dbname params)))) -;;; ;; (print "res-full: " res-full) -;;; (match res-full -;;; ((response-ok response-msg rem ...) -;;; (let* ((send-message-time (current-milliseconds)) -;;; ;; (match res-full -;;; ;; ((response-ok response-msg) -;;; ;; (response-ok (car res-full)) -;;; ;; (response-msg (cadr res-full) -;;; ) -;;; ;; (res (take res-full 3))) ;; ctype == action, TODO: converge on one term <<=== what was this? BUG -;;; ;; (print "ulex:call: send-message took " (- send-message-time post-get-start-time) " ms params=" params) -;;; (sdbg> "call" "send-message" post-get-start-time #f call-start-time) -;;; (cond -;;; ((not response-ok) #f) -;;; ((member response-msg '("db read submitted" "db write submitted")) -;;; (let* ((cookie-id (cadddr res-full)) -;;; (mbox (make-mailbox)) -;;; (mbox-time (current-milliseconds))) -;;; (hash-table-set! (area-cookie2mbox acfg) cookie-id mbox) -;;; (let* ((mbox-timeout-secs 20) -;;; (mbox-timeout-result 'MBOX_TIMEOUT) -;;; (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) -;;; (mbox-receive-time (current-milliseconds))) -;;; (hash-table-delete! (area-cookie2mbox acfg) cookie-id) -;;; (sdbg> "call" "mailbox-receive" mbox-time #f call-start-time " from: " myid " to server: " srvid " for " dbname) -;;; ;; (print "ulex:call mailbox-receive took " (- mbox-receive-time mbox-time) "ms params=" params) -;;; res))) -;;; (else -;;; (print "Unhandled response \""response-msg"\"") -;;; #f)) -;;; ;; depending on what action (i.e. ctype) is we will block here waiting for -;;; ;; all the data (mechanism to be determined) -;;; ;; -;;; ;; if res is a "working on it" then wait -;;; ;; wait for result -;;; ;; mailbox thread wait on -;;; -;;; ;; if res is a "can't help you" then try a different server -;;; ;; if res is a "ack" (e.g. for one-shot requests) then return res -;;; )) -;;; (else -;;; (if (< count 10) -;;; (let* ((url (conc (alist-ref 'ipaddr srv) ":" (alist-ref 'port srv)))) -;;; (thread-sleep! 1) -;;; (print "ERROR: Bad result from " url ", dbname: " dbname ", action: " action ", params: " params ". Trying again in 1 second.") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: " count " tries, still have improper response res-full=" res-full))))))) -;;; (begin -;;; (if (not rdat) -;;; (print "ERROR: action " action " not registered.") -;;; (if (< count 10) -;;; (begin -;;; (thread-sleep! 1) -;;; (area-hosts-set! acfg (make-hash-table)) ;; clear out all known hosts -;;; (print "ERROR: no server found, srv=" srv ", trying again in 1 seconds") -;;; (call acfg dbname action params (+ count 1))) -;;; (begin -;;; (error (conc "ERROR: no server found after 10 tries, srv=" srv ", giving up.")) -;;; #;(error "No server available")))))))) -;;; -;;; -;;; ;;====================================================================== -;;; ;; U T I L I T I E S -;;; ;;====================================================================== -;;; -;;; ;; get a signature for identifing this process -;;; ;; -;;; (define (get-process-signature) -;;; (cons (get-host-name)(current-process-id))) -;;; -;;; ;;====================================================================== -;;; ;; S Y S T E M S T U F F -;;; ;;====================================================================== -;;; -;;; ;; get normalized cpu load by reading from /proc/loadavg and -;;; ;; /proc/cpuinfo return all three values and the number of real cpus -;;; ;; and the number of threads returns alist '((adj-cpu-load -;;; ;; . normalized-proc-load) ... etc. keys: adj-proc-load, -;;; ;; adj-core-load, 1m-load, 5m-load, 15m-load -;;; ;; -;;; (define (get-normalized-cpu-load) -;;; (let ((res (get-normalized-cpu-load-raw)) -;;; (default `((adj-proc-load . 2) ;; there is no right answer -;;; (adj-core-load . 2) -;;; (1m-load . 2) -;;; (5m-load . 0) ;; causes a large delta - thus causing default of throttling if stuff goes wrong -;;; (15m-load . 0) -;;; (proc . 1) -;;; (core . 1) -;;; (phys . 1) -;;; (error . #t)))) -;;; (cond -;;; ((and (list? res) -;;; (> (length res) 2)) -;;; res) -;;; ((eq? res #f) default) ;; add messages? -;;; ((eq? res #f) default) ;; this would be the #eof -;;; (else default)))) -;;; -;;; (define (get-normalized-cpu-load-raw) -;;; (let* ((actual-host (get-host-name))) ;; #f is localhost -;;; (let ((data (append -;;; (with-input-from-file "/proc/loadavg" read-lines) -;;; (with-input-from-file "/proc/cpuinfo" read-lines) -;;; (list "end"))) -;;; (load-rx (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$")) -;;; (proc-rx (regexp "^processor\\s+:\\s+(\\d+)\\s*$")) -;;; (core-rx (regexp "^core id\\s+:\\s+(\\d+)\\s*$")) -;;; (phys-rx (regexp "^physical id\\s+:\\s+(\\d+)\\s*$")) -;;; (max-num (lambda (p n)(max (string->number p) n)))) -;;; ;; (print "data=" data) -;;; (if (null? data) ;; something went wrong -;;; #f -;;; (let loop ((hed (car data)) -;;; (tal (cdr data)) -;;; (loads #f) -;;; (proc-num 0) ;; processor includes threads -;;; (phys-num 0) ;; physical chip on motherboard -;;; (core-num 0)) ;; core -;;; ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num) -;;; (if (null? tal) ;; have all our data, calculate normalized load and return result -;;; (let* ((act-proc (+ proc-num 1)) -;;; (act-phys (+ phys-num 1)) -;;; (act-core (+ core-num 1)) -;;; (adj-proc-load (/ (car loads) act-proc)) -;;; (adj-core-load (/ (car loads) act-core)) -;;; (result -;;; (append (list (cons 'adj-proc-load adj-proc-load) -;;; (cons 'adj-core-load adj-core-load)) -;;; (list (cons '1m-load (car loads)) -;;; (cons '5m-load (cadr loads)) -;;; (cons '15m-load (caddr loads))) -;;; (list (cons 'proc act-proc) -;;; (cons 'core act-core) -;;; (cons 'phys act-phys))))) -;;; result) -;;; (regex-case -;;; hed -;;; (load-rx ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num)) -;;; (proc-rx ( x p ) (loop (car tal)(cdr tal) loads (max-num p proc-num) phys-num core-num)) -;;; (phys-rx ( x p ) (loop (car tal)(cdr tal) loads proc-num (max-num p phys-num) core-num)) -;;; (core-rx ( x c ) (loop (car tal)(cdr tal) loads proc-num phys-num (max-num c core-num))) -;;; (else -;;; (begin -;;; ;; (print "NO MATCH: " hed) -;;; (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))) -;;; -;;; (define (get-host-stats acfg) -;;; (let ((stats-hash (area-stats acfg))) -;;; ;; use this opportunity to remove references to dbfiles which have not been accessed in a while -;;; (for-each -;;; (lambda (dbname) -;;; (let* ((stats (hash-table-ref stats-hash dbname)) -;;; (last-access (stat-when stats))) -;;; (if (and (> last-access 0) ;; if zero then there has been no access -;;; (> (- (current-seconds) last-access) 10)) ;; not used in ten seconds -;;; (begin -;;; (print "Removing " dbname " from stats list") -;;; (hash-table-delete! stats-hash dbname) ;; remove from stats hash -;;; (stat-dbs-set! stats (hash-table-keys stats)))))) -;;; (hash-table-keys stats-hash)) -;;; -;;; `(,(hash-table->alist (area-dbs acfg)) ;; dbname => randnum -;;; ,(map (lambda (dbname) ;; dbname is the db name -;;; (cons dbname (stat-when (hash-table-ref stats-hash dbname)))) -;;; (hash-table-keys stats-hash)) -;;; (cpuload . ,(get-normalized-cpu-load))))) -;;; #;(stats . ,(map (lambda (k) ;; create an alist from the stats data -;;; (cons k (stat->alist (hash-table-ref (area-stats acfg) k)))) -;;; (hash-table-keys (area-stats acfg)))) -;;; -;;; #;(trace -;;; ;; assv -;;; ;; cdr -;;; ;; caar -;;; ;; ;; cdr -;;; ;; call -;;; ;; finalize-all-db-handles -;;; ;; get-all-server-pkts -;;; ;; get-normalized-cpu-load -;;; ;; get-normalized-cpu-load-raw -;;; ;; launch -;;; ;; nmsg-send -;;; ;; process-db-queries -;;; ;; receive-message -;;; ;; std-peer-handler -;;; ;; update-known-servers -;;; ;; work-queue-processor -;;; ) -;;; -;;; ;;====================================================================== -;;; ;; netutil -;;; ;; move this back to ulex-netutil.scm someday? -;;; ;;====================================================================== -;;; -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; ;; #include -;;; -;;; (foreign-declare "#include \"sys/types.h\"") -;;; (foreign-declare "#include \"sys/socket.h\"") -;;; (foreign-declare "#include \"ifaddrs.h\"") -;;; (foreign-declare "#include \"arpa/inet.h\"") -;;; -;;; ;; get IP addresses from ALL interfaces -;;; (define get-all-ips -;;; (foreign-safe-lambda* scheme-object () -;;; " -;;; -;;; // from https://stackoverflow.com/questions/17909401/linux-c-get-default-interfaces-ip-address : -;;; -;;; -;;; C_word lst = C_SCHEME_END_OF_LIST, len, str, *a; -;;; // struct ifaddrs *ifa, *i; -;;; // struct sockaddr *sa; -;;; -;;; struct ifaddrs * ifAddrStruct = NULL; -;;; struct ifaddrs * ifa = NULL; -;;; void * tmpAddrPtr = NULL; -;;; -;;; if ( getifaddrs(&ifAddrStruct) != 0) -;;; C_return(C_SCHEME_FALSE); -;;; -;;; // for (i = ifa; i != NULL; i = i->ifa_next) { -;;; for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) { -;;; if (ifa->ifa_addr->sa_family==AF_INET) { // Check it is -;;; // a valid IPv4 address -;;; tmpAddrPtr = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; -;;; char addressBuffer[INET_ADDRSTRLEN]; -;;; inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN); -;;; // printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; len = strlen(addressBuffer); -;;; a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; str = C_string(&a, len, addressBuffer); -;;; lst = C_a_pair(&a, str, lst); -;;; } -;;; -;;; // else if (ifa->ifa_addr->sa_family==AF_INET6) { // Check it is -;;; // // a valid IPv6 address -;;; // tmpAddrPtr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; -;;; // char addressBuffer[INET6_ADDRSTRLEN]; -;;; // inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN); -;;; //// printf(\"%s IP Address %s\\n\", ifa->ifa_name, addressBuffer); -;;; // len = strlen(addressBuffer); -;;; // a = C_alloc(C_SIZEOF_PAIR + C_SIZEOF_STRING(len)); -;;; // str = C_string(&a, len, addressBuffer); -;;; // lst = C_a_pair(&a, str, lst); -;;; // } -;;; -;;; // else { -;;; // printf(\" not an IPv4 address\\n\"); -;;; // } -;;; -;;; } -;;; -;;; freeifaddrs(ifa); -;;; C_return(lst); -;;; -;;; ")) -;;; -;;; ;; Change this to bias for addresses with a reasonable broadcast value? -;;; ;; -;;; (define (ip-pref-less? a b) -;;; (let* ((rate (lambda (ipstr) -;;; (regex-case ipstr -;;; ( "^127\\." _ 0 ) -;;; ( "^(10\\.0|192\\.168\\.)\\..*" _ 1 ) -;;; ( else 2 ) )))) -;;; (< (rate a) (rate b)))) -;;; -;;; -;;; (define (get-my-best-address) -;;; (let ((all-my-addresses (get-all-ips)) -;;; ;;(all-my-addresses-old (vector->list (hostinfo-addresses (hostname->hostinfo (get-host-name))))) -;;; ) -;;; (cond -;;; ((null? all-my-addresses) -;;; (get-host-name)) ;; no interfaces? -;;; ((eq? (length all-my-addresses) 1) -;;; (car all-my-addresses)) ;; only one to choose from, just go with it -;;; -;;; (else -;;; (car (sort all-my-addresses ip-pref-less?))) -;;; ;; (else -;;; ;; (ip->string (car (filter (lambda (x) ;; take any but 127. -;;; ;; (not (eq? (u8vector-ref x 0) 127))) -;;; ;; all-my-addresses)))) -;;; -;;; ))) -;;; -;;; (define (get-all-ips-sorted) -;;; (sort (get-all-ips) ip-pref-less?)) -;;; -;;; + (map address-info-host + (filter (lambda (x) + (equal? (address-info-type x) "tcp")) + (address-infos (get-host-name))))) + +;; (map ip->string (vector->list +;; (hostinfo-addresses +;; (host-information (current-hostname)))))) + + +) + +(import ulex trace big-chicken srfi-18 test matchable system-information) +(trace-call-sites #t) +(trace + ;; ulex-handler + ;; send + ;; add-to-work-queue + ) + +(define (handler-proc rem-host-port qrykey cmd params) + (print "handler-proc "rem-host-port" "qrykey" "cmd" "params) + (case cmd + ((ping) 'pong) + ((calc) (eval (with-input-from-string params read))) + ((print) + (print "params="params) + params) + ((reflect) `(,rem-host-port ,qrykey ,cmd ,params)) + (else `(data ,data)))) + +(define uconn (run-listener handler-proc)) + +(pp-uconn uconn) + +;; super basic loop back test +(define res #f) +(define targhost (conc (get-host-name)":"4242)) +(define th1 (make-thread (lambda () + (test #f 10 (send-receive uconn targhost 'calc "(+ 5 5)")) + (set! res (send-receive uconn targhost 'ping '())) + (test #f 'pong (send-receive uconn targhost 'ping '())) + ))) + +(thread-start! th1) +(thread-join! th1) + +(print "All done") +(print "Received "res)