Index: TODO ================================================================== --- TODO +++ TODO @@ -16,12 +16,17 @@ # along with Megatest. If not, see . TODO ==== -NextSteps -. Remove servermod.scm +Loose ends +---------- + +. -list-servers not correct +. move *remotedat* into bigdata +. add back server stats on exit (look in rmt:run in rmtmod.scm) + WW15 . fill newview matrix with data, filter pipeline gui elements . improve [script], especially indent handling Index: apimod.scm ================================================================== --- apimod.scm +++ apimod.scm @@ -39,10 +39,11 @@ chicken.process-context.posix chicken.string chicken.time chicken.condition chicken.process + chicken.pathname chicken.random chicken.file ;; (prefix sqlite3 sqlite3:) typed-records @@ -166,15 +167,20 @@ tasks-add tasks-set-state-given-param-key )) (define (api:run-server-process apath dbname) - (let* ((cmd (conc "nbfake megatest -server - -area "apath - " -db "dbname)) - (cleandbname (string-translate dbname "./" "_-")) - (logd (conc apath "/logs")) - (logf (conc logd "/server-"(current-seconds)cleandbname".log"))) + (let* ((cleandbname (pathname-strip-directory dbname)) ;; (string-translate dbname "./" "--")) + (logd (conc apath "/logs")) + (logf (conc logd "/server-launch-";;(current-process-id) + (seconds->year-work-week/day-time-fname (current-seconds)) + "-"cleandbname".log")) + (logf2 (conc logd "/server-" + (seconds->year-work-week/day-time-fname (current-seconds)) + "-"cleandbname"-")) + (cmd (conc "nbfake megatest -server - -area "apath + " -db "dbname" -autolog "logf2))) (if (not (directory-exists? logd)) (create-directory logd #t)) (system (conc "NBFAKE_LOG="logf" "cmd)))) ;; special function to get server @@ -203,15 +209,16 @@ ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) - ((get-server) (api:start-server dbstruct params)) + ((start-server get-server) (api:start-server dbstruct params)) + ((get-server-info) (apply db:get-server-info dbstruct params)) ((register-server) (apply db:register-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((deregister-server) (apply db:deregister-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((get-count-servers) (apply db:get-count-servers dbstruct params)) - + ((get-servers-info) (apply db:get-servers-info dbstruct params)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. ((test-set-state-status-by-id) @@ -343,10 +350,11 @@ ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) ((get-main-run-stats) (apply db:get-main-run-stats dbstruct params)) + ((log-to-main) (apply debug:print 0 *default-log-port* params)) ((get-var) (apply db:get-var dbstruct params)) ((get-run-stats) (apply db:get-run-stats dbstruct params)) ((get-run-times) (apply db:get-run-times dbstruct params)) ;; STEPS ADDED build-assist/README Index: build-assist/README ================================================================== --- /dev/null +++ build-assist/README @@ -0,0 +1,28 @@ +Here is how I like to install chicken for building Megatest. + +This guide assumes you have the Megatest fossil and are in the build-assist directory and +that you have the opensrc fossil with uv synced: + +fossil clone https://www.kiatoa.com/fossils/megatest +fossil clone https://www.kiatoa.com/fossils/opensrc;cd opensrc;fossil uv sync + +Make a build directory and go to it: + +mkdir build;cd build + +Make a destination directory and set PREFIX + +export PREFIX=/opt/chicken/5.3.0; mkdir -p $PREFIX + +Get chicken: + +wget https://code.call-cc.org/releases/5.3.0/chicken-5.3.0.tar.gz + +Extract, build, and install chicken: + +tar xf chicken-5.3.0.tar.gz; cd chicken-5.3.0; make PLATFORM=linux PREFIX=$PREFIX install; cd .. + +Install all needed eggs. +for egg in $(cat ../ck5-egg.list);do echo $egg;ck5 chicken-install $egg;done + +Now run the script ../iup-compile.sh for remaining instructions Index: build-assist/ck5 ================================================================== --- build-assist/ck5 +++ build-assist/ck5 @@ -1,9 +1,17 @@ #!/bin/bash -export PATH=/home/matt/data/buildall/ck5.2/bin:$PATH -if [[ -z /home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64 ]];then - export LD_LIBRARY_PATH=/home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64:$LD_LIBRARY_PATH +# /opt/chicken/5.3.0 +# WHICHCKVER=5.1.0_WW45 +WHICHCKVER=5.3.0 + +BASEDIR=/opt/chicken/$WHICHCKVER +export PATH="$BASEDIR/bin:$PATH" + +NEW_LD_LIBRARY_PATH="$BASEDIR/lib:$BASEDIR/lib64" +if [[ -z "$LD_LIBRARY_PATH" ]];then + export LD_LIBRARY_PATH=$NEW_LD_LIBRARY_PATH else - export LD_LIBRARY_PATH=/home/matt/data/buildall/ck5.2/lib:/home/matt/data/buildall/ck5.2/lib64 + export LD_LIBRARY_PATH="$NEW_LD_LIBRARY_PATH:$LD_LIBRARY_PATH" fi -export CHICKEN_DOC_PAGER=cat + exec "$@" + Index: build-assist/ck5-eggs.list ================================================================== --- build-assist/ck5-eggs.list +++ build-assist/ck5-eggs.list @@ -1,12 +1,14 @@ +csm address-info ansi-escape-sequences apropos base64 crypt csv-abnf directory-utils +dot-locking filepath fmt format http-client itemsmod ADDED build-assist/installing-nng Index: build-assist/installing-nng ================================================================== --- /dev/null +++ build-assist/installing-nng @@ -0,0 +1,23 @@ +wget https://github.com/nanomsg/nng/archive/refs/tags/v1.5.2.tar.gz +tar xf v1.5.2.tar.gz +cd nng-1.5.2 +mkdir build +cd build +make +sudo make install +vi CMakeCache.txt + +Change OFF to ON for shared libraries: + +//Build shared library +BUILD_SHARED_LIBS:BOOL=ON + +make +sudo make install +sudo ldconfig + +chicken-install nng +-or- +git clone https://gitlab.com/ariSun/chicken-nng.git +cd chicken-ngg;chicken-install + Index: build-assist/iup-compile.sh ================================================================== --- build-assist/iup-compile.sh +++ build-assist/iup-compile.sh @@ -4,11 +4,16 @@ fi echo "Put iup, im and cd .a and .so files in PREFIX/lib" echo " 1. get opensrc fossil from https://www.kiatoa.com/fossils/opensrc" echo " 2. list the unversioned files and export the cd, im and iup lib for your kernel (try uname -a for the kernel number) 4.15 ==> 415_64" +echo ' for x in $(fossil uv list | grep 415| awk ''{print $6}'');do targ=$(echo $x|cut -d/ -f3); fossil uv export $x $targ; done' echo " 3. untar iup, im and cp tars into a clean working dir and then copy:" +echo " find . -name \*.a -print -exec cp {} $PREFIX/lib \;" +echo " find . -name \*.so -print -exec cp {} $PREFIX/lib \;" +echo " rsync -av include/ $PREFIX/include/" +echo " or (depending on versions and what you see in the iup tars - they seem to vary" echo " cp *.a *.so $PREFIX/lib" echo " cp include/*.h $PREFIX/include" echo " 4. run the chicken-install like this:" echo "If you use a wrapper (e.g. ck5) to create the chicken environment:" Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -275,16 +275,16 @@ launch:is-test-alive common:get-num-cpus common:wait-for-normalized-load common:wait-for-cpuload tasks:kill-server -server:get-logs-list -server:get-list -server:get-num-alive -server:get-best -server:get-first-best -server:get-rand-best +;; server:get-logs-list +;; server:get-list +;; server:get-num-alive +;; server:get-best +;; server:get-first-best +;; server:get-rand-best server:record->id server:get-num-servers server:logf-get-start-info get-uname realpath @@ -314,10 +314,11 @@ seconds->time-string seconds->work-week/day-time seconds->work-week/day seconds->year-work-week/day seconds->year-work-week/day-time +seconds->year-work-week/day-time-fname seconds->year-week/day-time seconds->quarter common:date-time->seconds common:find-start-mark-and-mark-delta common:expand-cron-slash @@ -2825,21 +2826,21 @@ (system (conc "gzip " logfile)) (unset-environment-variable! "TARGETHOST_LOGF") (unset-environment-variable! "TARGETHOST")))) -(define (server:get-logs-list area-path) +#;(define (server:get-logs-list area-path) (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) (server-logs (glob (conc area-path"/logs/server-*-*.log"))) ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; -(define (server:get-list areapath #!key (limit #f)) +#;(define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then ;; continue @@ -2887,11 +2888,11 @@ (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) -(define (server:get-num-alive srvlst) +#;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn @@ -2914,11 +2915,11 @@ ;; mod-time host port start-time pid ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; -(define (server:get-best srvlst) +#;(define (server:get-best srvlst) (let* ((nums (server:get-num-servers)) (now (current-seconds)) (slst (sort (filter (lambda (rec) (if (and (list? rec) @@ -2942,18 +2943,18 @@ (list-ref b 3)))))) (if (> (length slst) nums) (take slst nums) slst))) -(define (server:get-first-best areapath) +#;(define (server:get-first-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and srvrs (not (null? srvrs))) (car srvrs) #f))) -(define (server:get-rand-best areapath) +#;(define (server:get-rand-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and (list? srvrs) (not (null? srvrs))) (let* ((len (length srvrs)) (idx (pseudo-random-integer len))) @@ -3579,10 +3580,14 @@ (seconds->local-time sec) "ww%V.%u")) (define (seconds->year-work-week/day sec) (time->string (seconds->local-time sec) "%yww%V.%w")) + +(define (seconds->year-work-week/day-time-fname sec) + (time->string + (seconds->local-time sec) "%yww%V.%w.%H%M%S")) (define (seconds->year-work-week/day-time sec) (time->string (seconds->local-time sec) "%Yww%V.%w %H:%M")) Index: configfmod.scm ================================================================== --- configfmod.scm +++ configfmod.scm @@ -86,10 +86,11 @@ (prefix base64 base64:) (prefix dbi dbi:) (prefix sqlite3 sqlite3:) (srfi 18) directory-utils + dot-locking format matchable md5 message-digest regex @@ -112,10 +113,15 @@ ;; parameters ;;====================================================================== ;; while targets are Megatest specific they are a useful concept (define mytarget (make-parameter #f)) + +;; locking is optional, many environments don't care (e.g. running on one machine) +;; NOTE: the locker must follow the same syntax as with-dot-lock* +;; +(define my-with-lock (make-parameter with-dot-lock*)) ;;====================================================================== ;; move debug stuff to separate module then put these back where they belong ;;====================================================================== ;;====================================================================== @@ -1186,34 +1192,36 @@ ;; DO THE LOCKING AROUND THE CALL ;;====================================================================== ;; (define (configf:write-alist cdat fname) ;; (if (not (common:faux-lock fname)) - (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname) - (let* ((dat (configf:config->alist cdat)) - (res - (begin - (with-output-to-file fname ;; first write out the file - (lambda () - (pp dat))) - ;; I don't like this. It makes write-alist opaque and complicated. -mrw- - (if (file-exists? fname) ;; now verify it is readable - (if (configf:read-alist fname) - #t ;; data is good. - (begin - (handle-exceptions + ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname) + ((my-with-lock) + fname + (lambda () + (let* ((dat (configf:config->alist cdat)) + (res + (begin + (with-output-to-file fname ;; first write out the file + (lambda () + (pp dat))) + ;; I don't like this. It makes write-alist opaque and complicated. -mrw- + (if (file-exists? fname) ;; now verify it is readable + (if (configf:read-alist fname) + #t ;; data is good. + (begin + (handle-exceptions exn - (begin - (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) - #f) - (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") - (delete-file fname)) - #f)) - #f)))) - ;; (common:faux-unlock fname) - res)) + (begin + (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn) + #f) + (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") + (delete-file fname)) + #f)) + #f)))) + res)))) (define (common:get-fields cfgdat) (let ((fields (hash-table-ref/default cfgdat "fields" '()))) (map car fields))) ) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -42,10 +42,11 @@ (declare (uses subrunmod)) (declare (uses tree)) (declare (uses vgmod)) (declare (uses testsmod)) (declare (uses tasksmod)) +(declare (uses dbi)) ;; needed for configf scripts, scheme etc. ;; (declare (uses apimod.import)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs.import)) @@ -89,10 +90,11 @@ (prefix iup iup:) canvas-draw canvas-draw-iup (prefix sqlite3 sqlite3:) + (prefix dbi dbi:) srfi-1 regex regex-case srfi-69 typed-records sparse-vectors format @@ -238,10 +240,11 @@ ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) + ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) @@ -2842,11 +2845,11 @@ (dashboard:runs-horizontal-slider runs-dat)))) controls )) (views-cfgdat (common:load-views-config)) (additional-tabnames '()) - (tab-start-num 5) ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW + (tab-start-num 6) ;; DON'T FORGET TO UPDATE THIS WHEN CHANGING THE STANDARD TABS BELOW ;; (data (dboard:tabdat-init (make-d:data))) (additional-views ;; process views-dat (let ((tab-num tab-start-num) (result '())) (for-each @@ -2889,18 +2892,21 @@ runs-view ;; (make-runs-view commondat runs2-dat 2) (dashboard:runs-summary commondat onerun-dat tab-num: 2) (dashboard:run-controls commondat runcontrols-dat tab-num: 3) (dashboard:run-times commondat runtimes-dat tab-num: 4) + (iup:vbox (iup:button "Pushme")) ;; tab 5 additional-views))) ;; (set! (iup:callback tabs tabchange-cb:) (lambda (a b c)(print "SWITCHED TO TAB: " a " " b " " c))) (iup:attribute-set! tabs "TABTITLE0" "Summary") (iup:attribute-set! tabs "TABTITLE1" "Runs") ;; (iup:attribute-set! tabs "TABTITLE2" "Runs2") (iup:attribute-set! tabs "TABTITLE2" "Run Summary") (iup:attribute-set! tabs "TABTITLE3" "Run Control") (iup:attribute-set! tabs "TABTITLE4" "Run Times") + (iup:attribute-set! tabs "TABTITLE5" "Sys Status") + ;; (iup:attribute-set! tabs "TABTITLE3" "New View") ;; (iup:attribute-set! tabs "TABTITLE4" "Run Control") ;; set the tab names for user added tabs (for-each @@ -3664,11 +3670,11 @@ ;;====================================================================== ;; The heavy lifting starts here ;;====================================================================== (define (dashboard-main) - (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; + (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; not used for now, update for .db area and use for write access detection #;(if (and (common:file-exists? mtdb-path) (file-writable? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) (common:exit-on-version-changed))) (let* ((commondat (dboard:commondat-make))) Index: dbi.scm ================================================================== --- dbi.scm +++ dbi.scm @@ -17,7 +17,8 @@ ;; along with Megatest. If not, see . ;;====================================================================== (declare (unit dbi)) +(declare (uses autoload)) (include "dbi/dbi.scm") Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -44,15 +44,16 @@ db:get-ddb db:open-dbdat db:open-run-db db:open-inmem-db db:setup -db:get-main-lock +;; db:get-main-lock db:with-lock-db db:get-iam-server-lock db:get-locker db:take-lock +db:steal-lock-db db:release-lock db:general-sqlite-error-dump db:first-result-default db:generic-error-printout db:with-db @@ -226,10 +227,11 @@ db:get-cache-stmth db:register-server db:deregister-server db:get-server-info db:get-count-servers +db:get-servers-info db:get-steps-info-by-id make-dbr:dbdat dbr:dbdat-db dbr:dbdat-inmem @@ -511,11 +513,11 @@ ;; The lockname is the filename (can have many to one, run-id to fname ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (db:get-main-lock dbfile) +#;(define (db:get-main-lock dbfile) (db:with-lock-db dbfile (lambda (dbh dbfile) (db:get-iam-server-lock dbh dbfile)))) (define (db:with-lock-db dbfile proc) @@ -524,38 +526,47 @@ ;; (sqlite3:finalize! dbh) res)) ;; called before db is open? ;; -(define (db:get-iam-server-lock dbh dbfname) +(define (db:get-iam-server-lock dbh dbfname host port) (sqlite3:with-transaction dbh (lambda () (let* ((locker (db:get-locker dbh dbfname))) (if locker - #f - (db:take-lock dbh dbfname)))))) + locker + (db:take-lock dbh dbfname port)))))) ;; (exn sqlite3) (define (db:get-locker dbh dbfname) (condition-case - (sqlite3:first-row dbh "SELECT owner_pid,owner_host,event_time FROM locks WHERE lockname=?;" dbfname) + (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname) (exn (sqlite3) #f))) ;; should never fail because it is run in a transaction with a test for the lock ;; -(define (db:take-lock dbh dbfname) +(define (db:take-lock dbh dbfname port) ;; (condition-case ;; (begin - (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host) VALUES (?,?,?);" dbfname (current-process-id) (get-host-name)) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) ;; #t) ;; (exn (sqlite3) #f))) #t) -(define (db:release-lock dbh dbfname) +(define (db:steal-lock-db dbh dbfname port) + (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname) + (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host,owner_port) VALUES (?,?,?,?);" dbfname (current-process-id) (get-host-name) port) + #t) + +(define (db:release-lock-force dbh dbfname) (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname)) +;; release a lock if it matches +(define (db:release-lock dbh dbfname host port) + (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=? AND owner_host=? AND owner_port=?;" dbfname host port)) + ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== (define (db:general-sqlite-error-dump exn stmt . params) @@ -1515,10 +1526,11 @@ (sqlite3:execute db "CREATE TABLE IF NOT EXISTS locks (id INTEGER PRIMARY KEY, lockname TEXT, owner_pid INTEGER, owner_host TEXT, + owner_port TEXT, event_time TIMESTAMP DEFAULT (strftime('%s','now')), CONSTRAINT lock_constraint UNIQUE (lockname));") ;; maps to *srvpktspec* from http-transportmod (sqlite3:execute db "CREATE TABLE IF NOT EXISTS servers @@ -5855,11 +5867,12 @@ #f) ;; server already deregistered (begin (sqlite3:execute db "DELETE FROM servers WHERE apath=? AND dbname=?;" ;; (host,port,servkey,pid,ipaddr,apath,dbname) VALUES (?,?,?,?,?,?,?);" ;; host port servkey pid ipaddr apath dbname) - #;(db:get-server-info dbstruct apath dbname))))))))) + #;(db:get-server-info dbstruct apath dbname) + 'done)))))))) (define (db:get-server-info dbstruct apath dbname) (db:with-db dbstruct #f #f @@ -5882,7 +5895,20 @@ (max res count)) 0 db "SELECT count(*) FROM servers WHERE apath=?;" apath)))) + +(define (db:get-servers-info dbstruct apath) + (db:with-db + dbstruct + #f #f + (lambda (db) + (sqlite3:fold-row + (lambda (res . row) + (cons row res)) + '() + db + "SELECT * FROM servers WHERE apath=?;" + apath)))) ) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -902,14 +902,14 @@ (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) - (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) + (colnames (list "Id" "MTver" "Pid" "Host" "Interface:port" "Runtime" "State" "Db")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) - (let ((servers (server:get-list *toppath* limit: 10))) + (let ((servers (rmt:get-servers-info *toppath*)#;(server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) @@ -916,28 +916,25 @@ ;; (set! colnum (+ 1 colnum))) ;; colnames) (set! rownum 1) (for-each (lambda (server) - (set! colnum 0) - (match-let (((mod-time host port start-time server-id pid) + (set! colnum 0) ;; id host port servkey pid ipaddr apath dbname event_time + (match-let (((id host port server-id pid ipaddr apath dbname start-time) ;; (mod-time host port start-time server-id pid) server)) - (let* ((uptime (- (current-seconds) mod-time)) - (runtime (if start-time + (let* ((uptime (- (current-seconds) start-time)) + #;(runtime (if start-time (- mod-time start-time) 0)) - (vals (list "-" ;; (vector-ref server 0) ;; Id + (vals (list server-id ;; (vector-ref server 0) ;; Id "-" ;; (vector-ref server 9) ;; MT-Ver pid ;; (vector-ref server 1) ;; Pid host ;; (vector-ref server 2) ;; Hostname (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port - (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) - (cond - ((< uptime 5) "alive") - ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State - (else "dead")) - "-" ;; (vector-ref server 12) ;; RunId + (seconds->hr-min-sec uptime) ;; Runtime + "Running" ;; State - Do some kind of ping here + dbname ;; Db ))) (for-each (lambda (val) (let* ((row-col (conc rownum ":" colnum)) (curr-val (iup:attribute servers-matrix row-col))) (if (not (equal? (conc val) curr-val)) Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -8,20 +8,24 @@ (import scheme chicken.base chicken.string chicken.port chicken.process-context + chicken.process-context.posix + (prefix mtargs args:) srfi-1 + system-information ) ;;====================================================================== ;; debug stuff ;;====================================================================== (define verbosity (make-parameter '())) (define *default-log-port* (current-error-port)) +(define debug:print-logger (make-parameter #f)) ;; se to a proc to call on every logging print (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (args:get-arg "-debug-noprop") (get-environment-variable "MT_DEBUG_MODE")))) @@ -99,25 +103,35 @@ ((and (number? vb) (list? n)) (member vb n)) (else #f)))) +(define (debug:handle-remote-logging params) + (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now + ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " + (string-intersperse (map conc params) " ") "; " + (string-intersperse (command-line-arguments) " "))))) + (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () ;; (if *logging* ;; (db:log-event (apply conc params)) (apply print params) - )))) ;; ) + (debug:handle-remote-logging params) + ))) + #t ;; only here to make remote stuff happy. It'd be nice to fix that ... + ) (define (debug:print-error n e . params) ;; normal print (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (apply print "ERROR: " params) + (debug:handle-remote-logging (cons "ERROR: " params)) ))) ;; pass important messages to stderr (if (and (eq? n 0)(not (eq? e (current-error-port)))) (with-output-to-port (current-error-port) (lambda () @@ -127,8 +141,16 @@ (define (debug:print-info n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () (apply print "INFO: (" n ") " params) ;; res) + (debug:handle-remote-logging (cons "INFO: " params)) )))) +(define (debug:print-warn n e . params) + (if (debug:debug-mode n) + (with-output-to-port (if (port? e) e (current-error-port)) + (lambda () + (apply print "WARN: (" n ") " params) ;; res) + (debug:handle-remote-logging (cons "WARN: " params)) + )))) ) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -437,10 +437,11 @@ - to automatically figure out hostname -adjutant C,M : start the server/adjutant with allocated cores C and Mem M (Gig), use 0,0 to auto use full machine -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile + -autolog logfilebase : appends pid and host to logfilebase for logfile -list-servers : list the servers -kill-servers : kill all servers -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests @@ -630,10 +631,11 @@ "-run-id" "-ping" "-refdb2dat" "-o" "-log" + "-autolog" "-sync-log" "-since" "-fields" "-recover-test" ;; run-id,test-id - used internally to recover a test stuck in RUNNING state "-sort" @@ -784,20 +786,24 @@ ;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not ;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation ;; where (launch:setup) returns #f? ;; - (if (or (args:get-arg "-log")#;(args:get-arg "-server")) ;; redirect the log always when a server + (if (or (args:get-arg "-log") ;;(args:get-arg "-server") ;; redirect the log always when a server + (args:get-arg "-autolog")) (handle-exceptions exn (begin (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) ) - (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified - (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name - (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) - (oup (open-logfile logf))) + (let* ((tl (or (args:get-arg "-log") + (args:get-arg "-autolog") ;; autolog provides the basename .../logs/something- for the logfile + (launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified + (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name + (conc tl (current-process-id)"-"(get-host-name)".log") + (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (oup (open-logfile logf))) (if (not (args:get-arg "-log")) (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log (debug:print-info 0 *default-log-port* "Sending log output to " logf) (set! *default-log-port* oup)))) @@ -1135,38 +1141,40 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit - (let* ((servers (server:get-list *toppath*)) + (let* ((servers (rmt:get-servers-info *toppath*)) (fmtstr "~8a~22a~20a~20a~8a\n")) - (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") - (format #t fmtstr "===" "==============" "=========" "========" "=====") + ;; id INTEGER PRIMARY KEY, + ;; host TEXT, + ;; port INTEGER, + ;; servkey TEXT, + ;; pid TEXT, + ;; ipaddr TEXT, + ;; apath TEXT, + ;; dbname TEXT, + ;; event_time + (format #t fmtstr "pid" "Interface:port" "State" "dbname" "apath") + (format #t fmtstr "===" "==============" "=====" "======" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) - (let* ((mtm (any->number (car server))) - (mod (if mtm (- (current-seconds) mtm) "unk")) - (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) - (url (conc (cadr server) ":" (caddr server))) - (pid (list-ref server 4)) - (alv (if (number? mod)(< mod 10) #f))) - (format #t - fmtstr - pid - url - (seconds->hr-min-sec age) - (seconds->hr-min-sec mod) - (if alv "alive" "dead")) - (if (and alv - (args:get-arg "-kill-servers")) + (match-let + (((id host port servkey pid ipaddr apath dbname event_time) server)) + (format #t + fmtstr + pid + (conc host":"port) + (if (server-ready? host port servkey) "Running" "Dead") + dbname ;; (seconds->hr-min-sec mod) + apath + ) + (if (args:get-arg "-kill-servers") (begin (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid " !!needs completion!!") #;(server:kill server))))) - (sort servers (lambda (a b) - (let ((ma (or (any->number (car a)) 9e9)) - (mb (or (any->number (car b)) 9e9))) - (> ma mb))))) + servers) ;; (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -133,26 +133,26 @@ ) (define (servdat->url sdat) (conc (servdat-host sdat)":"(servdat-port sdat))) - ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u -(defstruct rmt:remote - (conns (make-hash-table)) ;; apath/dbname => rmt:conn +(defstruct remotedat + (conns (make-hash-table)) ;; apath/dbname => conndat ) -(defstruct rmt:conn +(defstruct conndat (apath #f) (dbname #f) (fullname #f) (hostport #f) (ipaddr #f) (port #f) + (socket #f) (srvpkt #f) (srvkey #f) (lastmsg 0) (expires 0) (inport #f) @@ -169,16 +169,16 @@ ;;====================================================================== ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; replaces *runremote* -(define *rmt:remote* (make-rmt:remote)) +(define *remotedat* (make-remotedat)) ;; -> http://abc.com:900/ ;; -(define (rmt:conn->uri conn entrypoint) - (conc "http://"(rmt:conn-ipaddr conn)":"(rmt:conn-port conn)"/"entrypoint)) +(define (conndat->uri conn entrypoint) + (conc "http://"(conndat-ipaddr conn)":"(conndat-port conn)"/"entrypoint)) ;; set up the api proc, seems like there should be a better place for this? (define api-proc (make-parameter conc)) (api-proc api:process-request) @@ -187,15 +187,15 @@ ;; ;; else setup a connection ;; ;; if that fails, return '(#f "some reason") ;; NB// convert to raising an exception ;; -(define (rmt:get-conn remote apath dbname) - (let* ((fullname (db:dbname->path apath dbname)) ;; we'll switch to full name later - (conn (hash-table-ref/default (rmt:remote-conns remote) dbname #f))) +(define (rmt:get-conn remdat apath dbname) + (let* ((fullname (db:dbname->path apath dbname)) + (conn (hash-table-ref/default (remotedat-conns remdat) fullname #f))) (if (and conn - (< (current-seconds) (rmt:conn-expires conn))) + (< (current-seconds) (conndat-expires conn))) conn #f))) (define (rmt:find-main-server apath dbname) (let* ((pktsdir (get-pkts-dir apath)) @@ -202,69 +202,105 @@ (all-srvpkts (get-all-server-pkts pktsdir *srvpktspec*)) ;; (dbpath (conc apath "/" dbname)) (viable-srvs (get-viable-servers all-srvpkts dbname))) (get-the-server apath viable-srvs))) -;; looks for a connection to main + +(define *connstart-mutex* (make-mutex)) +(define *last-main-start* 0) + +;; looks for a connection to main, returns if have and not exired +;; creates new otherwise +;; ;; connections for other servers happens by requesting from main ;; ;; TODO: This is unnecessarily re-creating the record in the hash table ;; -(define (rmt:open-main-connection remote apath) - (let* ((dbname (db:run-id->dbname #f)) - (the-srv (rmt:find-main-server apath dbname)) - (start-main-srv (lambda () - ;; srv not ready, delay a little and try again - (api:run-server-process apath dbname) - (thread-sleep! 4) - (rmt:open-main-connection remote apath) ;; TODO: Add limit to number of tries - ))) - (if the-srv ;; yes, we have a server, now try connecting to it - (let* ((srv-addr (server-address the-srv)) - (ipaddr (alist-ref 'ipaddr the-srv)) - (port (alist-ref 'port the-srv)) - (srvkey (alist-ref 'servkey the-srv)) - (fullpath (db:dbname->path apath dbname)) - (srvready (server-ready? ipaddr port srvkey))) - (if srvready - (begin - (hash-table-set! (rmt:remote-conns remote) - dbname ;; fullpath ;; yes, I'd prefer it to be fullpath - FIXME later - (make-rmt:conn - apath: apath - dbname: dbname - fullname: fullpath - hostport: srv-addr - ipaddr: ipaddr - port: port - srvpkt: the-srv - srvkey: srvkey ;; generated by rmt:get-signature on the server side - lastmsg: (current-seconds) - expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping - )) - #t) - (start-main-srv))) - (start-main-srv)))) - -;; NB// remote is a rmt:remote struct +(define (rmt:open-main-connection remdat apath) + (let* ((fullpath (db:dbname->path apath "/.db/main.db")) + (conns (remotedat-conns remdat)) + (conn (hash-table-ref/default conns fullpath #f))) ;; TODO - create call for this + (cond + ((and conn ;; conn is NOT a socket, just saying ... + (< (current-seconds) (conndat-expires conn))) + #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died + ((and conn + (>= (current-seconds)(conndat-expires conn))) + (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") + (if (conndat-socket conn) + (nng-close! (conndat-socket conn))) + (hash-table-set! conns fullpath #f) ;; clean up + (rmt:open-main-connection remdat apath)) + (else + ;; Below we will find or create and connect to main + (let* ((dbname (db:run-id->dbname #f)) + (the-srv (rmt:find-main-server apath dbname)) + (start-main-srv (lambda () ;; call IF there is no the-srv found + (mutex-lock! *connstart-mutex*) + (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server + (begin + (api:run-server-process apath dbname) + (set! *last-main-start* (current-seconds)) + (thread-sleep! 1))) + (mutex-unlock! *connstart-mutex*) + (rmt:open-main-connection remdat apath) ;; TODO: Add limit to number of tries + ))) + (if (not the-srv) ;; have server, try connecting to it + (start-main-srv) + (let* ((srv-addr (server-address the-srv)) ;; need serv + (ipaddr (alist-ref 'ipaddr the-srv)) + (port (alist-ref 'port the-srv)) + (srvkey (alist-ref 'servkey the-srv)) + (fullpath (db:dbname->path apath dbname)) + + (new-the-srv (make-conndat + apath: apath + dbname: dbname + fullname: fullpath + hostport: srv-addr + socket: (open-nn-connection srv-addr) + ipaddr: ipaddr + port: port + srvpkt: the-srv + srvkey: srvkey ;; generated by rmt:get-signature on the server side + lastmsg: (current-seconds) + expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping + ))) + (hash-table-set! conns fullpath new-the-srv))) + #t))))) + +;; NB// remdat is a remotedat struct ;; -(define (rmt:general-open-connection remote apath dbname #!key (num-tries 5)) - (let* ((mdbname (db:run-id->dbname #f)) - (mconn (rmt:get-conn remote apath mdbname))) +(define (rmt:general-open-connection remdat apath dbname #!key (num-tries 5)) + (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") + (let* ((mdbname (db:run-id->dbname #f)) + (fullname (db:dbname->path apath dbname)) + (conns (remotedat-conns remdat)) + (mconn (rmt:get-conn remdat apath mdbname))) + (if (and mconn + (not (debug:print-logger))) + (begin + (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") + (debug:print-logger rmt:log-to-main))) (cond ((or (not mconn) ;; no channel open to main? - (< (rmt:conn-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease - (rmt:open-main-connection remote apath) - (rmt:general-open-connection remote apath mdbname)) - ((not (rmt:get-conn remote apath dbname)) ;; no channel open to dbname? - (let* ((res (rmt:send-receive-real remote apath mdbname 'get-server `(,apath ,dbname)))) + (< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease + (if mconn ;; previously opened - clean up NB// consolidate this with the similar code in open main above + (begin + (debug:print-info 0 *default-log-port* "Clearing out connection to main that has expired.") + (nng-close! (conndat-socket mconn)) + (hash-table-set! conns fullname #f))) + (rmt:open-main-connection remdat apath) + (rmt:general-open-connection remdat apath mdbname)) + ((not (rmt:get-conn remdat apath dbname)) ;; no channel open to dbname? + (let* ((res (rmt:send-receive-real remdat apath mdbname 'get-server `(,apath ,dbname)))) (case res ((server-started) (if (> num-tries 0) (begin (thread-sleep! 2) - (rmt:general-open-connection remote apath dbname num-tries: (- num-tries 1))) + (rmt:general-open-connection remdat apath dbname num-tries: (- num-tries 1))) (begin (debug:print-error 0 *default-log-port* "Failed to start servers needed or open channel to "apath", "dbname) (exit 1)))) (else (if (list? res) ;; server has been registered and the info was returned. pass it on. @@ -275,16 +311,17 @@ ;; ".db/1.db") (match res ((host port servkey pid ipaddr apath dbname) (debug:print-info 0 *default-log-port* "got "res) - (hash-table-set! (rmt:remote-conns remote) - dbname - (make-rmt:conn + (hash-table-set! conns + fullname + (make-conndat apath: apath dbname: dbname hostport: (conc host":"port) + socket: (open-nn-connection (conc host":"port)) ipaddr: ipaddr port: port srvkey: servkey lastmsg: (current-seconds) expires: (+ (current-seconds) 60)))) @@ -291,72 +328,78 @@ (else (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) res) (begin (debug:print-info 0 *default-log-port* "Unexpected result: " res) - res)))))) - - - ))) + res))))))) + + #t)) ;;====================================================================== ;; FOR DEBUGGING SET TO #t -(define *localmode* #t) +;; (define *localmode* #t) +(define *localmode* #f) (define *dbstruct* (make-dbr:dbstruct)) ;; Defaults to current area ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) - (if (not *rmt:remote*)(set! *rmt:remote* (make-rmt:remote))) + ;; (if (not *remotedat*)(set! *remotedat* (make-remotedat))) (let* ((apath *toppath*) - (conns *rmt:remote*) + (remdat *remotedat*) + (conns (remotedat-conns remdat)) ;; just checking that remdat is a remotedat (dbname (db:run-id->dbname rid))) (if *localmode* (let* ((dbdat (dbr:dbstruct-get-dbdat *dbstruct* dbname)) (indat `((cmd . ,cmd)(params . ,params)))) (api:process-request *dbstruct* indat) ;; (api:process-request dbdat indat) ) (begin - (rmt:general-open-connection conns apath dbname) - (rmt:send-receive-real conns apath dbname cmd params))))) + (rmt:open-main-connection remdat apath) + (if rid (rmt:general-open-connection remdat apath dbname)) + (rmt:send-receive-real remdat apath dbname cmd params))))) #;(define (rmt:send-receive-setup conn) - (if (not (rmt:conn-inport conn)) - (let-values (((i o) (tcp-connect (rmt:conn-ipaddr conn) - (rmt:conn-port conn)))) - (rmt:conn-inport-set! conn i) - (rmt:conn-outport-set! conn o)))) + (if (not (conndat-inport conn)) + (let-values (((i o) (tcp-connect (conndat-ipaddr conn) + (conndat-port conn)))) + (conndat-inport-set! conn i) + (conndat-outport-set! conn o)))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future ;; -(define (rmt:send-receive-real remote apath dbname cmd params) - (let* ((conn (rmt:get-conn remote apath dbname))) +(define (rmt:send-receive-real remdat apath dbname cmd params) + (let* ((conn (rmt:get-conn remdat apath dbname))) (assert conn "FATAL: rmt:send-receive-real called without the needed channels opened") - (let* ((key #f) - (host (rmt:conn-ipaddr conn)) - (port (rmt:conn-port conn)) + (assert (conndat-socket conn) "FATAL: rmt:send-receive-real called without the channel socket opened.") + (let* ((soc (conndat-socket conn)) + (key #f) + (host (conndat-ipaddr conn)) + (port (conndat-port conn)) (payload `((cmd . ,cmd) - (key . ,(rmt:conn-srvkey conn)) + (key . ,(conndat-srvkey conn)) (params . ,params))) - (res (open-send-receive-nn (conc host":"port) - (sexpr->string payload)))) - (string->sexpr res)))) + (res (send-receive-nn soc ;; (open-send-receive-nn (conc host":"port) + (sexpr->string payload)))) + (if (member res '("#")) ;; TODO - fix this in string->sexpr + #f + (string->sexpr res))))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future. ;; ;; Purpose - call the main.db server and request a server be started ;; for the given area path and dbname ;; -;; (define (rmt:send-receive-server-start remote apath dbname) -;; (let* ((conn (rmt:get-conn remote apath dbname))) +;; (define (rmt:send-receive-server-start remdat apath dbname) +;; (let* ((conn (rmt:get-conn remdat apath dbname))) ;; (assert conn "FATAL: Unable to connect to db "apath"/"dbname) ;; #;(let* ((res (with-input-from-request -;; (rmt:conn->uri conn "api") +;; (conndat->uri conn "api") ;; `((params . (,apath ,dbname))) ;; read-string))) ;; (string->sexpr res)))) (define (rmt:print-db-stats) @@ -406,14 +449,17 @@ ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (rmt:kill-server run-id) - (rmt:send-receive 'kill-server run-id (list run-id))) + (rmt:send-receive 'kill-server #f (list run-id))) (define (rmt:start-server run-id) - (rmt:send-receive 'start-server 0 (list run-id))) + (rmt:send-receive 'start-server #f (list run-id))) + +(define (rmt:get-server-info apath dbname) + (rmt:send-receive 'get-server-info #f (list apath dbname))) ;;====================================================================== ;; M I S C ;;====================================================================== @@ -764,10 +810,13 @@ ) ;; ) (define (rmt:get-main-run-stats run-id) (rmt:send-receive 'get-main-run-stats #f (list run-id))) +(define (rmt:log-to-main . params) + (rmt:send-receive 'log-to-main #f params)) + (define (rmt:get-var run-id varname) (rmt:send-receive 'get-var run-id (list run-id varname))) (define (rmt:del-var run-id varname) (rmt:send-receive 'del-var run-id (list run-id varname))) @@ -1471,54 +1520,56 @@ (define (common:api-changed?) (not (equal? (substring (->string megatest-version) 0 4) (substring (conc (common:get-last-run-version)) 0 4)))) -(define (rmt:server-shutdown) +;; host and port are used to ensure we are remove proper records +(define (rmt:server-shutdown host port) (let ((dbfile (servdat-dbfile *server-info*))) (debug:print-info 0 *default-log-port* "dbfile is "dbfile) (if dbfile (let* ((am-server (args:get-arg "-server")) (dbfile (args:get-arg "-db")) (apath *toppath*) - (dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) - (db (dbr:dbdat-db dbdat)) - (inmem (dbr:dbdat-db dbdat)) - ) - ;; do a final sync here - (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) - (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) - ;; let's finalize here - (debug:print-info 0 *default-log-port* "Finalizing db and inmem") - (sqlite3:finalize! db) - (sqlite3:finalize! inmem) - (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete") - (if am-server + (remdat *remotedat*)) ;; foundation for future fix + (if *dbstruct-db* + (let* ((dbdat (db:get-dbdat *dbstruct-db* apath dbfile)) + (db (dbr:dbdat-db dbdat)) + (inmem (dbr:dbdat-db dbdat)) + ) + ;; do a final sync here + (debug:print-info 0 *default-log-port* "Doing final sync for "apath" "dbfile" at "(current-seconds)) + (db:sync-inmem->disk *dbstruct-db* apath dbfile force-sync: #t) + ;; let's finalize here + (debug:print-info 0 *default-log-port* "Finalizing db and inmem") + (if (sqlite3:database? db) + (sqlite3:finalize! db) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, db is not a database, not finalizing...")) + (if (sqlite3:database? inmem) + (sqlite3:finalize! inmem) + (debug:print-info 0 *default-log-port* "in rmt:server-shutdown, inmem is not a database, not finalizing...")) + (debug:print-info 0 *default-log-port* "Finalizing db and inmem complete")) + (debug:print-info 0 *default-log-port* "Db was never opened, no cleanup to do.")) + (if (not am-server) + (debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!") (if (string-match ".*/main.db$" dbfile) (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) ".pkt"))) (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) (delete-file* pkt-file) - (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile) - (db:with-lock-db (servdat-dbfile *server-info*) - (lambda (dbh dbfile) - (db:release-lock dbh dbfile)))) + (debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port) + (db:with-lock-db + (servdat-dbfile *server-info*) + (lambda (dbh dbfile) + (db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove (let* ((sdat *server-info*) ;; we have a run-id server (host (servdat-host sdat)) (port (servdat-port sdat)) - (uuid (servdat-uuid sdat))) - (if (not (string-match ".db/main.db" (args:get-arg "-db"))) - (let* ((res (rmt:deregister-server *rmt:remote* ;; TODO/BUG: why is this requiring *rmt:remote*? - *toppath* - (servdat-host *server-info*) ;; iface - (servdat-port *server-info*) - (servdat-uuid *server-info*) - (current-process-id) - ))) - (debug:print-info 0 *default-log-port* "deregistered-server, res="res))) - + (uuid (servdat-uuid sdat)) + (res (rmt:deregister-server remdat *toppath* host port uuid dbfile))) + (debug:print-info 0 *default-log-port* "deregistered-server, res="res) (debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid) ))))))) (define (std-exit-procedure) ;;(common:telemetry-log-close) @@ -1537,11 +1588,12 @@ (let* ((start-time (current-seconds))) (if (and *server-info* *unclean-shutdown*) (begin (debug:print-info 0 *default-log-port* "Unclean server exit, calling server-shtudown") - (rmt:server-shutdown))) + (rmt:server-shutdown (servdat-host *server-info*) + (servdat-port *server-info*)))) (debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds")) ;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated #;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) @@ -1581,12 +1633,17 @@ ;; called in megatest.scm, host-port is string hostname:port ;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running ;; in the same process as the server. ;; -(define (server:ping host port server-id #!key (do-exit #f)) - (server-ready? host port server-id)) +;; conn is a conndat record +;; +(define (server:ping conn #!key (do-exit #f)) + (let* ((req (conndat-socket conn)) + (srvkey (conndat-srvkey conn)) + (msg (sexpr->string '(ping ,srvkey)))) + (send-receive-nn req msg))) ;; (server-ready? host port server-id)) ;;====================================================================== ;; http-transportmod.scm contents moved here ;;====================================================================== @@ -1600,24 +1657,25 @@ ;; ====================================================================== (define (http-get-function fnkey) (hash-table-ref/default *http-functions* fnkey (lambda () "nothing here yet"))) +;; Main entry point to start a server. was start-server (define (rmt:run hostn) ;; ;; Configurations for server ;; (tcp-buffer-size 2048) ;; (max-connections 2048) - (debug:print 2 *default-log-port* "Attempting to start the server ...") + (debug:print 2 *default-log-port* "PID: "(current-process-id)". Attempting to start the server ...") (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (port (portlogger:open-run-close portlogger:find-port)) - (link-tree-path (common:get-linktree)) + ;; (link-tree-path (common:get-linktree)) ;; (tmp-area (common:get-db-tmp-area)) #;(start-file (conc tmp-area "/.server-start"))) (debug:print-info 0 *default-log-port* "portlogger recommended port: " port) (if *server-info* (begin @@ -1638,13 +1696,16 @@ (set! *db-last-access* (current-seconds)) (nng-send rep resdat) (loop (nng-recv rep))))))) (debug:print-info 0 *default-log-port* "After server, should never see this") ;; server exit stuff here - (let* ((portnum (servdat-port *server-info*))) + (let* ((portnum (servdat-port *server-info*)) + (host (servdat-host *server-info*))) (portlogger:open-run-close portlogger:set-port portnum "released") - (rmt:server-shutdown) + (if (not (equal? (get-host-name) host)) + (debug:print-info 0 *default-log-port* "Server shutdown called for host "host", but we are on "(get-host-name)) + (rmt:server-shutdown host portnum)) ;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) ;; (debug:print-info 0 *default-log-port* "Average cached write time " @@ -1676,82 +1737,70 @@ (set! *server-info* (make-servdat host: ipaddrstr port: portnum))) (debug:print-info 0 *default-log-port* "rmt:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum) - (if (is-port-in-use portnum) - (begin - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close - portlogger:find-port))) - (begin - (if (not *server-info*) - (set! *server-info* (make-servdat - host: ipaddrstr - port: portnum))) - (servdat-status-set! *server-info* 'starting) - (servdat-port-set! *server-info* portnum) - (if (not (servdat-rep *server-info*)) - (let ((rep (make-rep-socket))) - (servdat-rep-set! *server-info* rep) - (socket-set! rep 'nng/recvtimeo 2000))) - (let* ((rep (servdat-rep *server-info*))) - (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) - (handle-exceptions - exn - (begin - (print-error-message exn) - (if (< portnum 64000) - (begin - (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 5 *default-log-port* "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) - (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") - ;; (thread-sleep! 0.1) - (rmt:try-start-server ipaddrstr - (portlogger:open-run-close portlogger:find-port))) - (begin - (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) - (nng-listen rep (conc "tcp://*:" portnum)) - rep))))) + (assert (servdat? *server-info*) "FATAL: Must always have *server-info* properly set up by here.") + (servdat-status-set! *server-info* 'starting) + (servdat-port-set! *server-info* portnum) + (if (not (servdat-rep *server-info*)) + (let ((rep (make-rep-socket))) + (servdat-rep-set! *server-info* rep) + (socket-set! rep 'nng/recvtimeo 2000))) + (let* ((rep (servdat-rep *server-info*))) + (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) + (handle-exceptions + exn + (begin + (print-error-message exn) + (if (< portnum 64000) + (begin + (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (portlogger:open-run-close portlogger:set-failed portnum) + (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") + ;; (thread-sleep! 0.1) + (rmt:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) + (begin + (print "ERROR: Tried and tried but could not start the server, stopping at port "portnum)))) + (nng-listen rep (conc "tcp://*:" portnum)) + rep))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== ;; C L I E N T S ;;====================================================================== -(define (http-transport:get-time-to-cleanup) +(define (rmt:get-time-to-cleanup) (let ((res #f)) (mutex-lock! *http-mutex*) (set! res (> (current-seconds) *http-connections-next-cleanup*)) (mutex-unlock! *http-mutex*) res)) -(define (http-transport:inc-requests-count) +(define (rmt:inc-requests-count) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)) ;; Use this opportunity to slow things down iff there are too many requests in flight (if (> *http-requests-in-progress* 5) (begin (debug:print-info 0 *default-log-port* "Whoa there buddy, ease up...") (thread-sleep! 1))) (mutex-unlock! *http-mutex*)) -(define (http-transport:dec-requests-count proc) +(define (rmt:dec-requests-count proc) (mutex-lock! *http-mutex*) (proc) (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) (mutex-unlock! *http-mutex*)) -(define (http-transport:dec-requests-count-and-close-all-connections) +(define (rmt:dec-requests-count-and-close-all-connections) (set! *http-requests-in-progress* (- *http-requests-in-progress* 1)) (let loop ((etime (+ (current-seconds) 5))) ;; give up in five seconds (if (> *http-requests-in-progress* 0) (if (> etime (current-seconds)) (begin @@ -1761,68 +1810,37 @@ "requests still in progress after 5 seconds of waiting. I'm going to pass on cleaning up http connections")) #;(close-idle-connections!))) (set! *http-connections-next-cleanup* (+ (current-seconds) 10)) (mutex-unlock! *http-mutex*)) -(define (http-transport:inc-requests-and-prep-to-close-all-connections) +(define (rmt:inc-requests-and-prep-to-close-all-connections) (mutex-lock! *http-mutex*) (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*))) -;; careful closing of connections stored in *runremote* -;; -(define (http-transport:close-connections #!key (area-dat #f)) - (debug:print-info 0 *default-log-port* "http-transport:close-connections doesn't do anything now!")) -;; (let* ((runremote (or area-dat *runremote*)) -;; (server-dat (if runremote -;; (remote-conndat runremote) -;; #f))) ;; (hash-table-ref/default *runremote* run-id #f))) -;; (if (vector? server-dat) -;; (let ((api-dat (http-transport:server-dat-get-api-uri server-dat))) -;; (handle-exceptions -;; exn -;; (begin -;; (print-call-chain *default-log-port*) -;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) -;; (close-connection! api-dat) -;; ;;(close-idle-connections!) -;; #t)) -;; #f))) - - - -;; initialize servdat for client side, setup needed parameters -;; pass in #f as sdat-in to create sdat -;; -#;(define (servdat-init sdat-in iface port uuid) - (let* ((sdat (or sdat-in (make-servdat)))) - - (assert #f "This is a bad idea.") - - (if uuid (servdat-uuid-set! sdat uuid)) - (servdat-host-set! sdat iface) - (servdat-port-set! sdat port) - (servdat-api-url-set! sdat (conc "http://" iface ":" port "/api")) - (servdat-api-uri-set! sdat (uri-reference (servdat-api-url sdat))) - (servdat-api-req-set! sdat (make-request method: 'POST - uri: (servdat-api-uri sdat))) - ;; set up the http-client parameters - (max-retry-attempts 1) - ;; consider all requests indempotent - (retry-request? (lambda (request) - #f)) - (determine-proxy (constantly #f)) - sdat)) - ;;====================================================================== ;; NEW SERVER METHOD ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; -(define (get-lock-db sdat dbfile) - (let* ((dbh (db:open-run-db dbfile db:initialize-db)) - (res (db:get-iam-server-lock dbh dbfile))) +(define (get-lock-db sdat dbfile host port) + (assert host "FATAL: get-lock-db called with host not set.") + (assert port "FATAL: get-lock-db called with port not set.") + (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations + (res (db:get-iam-server-lock dbh dbfile host port))) + ;; res => list then already locked, check server is responsive + ;; => #t then sucessfully got the lock + ;; => #f reserved for future use as to indicate something went wrong + (match res + ((owner_pid owner_host owner_port event_time) + (if (server-ready? owner_host owner_port "abc") + #f ;; locked by someone else + (begin ;; locked by someone dead and gone + (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.") + (db:steal-lock-db dbh dbfile port)))) + (#t #t) ;; placeholder so that we don't touch res if it is #t + (else (set! res #f))) (sqlite3:finalize! dbh) res)) (define (register-server pkts-dir pkt-spec host port servkey ipaddr dbpath) @@ -1867,47 +1885,18 @@ (define (server-address srv-pkt) (conc (alist-ref 'host srv-pkt) ":" (alist-ref 'port srv-pkt))) (define (server-ready? host port key) ;; server-address is host:port -;; (let-values (((i o)(handle-exceptions -;; exn -;; (values #f #f) -;; (tcp-connect host port)))) -;; (if (and i o) (let* ((data (sexpr->string `((cmd . ping) (key . ,key) (params . ())))) (res (open-send-receive-nn (conc host ":" port) data))) - (string->sexpr res))) - -;; (let ((res (with-input-from-port i -;; read))) -;; (close-output-port o) -;; (close-input-port i) -;; res)) -;; (if (string? res) -;; (string->sexpr res) -;; res))) -;; (begin ;; connection failed -;; (debug:print-info 0 *default-log-port* "Server at "host":"port" is not responding.") -;; #f)))) - -;; (define (loop-test host port data) ;; server-address is host:port -;; ;; ping the server and ask it -;; ;; if it ready -;; ;; (let* ((sdat (servdat-init #f host port #f))) -;; ;; (http-transport:send-receive sdat "abc" 'ping '()))) -;; (let* ((payload (sexpr->string data)) -;; (res (with-input-from-request -;; (conc "http://"host":"port"/loop-test") -;; `((data . ,payload)) -;; read-string))) -;; (string->sexpr res)) -;; #f -;; ) - + (if res + (string->sexpr res) + res))) + ; from the pkts return servers associated with dbpath ;; NOTE: Only one can be alive - have to check on each ;; in the list of pkts returned ;; (define (get-viable-servers serv-pkts dbpath) @@ -1918,10 +1907,26 @@ (let* ((spkt (car tail))) (loop (cdr tail) (if (equal? dbpath (alist-ref 'dbpath spkt)) (cons spkt res) res)))))) + +(define (remove-pkts-if-not-alive serv-pkts) + (filter (lambda (pkt) + (let* ((host (alist-ref 'host pkt)) + (port (alist-ref 'port pkt)) + (key (alist-ref 'servkey pkt)) + (pktz (alist-ref 'Z pkt)) + (res (server-ready? host port key))) + (if res + res + (let* ((pktsdir (get-pkts-dir *toppath*)) + (pktpath (conc pktsdir"/"pktz".pkt"))) + (debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath) + (delete-file* pktpath) + #f)))) + serv-pkts)) ;; from viable servers get one that is alive and ready ;; (define (get-the-server apath serv-pkts) (let loop ((tail serv-pkts)) @@ -1971,11 +1976,11 @@ ;; END NEW SERVER METHOD ;;====================================================================== ;; if .db/main.db check the pkts ;; -(define (http-transport:wait-for-server pkts-dir db-file server-key) +(define (rmt:wait-for-server pkts-dir db-file server-key) (let* ((sdat *server-info*)) (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (begin ;; let ((sdat #f)) @@ -2005,32 +2010,39 @@ (servdat-host sdat) db-file)) ;; (set! *my-signature* (servdat-uuid sdat)) ;; replace with Z, no, stick with proper key ;; now read pkts and see if we are a contender (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) (viables (get-viable-servers all-pkts db-file)) - (best-srv (get-best-candidate viables db-file)) - (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))) - (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key) + (alive (remove-pkts-if-not-alive viables)) + (best-srv (get-best-candidate alive db-file)) + (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)) + (i-am-srv (equal? best-srv-key server-key)) + (delete-pkt (lambda () + (let* ((pktfile (conc (get-pkts-dir *toppath*) + "/" (servdat-uuid *server-info*) + ".pkt"))) + (debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile) + (delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit + (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv) ;; am I the best-srv, compare server-keys to know - (if (equal? best-srv-key server-key) - (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) + (if i-am-srv + (if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) (begin - (debug:print 0 *default-log-port* "I'm the server!") + (debug:print-info 0 *default-log-port* "I'm the server!") (servdat-dbfile-set! sdat db-file) (servdat-status-set! sdat 'db-locked)) (begin - (debug:print 0 *default-log-port* "I'm not the server, exiting.") + (debug:print-info 0 *default-log-port* "I'm not the server, exiting.") (bdat-time-to-exit-set! *bdat* #t) + (delete-pkt) (thread-sleep! 0.2) (exit))) (begin - (debug:print 0 *default-log-port* + (debug:print-info 0 *default-log-port* "Keys do not match "best-srv-key", "server-key", exiting.") (bdat-time-to-exit-set! *bdat* #t) - (delete-file* (conc (get-pkts-dir *toppath*) - "/" (servdat-uuid *server-info*) - ".pkt")) ;; remove immediately instead of waiting for on-exit + (delete-pkt) (thread-sleep! 0.2) (exit))) sdat)) (begin ;; sdat not yet contains server info (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) @@ -2041,42 +2053,47 @@ (exit)) (loop start-time (equal? sdat last-sdat) sdat)))))))) -(define (rmt:register-server remote apath iface port server-key dbname) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:register-server remdat apath iface port server-key dbname) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'register-server `(,iface ,port ,server-key ,(current-process-id) ,iface ,apath ,dbname))) -(define (rmt:get-count-servers remote apath) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:get-count-servers remdat apath) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) - 'get-count-servers `(,apath - ))) + 'get-count-servers `(,apath))) + +(define (rmt:get-servers-info apath) + (rmt:send-receive 'get-servers-info #f `(,apath))) -(define (rmt:deregister-server remote apath iface port server-key dbname) - (rmt:open-main-connection remote apath) ;; we need a channel to main.db - (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath +(define (rmt:deregister-server remdat apath iface port server-key dbname) + (remotedat-conns remdat) ;; just checking types + (rmt:open-main-connection remdat apath) ;; we need a channel to main.db + (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'deregister-server `(,iface ,port ,server-key ,(current-process-id) ,iface ,apath ,dbname))) -(define (http-transport:wait-for-stable-interface #!optional (num-tries-allowed 100)) +(define (rmt:wait-for-stable-interface #!optional (num-tries-allowed 100)) ;; wait until *server-info* stops changing (let* ((stime (current-seconds))) (let loop ((last-host #f) (last-port #f) (tries 0)) @@ -2083,17 +2100,17 @@ (let* ((curr-host (and *server-info* (servdat-host *server-info*))) (curr-port (and *server-info* (servdat-port *server-info*)))) ;; first we verify port and interface, update *server-info* in need be. (cond ((> tries num-tries-allowed) - (debug:print 0 *default-log-port* "http-transport:keep-running, giving up after trying for several minutes.") + (debug:print 0 *default-log-port* "rmt:keep-running, giving up after trying for several minutes.") (exit 1)) ((not *server-info*) (thread-sleep! 0.25) (loop curr-host curr-port (+ tries 1))) ((or (not last-host)(not last-port)) - (debug:print 0 *default-log-port* "http-transport:keep-running, still no interface, tries="tries) + (debug:print 0 *default-log-port* "rmt:keep-running, still no interface, tries="tries) (thread-sleep! 0.25) (loop curr-host curr-port (+ tries 1))) ((or (not (equal? last-host curr-host)) (not (equal? last-port curr-port))) (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") @@ -2111,36 +2128,46 @@ " AT " (current-seconds) " server signature: " *my-signature* " with "(servdat-trynum *server-info*)" port changes") (flush-output *default-log-port*) #t)))))) -;; run http-transport:keep-running in a parallel thread to monitor that the db is being +;; run rmt:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (rmt:keep-running dbname) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") - (let* ((server-start-time (current-seconds)) + (let* ((remdat *remotedat*) + (server-start-time (current-seconds)) (pkts-dir (get-pkts-dir)) (server-key (rmt:get-signature)) ;; This servers key (is-main (equal? (args:get-arg "-db") ".db/main.db")) (last-access 0) - (server-timeout (server:expiration-timeout))) + (server-timeout (server:expiration-timeout)) + (shutdown-server-sequence (lambda (host port) + (set! *unclean-shutdown* #f) + (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) + (rmt:server-shutdown host port) + (portlogger:open-run-close portlogger:set-port port "released") + (exit))) + (timed-out? (lambda () + (<= (+ last-access server-timeout) + (current-seconds))))) + (servdat-dbfile-set! *server-info* (args:get-arg "-db")) ;; main and run db servers have both got wait logic (could/should merge it) (if is-main - (http-transport:wait-for-server pkts-dir dbname server-key) - (http-transport:wait-for-stable-interface)) + (rmt:wait-for-server pkts-dir dbname server-key) + (rmt:wait-for-stable-interface)) ;; this is our forever loop (let* ((iface (servdat-host *server-info*)) (port (servdat-port *server-info*))) (let loop ((count 0) (bad-sync-count 0) (start-time (current-milliseconds))) - (if (and (not is-main) (common:low-noise-print 60 "servdat-status")) (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *server-info*))) ;; set up the database handle @@ -2150,18 +2177,28 @@ (debug:print 0 *default-log-port* "SERVER: dbprep") (db:setup dbname) ;; sets *dbstruct-db* as side effect (servdat-status-set! *server-info* 'db-opened) ;; IFF I'm not main, call into main and register self (if (not is-main) - (let ((res (rmt:register-server *rmt:remote* + (let ((res (rmt:register-server remdat *toppath* iface port server-key dbname))) (if res ;; we are the server (servdat-status-set! *server-info* 'have-interface-and-db) - (begin - (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting.") - (exit))))) + ;; now check that the db locker is alive, clear it out if not + (let* ((serv-info (rmt:get-server-info *toppath* dbname))) + (match serv-info + ((host port servkey pid ipaddr apath dbpath) + (if (not (server-ready? host port servkey)) + (begin + (debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.") + (rmt:deregister-server remdat apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath) + + (loop (+ count 1) bad-sync-count start-time)))) + (else + (debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info) + (exit))))))) (debug:print 0 *default-log-port* "SERVER: running, db "dbname" opened, megatest version: " (common:get-full-version)) ;; start the watchdog @@ -2203,26 +2240,28 @@ (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond + ((not *server-run*) + (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") + (shutdown-server-sequence (get-host-name) port)) + ((timed-out?) + (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) + (shutdown-server-sequence (get-host-name) port)) ((and *server-run* - (> (+ last-access server-timeout) - (current-seconds)) - (if is-main - (> (rmt:get-count-servers *rmt:remote* *toppath*) 1) - #t)) + (or (not (timed-out?)) + (if is-main ;; do not exit if there are other servers (keep main open until all others gone) + (> (rmt:get-count-servers remdat *toppath*) 1) + #f))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (set! *unclean-shutdown* #f) (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) - (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) - (rmt:server-shutdown) - (portlogger:open-run-close portlogger:set-port port "released") - (exit) + (shutdown-server-sequence (get-host-name) port) #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown (sexpr->string 'quit))) ))))))) @@ -2292,78 +2331,85 @@ ;;start a server, returns the connection ;; (define (start-nn-server portnum ) (let ((rep (make-rep-socket))) ;; (nn-socket 'rep))) (socket-set! rep 'nng/recvtimeo 2000) - (handle-exceptions + (handle-exceptions ;; why have exception handler here? exn (let ((emsg ((condition-property-accessor 'exn 'message) exn))) (print "ERROR: Failed to start server \"" emsg "\"") (exit 1)) (nng-dial #;nn-bind rep (conc "tcp://*:" portnum))) rep)) -;; open connection to server, send message, close connection -;; -(define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds - (let ((req (make-req-socket 'req)) - (uri (conc "tcp://" host-port)) - (res #f) - ;; (contacts (alist-ref 'contact attrib)) - ;; (mode (alist-ref 'mode attrib)) - ) +(define (open-nn-connection host-port) + (let ((req (make-req-socket)) + (uri (conc "tcp://" host-port))) + (nng-dial req uri) (socket-set! req 'nng/recvtimeo 2000) - (handle-exceptions - exn - (let ((emsg ((condition-property-accessor 'exn 'message) exn))) - ;; Send notification - (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) - #f) - (nng-dial req uri) - ;; (print "Connected to the server " ) - (nng-send req msg) - ;; (print "Request Sent") - (let* ((th1 (make-thread (lambda () - (let ((resp (nng-recv req))) - (nng-close! req) - (set! res (if (equal? resp "ok") - #t - #f)))) - "recv thread")) - (th2 (make-thread (lambda () - (thread-sleep! timeout) - (thread-terminate! th1)) - "timer thread"))) - (thread-start! th1) - (thread-start! th2) - (thread-join! th1) - res)))) - + req)) + +(define (send-receive-nn req msg) + (nng-send req msg) + (nng-recv req)) + +(define (close-nn-connection req) + (nng-close! req)) + +;; ;; open connection to server, send message, close connection +;; ;; +;; (define (open-send-close-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds +;; (let ((req (make-req-socket 'req)) +;; (uri (conc "tcp://" host-port)) +;; (res #f) +;; ;; (contacts (alist-ref 'contact attrib)) +;; ;; (mode (alist-ref 'mode attrib)) +;; ) +;; (socket-set! req 'nng/recvtimeo 2000) +;; (handle-exceptions +;; exn +;; (let ((emsg ((condition-property-accessor 'exn 'message) exn))) +;; ;; Send notification +;; (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\"" ) +;; #f) +;; (nng-dial req uri) +;; ;; (print "Connected to the server " ) +;; (nng-send req msg) +;; ;; (print "Request Sent") +;; (let* ((th1 (make-thread (lambda () +;; (let ((resp (nng-recv req))) +;; (nng-close! req) +;; (set! res (if (equal? resp "ok") +;; #t +;; #f)))) +;; "recv thread")) +;; (th2 (make-thread (lambda () +;; (thread-sleep! timeout) +;; (thread-terminate! th1)) +;; "timer thread"))) +;; (thread-start! th1) +;; (thread-start! th2) +;; (thread-join! th1) +;; res)))) +;; (define (open-send-receive-nn host-port msg #!key (timeout 3) ) ;; default timeout is 3 seconds (let ((req (make-req-socket)) (uri (conc "tcp://" host-port)) - (res #f) - ;; (contacts (alist-ref 'contact attrib)) - ;; (mode (alist-ref 'mode attrib)) - ) + (res #f)) (handle-exceptions exn (let ((emsg ((condition-property-accessor 'exn 'message) exn))) ;; Send notification (debug:print 0 *default-log-port* "ERROR: Failed to connect / send to " uri " message was \"" emsg "\", exn=" exn) #f) (nng-dial req uri) - ;; (print "Connected to the server " ) (nng-send req msg) - ;; (print "Request Sent") - ;; receive code here - ;;(print (nn-recv req)) (let* ((th1 (make-thread (lambda () (let ((resp (nng-recv req))) (nng-close! req) - (print resp) + ;; (print resp) (set! res resp))) "recv thread")) (th2 (make-thread (lambda () (thread-sleep! timeout) (thread-terminate! th1)) @@ -2457,17 +2503,17 @@ ;; '(/ "loop-test")) ;; (send-response body: (alist-ref 'data ($)) ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "json_api")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "runs")) -;; (send-response body: ((http-get-function 'http-transport:main-page)))) +;; (send-response body: ((http-get-function 'rmt:main-page)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ any)) ;; (send-response body: "hey there!\n" ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) @@ -2474,16 +2520,16 @@ ;; '(/ "hey")) ;; (send-response body: "hey there!\n" ;; headers: '((content-type text/plain)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "jquery3.1.0.js")) -;; (send-response body: ((http-get-function 'http-transport:show-jquery)) +;; (send-response body: ((http-get-function 'rmt:show-jquery)) ;; headers: '((content-type application/javascript)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "test_log")) -;; (send-response body: ((http-get-function 'http-transport:html-test-log) $) +;; (send-response body: ((http-get-function 'rmt:html-test-log) $) ;; headers: '((content-type text/HTML)))) ;; ((equal? (uri-path (request-uri (current-request))) ;; '(/ "dashboard")) -;; (send-response body: ((http-get-function 'http-transport:html-dboard) $) +;; (send-response body: ((http-get-function 'rmt:html-dboard) $) ;; headers: '((content-type text/HTML)))) ;; (else (continue)))))))) Index: stml2.scm ================================================================== --- stml2.scm +++ stml2.scm @@ -18,8 +18,10 @@ ;;====================================================================== (declare (unit stml2)) (declare (uses cookie)) +(declare (uses dbi)) +(declare (uses autoload)) (include "stml2/stml2.scm") Index: stml2/formdat.scm ================================================================== --- stml2/formdat.scm +++ stml2/formdat.scm @@ -10,12 +10,11 @@ ;; (declare (unit formdat)) (module formdat * -(import chicken scheme data-structures extras srfi-13 ports ) -(use html-filter) +(import chicken scheme data-structures extras srfi-13 ports html-filter) -(use regex) -(require-extension srfi-69) +(import regex) +(import srfi-69) ) Index: stml2/html-filter.scm ================================================================== --- stml2/html-filter.scm +++ stml2/html-filter.scm @@ -11,11 +11,11 @@ (module html-filter * (import chicken scheme data-structures extras srfi-13 ports ) -(use misc-stml) +(import misc-stml) -(require-extension regex) +(import regex) ;; ) Index: stml2/misc-stml.scm ================================================================== --- stml2/misc-stml.scm +++ stml2/misc-stml.scm @@ -16,9 +16,8 @@ (module misc-stml * (import chicken scheme data-structures extras srfi-13 ports posix) -(use regex (prefix dbi dbi:)) -(use (prefix crypt c:)) -(use (prefix dbi dbi:)) +(import regex (prefix dbi dbi:)) +(import (prefix crypt c:)) ) Index: stml2/rollup-pages.scm ================================================================== --- stml2/rollup-pages.scm +++ stml2/rollup-pages.scm @@ -1,6 +1,6 @@ -(use regex posix srfi-69 srfi-1) +(import regex posix srfi-69 srfi-1) (define extract-rx (regexp "pages\\/(.*)_(view|ctrl).scm")) (define (print-page-wrapper lookup page) (print "(define (pages:" page " session db shared)") Index: stml2/session.scm ================================================================== --- stml2/session.scm +++ stml2/session.scm @@ -11,10 +11,9 @@ (module session * (import chicken scheme data-structures extras srfi-13 ports posix files srfi-1) -(use (prefix dbi dbi:) srfi-69) -(require-extension regex) -(use cookie stmlcommon) ;; (declare (uses cookie)) +(import (prefix dbi dbi:) srfi-69 regex) +(import cookie stmlcommon) ;; (declare (uses cookie)) ) Index: stml2/setup.scm ================================================================== --- stml2/setup.scm +++ stml2/setup.scm @@ -9,13 +9,12 @@ (module setup * (import chicken scheme data-structures extras srfi-13 ports posix) -(uses session misc-stml) +(import session misc-stml) ;; (declare (unit setup))se ;; (declare (uses session)) -(require-extension srfi-69) -(require-extension regex) +(import srfi-69 regex) ) Index: stml2/spiffyserver.scm ================================================================== --- stml2/spiffyserver.scm +++ stml2/spiffyserver.scm @@ -1,8 +1,8 @@ ;; This doesn't work yet ;; -(use spiffy cgi-handler) +(import spiffy cgi-handler) (spiffy-debug-mode #t) (spiffy-file-ext-handlers `(("drcdb" . ,(cgi-handler* "/path/to/drcdb")))) Index: stml2/sqlite3.scm ================================================================== --- stml2/sqlite3.scm +++ stml2/sqlite3.scm @@ -9,11 +9,11 @@ ;; ;; I used this to get a simple interactive sqlite editor on the nokia n800 ;; since I couldn't get sqlite3 to install (for reasons I can't remember). -(use sqlite3) +(import sqlite3) (define args (argv)) (define num-args (length args)) (define dbname #f) Index: stml2/stmlcommon.scm ================================================================== --- stml2/stmlcommon.scm +++ stml2/stmlcommon.scm @@ -13,8 +13,8 @@ (module stmlcommon * (import chicken scheme data-structures extras srfi-13 ports posix) -(use (prefix dbi dbi:) regex (prefix crypt c:) srfi-69) +(import (prefix dbi dbi:) regex (prefix crypt c:) srfi-69) ) Index: stml2/stmlrun.scm ================================================================== --- stml2/stmlrun.scm +++ stml2/stmlrun.scm @@ -11,9 +11,9 @@ ;; (require-extension syntax-case) ;; (declare (run-time-macros)) ;; (include "stmlcommon.scm") -(require-library stml) +(import stml) (stml:main #f) Index: stml2/test.scm ================================================================== --- stml2/test.scm +++ stml2/test.scm @@ -1,8 +1,7 @@ -(use test md5) +(import test md5) -(require-extension sqlite3) (import (prefix sqlite3 sqlite3:)) (require-library dbi) ;; (declare (uses stml)) Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -40,16 +40,16 @@ TARGET = "ubuntu/nfs/none" all : build unit test4 # test1 test2 test3 test4 test5 test6 test7 test8 test9 -unit : basicserver.log server.log +unit : basicserver.log server.log all-rmt.log # all-rmt.log all-api.log # runs.log misc.log tests.log # inter dependencies on the unit tests, I wish these could be "suggestions" -all-rmt.log : all-api.log +# all-rmt.log : all-api.log rel : cd release;dashboard -rows 25 & ## basicserver.log : unittests/basicserver.scm ADDED tests/simplerun/Makefile Index: tests/simplerun/Makefile ================================================================== --- /dev/null +++ tests/simplerun/Makefile @@ -0,0 +1,3 @@ + +cleanup : + killall mtest -v -9;rm -rf .meta .db Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -36,11 +36,11 @@ state start end completed # Job tools are more advanced ways to control how your jobs are launched [jobtools] useshell yes -launcher nbfind +launcher nbfake # You can override environment variables for all your tests here [env-override] EXAMPLE_VAR example value ADDED tests/simplerun/stress-test.scm Index: tests/simplerun/stress-test.scm ================================================================== --- /dev/null +++ tests/simplerun/stress-test.scm @@ -0,0 +1,102 @@ +;;====================================================================== +;; S E R V E R +;;====================================================================== +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . + + +;; Run like this: +;; +;; (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) +(import big-chicken + chicken.random + test + srfi-18 + + rmtmod + trace + apimod + dbmod + launchmod + commonmod + ) + +(trace-call-sites #t) +(trace + + ;; db:get-dbdat + ;; rmt:find-main-server + ;; rmt:send-receive-real + ;; rmt:send-receive + ;; sexpr->string + ;; server-ready? + ;; rmt:register-server + ;; rmt:deregister-server + ;; rmt:open-main-connection + ;; rmt:general-open-connection + ;; rmt:get-conn + ;; common:watchdog + ;; rmt:find-main-server + ;; get-all-server-pkts + ;; get-viable-servers + ;; get-best-candidate + ;; api:run-server-process + ;; api:process-request + ;; rmt:run + ;; rmt:try-start-server + ) + + +(define *db* (db:setup ".db/main.db")) + +;; these let me cut and paste from source easily +(define apath *toppath*) +(define run-id (pseudo-random-integer 10)) +(define dbname (conc ".db/"run-id".db")) +(define remote *remotedat*) +(define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) + +(test #f #t (rmt:open-main-connection remote apath)) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f dbname (list-ref (rmt:send-receive-real *remotedat* *toppath* ".db/main.db" 'get-server `(,apath ,dbname)) + 6)) + +(thread-sleep! 2) +(test #f #t (rmt:general-open-connection *remotedat* *toppath* dbname)) + +(let loop ((end-time (+ (current-seconds) 600))) + (test #f #t (list? (rmt:get-servers-info *toppath*))) + + (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) + ;; (print "Got here.") + + (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) + + (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) + ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname + + (test #f #t (number? (rmt:get-count-servers *remotedat* *toppath*))) + + (test #f "run2" (rmt:get-run-name-from-id 2)) + (test #f #f (rmt:send-receive 'get-test-info-by-id 2 '(2 1))) + + (test #f #t (rmt:general-call 'update-cpuload-diskfree 2 1.5 1e6 1)) + (if (< (current-seconds) end-time)(loop end-time))) + +(exit) Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -15,11 +15,18 @@ ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see . ;; ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') -(import srfi-18 test) +(import srfi-18 + test + chicken.string + chicken.process-context + chicken.file + chicken.pretty-print + commonmod + ) (define test-work-dir (current-directory)) ;; given list of lists ;; ( ( msg expected param1 param2 ...) Index: tests/unittests/all-rmt.scm ================================================================== --- tests/unittests/all-rmt.scm +++ tests/unittests/all-rmt.scm @@ -29,74 +29,76 @@ ;; NTN - no test needed ;; DEP - function is deprecated, no point in testing ;; NED - function nested under others, no test needed. ;; DEF - deferred -(print "start dir: " (current-directory)) - -(define toppath (current-directory)) - -(test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait -(test #f #t (list? (server:get-list toppath))) -(test #f '() (server:get-best '())) -(test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) -(test #f "test.lock" (common:simple-file-release-lock "test.lock")) -(test #f #t (server:get-best-guess-address (get-host-name))) -(test #f #t (string? (common:get-homehost))) - -;; clean out any old running servers -;; -(let ((servers (server:get-list toppath))) - (print "Known servers: " servers) - (if (not (null? servers)) - (begin - (for-each - (lambda (server) - (let ((pid (list-ref server 4))) - (thread-start! - (make-thread - (lambda () - (print "Attempting to kill server: " server) - (print "Attempting to kill pid " pid) - (system (conc "kill " pid)) - (thread-sleep! 2) - (system (conc "kill -9 " pid))) - (conc pid))))) - servers) - (thread-sleep! 2)))) -;; let's start up a server the mechanical way -(system "nbfake megatest -server -") -(thread-sleep! 2) -;; (test #f #t (string? (server:start-and-wait *toppath*))) - -(test "setup for run" #t (begin (launch:setup) - (string? (getenv "MT_RUN_AREA_HOME")))) -(test #f #t (client:setup-http toppath)) -(test #f #t (vector? (client:setup toppath))) - -(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. -(test #f #t (string? (server:check-if-running "."))) -;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) -;; DEF (rmt:kill-server run-id) -;; DEF (rmt:start-server run-id) -(test #f '(#t "successful login")(rmt:login #f)) -;; DEF (rmt:login-no-auto-client-setup connection-info) -(test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) - -;; get-latest-host-load does a lookup in the db, it won't return a useful value unless -;; a test ran recently on host -(test-batch rmt:get-latest-host-load - "rmt:get-latest-host-load" - (list (list "localhost" #t (get-host-name)) - (list "not-a-host" #t "not-a-host" )) - post-proc: pair?) - -(test #f #t (list? (rmt:get-changed-record-ids 0))) - -(test #f #f (begin (runs:update-all-test_meta #f) #f)) - -(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (rmt:get-tests-tags) equal?) string<=)) +(import big-chicken rmtmod apimod runsmod) + +(print "start dir: " (current-directory)) +;; +(define toppath (current-directory)) +;; +;; (test #f #f (server:check-if-running toppath)) ;; these are used by server:start-and-wait +;; (test #f #t (list? (server:get-list toppath))) +;; (test #f '() (server:get-best '())) +;; (test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15)) +;; (test #f "test.lock" (common:simple-file-release-lock "test.lock")) +;; (test #f #t (server:get-best-guess-address (get-host-name))) +;; (test #f #t (string? (common:get-homehost))) +;; +;; ;; clean out any old running servers +;; ;; +;; (let ((servers (server:get-list toppath))) +;; (print "Known servers: " servers) +;; (if (not (null? servers)) +;; (begin +;; (for-each +;; (lambda (server) +;; (let ((pid (list-ref server 4))) +;; (thread-start! +;; (make-thread +;; (lambda () +;; (print "Attempting to kill server: " server) +;; (print "Attempting to kill pid " pid) +;; (system (conc "kill " pid)) +;; (thread-sleep! 2) +;; (system (conc "kill -9 " pid))) +;; (conc pid))))) +;; servers) +;; (thread-sleep! 2)))) +;; ;; let's start up a server the mechanical way +;; (system "nbfake megatest -server -") +;; (thread-sleep! 2) +;; ;; (test #f #t (string? (server:start-and-wait *toppath*))) +;; +;; (test "setup for run" #t (begin (launch:setup) +;; (string? (getenv "MT_RUN_AREA_HOME")))) +;; (test #f #t (client:setup-http toppath)) +;; (test #f #t (vector? (client:setup toppath))) +;; +;; (test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down. +;; (test #f #t (string? (server:check-if-running "."))) +;; ;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '())) +;; ;; DEF (rmt:kill-server run-id) +;; ;; DEF (rmt:start-server run-id) +;; (test #f '(#t "successful login")(rmt:login #f)) +;; ;; DEF (rmt:login-no-auto-client-setup connection-info) +;; (test #f #t (pair? (rmt:get-latest-host-load (get-host-name)))) +;; +;; ;; get-latest-host-load does a lookup in the db, it won't return a useful value unless +;; ;; a test ran recently on host +;; (test-batch rmt:get-latest-host-load +;; "rmt:get-latest-host-load" +;; (list (list "localhost" #t (get-host-name)) +;; (list "not-a-host" #t "not-a-host" )) +;; post-proc: pair?) +;; +;; (test #f #t (list? (rmt:get-changed-record-ids 0))) +;; +(test #f #f (begin (runs:update-all-test_meta #f) #f)) + +(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (rmt:get-tests-tags) equal?) string<=?)) (test #f '() (rmt:get-key-val-pairs 0)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start (test #f '() (rmt:get-key-vals 1)) (test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets)) @@ -131,11 +133,11 @@ (test #f #f (begin (rmt:set-state-status-and-roll-up-items 1 "foo" "" "COMPLETED" "FAIL" "Just yet another message") #f)) (test #f #t (rmt:top-test-set-per-pf-counts 1 "foo")) (test #f '() (rmt:get-raw-run-stats 1)) (test #f #t (vector? (rmt:get-run-info 1))) (test #f 0 (rmt:get-num-runs "%")) -(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) ) +(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234"))) (test #f 1 (rmt:register-run '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) "bar" "NEW" "JUSTFINE" "bobafett" "quick")) (test #f "bar" (rmt:get-run-name-from-id 1)) (test #f #t (begin (rmt:delete-run 2) #t)) ;; delete a non-existant run (test #f #t (begin (rmt:update-run-stats 1 '()) #t)) (test #f #t (begin (rmt:delete-old-deleted-test-records) #t)) @@ -166,50 +168,51 @@ (test #f '(("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1)) (begin (rmt:get-run-stats))) (test #f #t (begin (rmt:set-run-state-status 1 "COMPLETE" "PASS") #t)) (test #f '"COMPLETE" (rmt:get-run-state 1)) (test #f '"PASS" (rmt:get-run-status 1)) -(test #f #t (begin (rmt:set-var "foo" "bar")#t)) -(test #f "bar" (rmt:get-var "foo")) +(test #f #t (begin (rmt:set-var 1 "foo" "bar")#t)) +(test #f "bar" (rmt:get-var 1 "foo")) (test #f #t (begin (rmt:print-db-stats) #t)) -(test #f #t (begin (rmt:del-var "foo") #t)) -(test #f #f (rmt:get-var "foo")) +(test #f #t (begin (rmt:del-var 1 "foo") #t)) +(test #f #f (rmt:get-var 1 "foo")) (test #f (vector #f #f #f #f #f #f #f #f #f #f #f #f) (rmt:get-data-info-by-id 1)) (test #f '() (rmt:get-key-vals 1)) (test #f "ubuntu/v1.234" (rmt:get-target 1)) (print (rmt:get-run-info 1)) (test #f '((runs) (tests) (test_steps) (test_data)) (rmt:get-run-record-ids "ubuntu/v1.234" 1 '("fail_count") "bar")) -;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) -;; (rmt:get-main-run-stats run-id) -;; (rmt:get-var varname) -;; (rmt:set-var varname value) -;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) -;; (rmt:get-previous-test-run-record run-id test-name item-path) -;; (rmt:get-run-stats) -;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) -;; (rmt:get-steps-for-test run-id test-id) -;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) -;; (rmt:testmeta-add-record testname) -;; (rmt:testmeta-get-record testname) -;; (rmt:testmeta-update-field test-name fld val) -;; (rmt:test-data-rollup run-id test-id status) -;; (rmt:csv->test-data run-id test-id csvdata) -;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) -;; (rmt:tasks-add action owner target runname testpatt params) -;; (rmt:tasks-set-state-given-param-key param-key new-state) -;; (rmt:tasks-get-last target runname) -;; (rmt:archive-get-allocations testname itempath dneeded) -;; (rmt:archive-register-block-name bdisk-id archive-path) -;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) -;; (rmt:archive-register-disk bdisk-name bdisk-path df) -;; (rmt:test-set-archive-block-id run-id test-id archive-block-id) -;; (rmt:test-get-archive-block-info archive-block-id) -;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) -;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected -;; DEF (test #f #f (rmt:print-db-stats)) -;; DEF (rmt:get-max-query-average run-id) -;; NED (rmt:general-call stmtname run-id . params) -;; DEP (rmt:sdb-qry qry val run-id) -;; DEF (rmt:runtests user run-id testpatt params) -;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) -;; DEP (rmt:synchash-get run-id proc synckey keynum params) -;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo")) +;; ;; (rmt:find-and-mark-incomplete run-id ovr-deadtime) +;; ;; (rmt:get-main-run-stats run-id) +;; ;; (rmt:get-var varname) +;; ;; (rmt:set-var varname value) +;; ;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) +;; ;; (rmt:get-previous-test-run-record run-id test-name item-path) +;; ;; (rmt:get-run-stats) +;; ;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile) +;; ;; (rmt:get-steps-for-test run-id test-id) +;; ;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) +;; ;; (rmt:testmeta-add-record testname) +;; ;; (rmt:testmeta-get-record testname) +;; ;; (rmt:testmeta-update-field test-name fld val) +;; ;; (rmt:test-data-rollup run-id test-id status) +;; ;; (rmt:csv->test-data run-id test-id csvdata) +;; ;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt) +;; ;; (rmt:tasks-add action owner target runname testpatt params) +;; ;; (rmt:tasks-set-state-given-param-key param-key new-state) +;; ;; (rmt:tasks-get-last target runname) +;; ;; (rmt:archive-get-allocations testname itempath dneeded) +;; ;; (rmt:archive-register-block-name bdisk-id archive-path) +;; ;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey) +;; ;; (rmt:archive-register-disk bdisk-name bdisk-path df) +;; ;; (rmt:test-set-archive-block-id run-id test-id archive-block-id) +;; ;; (rmt:test-get-archive-block-info archive-block-id) +;; ;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) +;; ;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected +;; ;; DEF (test #f #f (rmt:print-db-stats)) +;; ;; DEF (rmt:get-max-query-average run-id) +;; ;; NED (rmt:general-call stmtname run-id . params) +;; ;; DEP (rmt:sdb-qry qry val run-id) +;; ;; DEF (rmt:runtests user run-id testpatt params) +;; ;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f)) +;; ;; DEP (rmt:synchash-get run-id proc synckey keynum params) +;; ;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo")) +;; Index: tests/unittests/basicserver.scm ================================================================== --- tests/unittests/basicserver.scm +++ tests/unittests/basicserver.scm @@ -21,11 +21,11 @@ ;; Run like this: ;; ;; ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) (import rmtmod trace http-client apimod dbmod - launchmod) + launchmod srfi-69) (trace-call-sites #t) (trace ;; db:get-dbdat ;; rmt:find-main-server @@ -45,20 +45,20 @@ ;; api:run-server-process ;; rmt:run ;; rmt:try-start-server ) -(test #f #t (rmt:remote? (let ((r (make-rmt:remote))) - (set! *rmt:remote* r) - r))) -(test #f #f (rmt:get-conn *rmt:remote* *toppath* ".db/main.db")) +(test #f #t (remotedat? (let ((r (make-remotedat))) + (set! *remotedat* r) + r))) +(test #f #f (rmt:get-conn *remotedat* *toppath* ".db/main.db")) (test #f #f (rmt:find-main-server *toppath* ".db/main.db")) -(test #f #t (rmt:open-main-connection *rmt:remote* *toppath*)) -(pp (hash-table->alist (rmt:remote-conns *rmt:remote*))) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) +(test #f #t (rmt:open-main-connection *remotedat* *toppath*)) +(pp (hash-table->alist (remotedat-conns *remotedat*))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) -(define *main* (rmt:get-conn *rmt:remote* *toppath* ".db/main.db")) +(define *main* (rmt:get-conn *remotedat* *toppath* ".db/main.db")) ;; (for-each (lambda (tdat) ;; (test #f tdat (loop-test (rmt:conn-ipaddr *main*) ;; (rmt:conn-port *main*) tdat))) ;; (list 'a @@ -68,13 +68,13 @@ (define *db* (db:setup ".db/main.db")) ;; these let me cut and paste from source easily (define apath *toppath*) (define dbname ".db/2.db") -(define remote *rmt:remote*) +(define remote *remotedat*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) (test #f '() (string->sexpr "()")) (test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/2.db"))) (set! *dbstruct-db* #f) (exit) Index: tests/unittests/server.scm ================================================================== --- tests/unittests/server.scm +++ tests/unittests/server.scm @@ -20,19 +20,20 @@ ;; Run like this: ;; ;; (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0) -(import rmtmod trace http-client apimod dbmod +(import big-chicken rmtmod trace http-client apimod dbmod launchmod) (trace-call-sites #t) (trace + ;; db:get-dbdat ;; rmt:find-main-server -;; rmt:send-receive-real -;; rmt:send-receive + ;; rmt:send-receive-real + ;; rmt:send-receive ;; sexpr->string ;; server-ready? ;; rmt:register-server ;; rmt:deregister-server ;; rmt:open-main-connection @@ -52,31 +53,38 @@ (define *db* (db:setup ".db/main.db")) ;; these let me cut and paste from source easily (define apath *toppath*) (define dbname ".db/2.db") -(define remote *rmt:remote*) +(define remote *remotedat*) (define keyvals '(("SYSTEM" "a")("RELEASE" "b"))) (test #f #t (rmt:open-main-connection remote apath)) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) -(test #f #t (rmt:conn? (rmt:get-conn *rmt:remote* *toppath* ".db/main.db"))) -(test #f 'server-started (rmt:send-receive-real *rmt:remote* *toppath* ".db/main.db" 'get-server `(,apath ,dbname))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f #t (conndat? (rmt:get-conn *remotedat* *toppath* ".db/main.db"))) +(test #f ".db/2.db" (list-ref (rmt:send-receive-real *remotedat* *toppath* ".db/main.db" 'get-server `(,apath ,dbname)) + 6)) (thread-sleep! 2) -(test #f #t (list? (rmt:general-open-connection *rmt:remote* *toppath* ".db/2.db"))) - -(test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) -(test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) -(print "Got here.") - -(test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) - -(test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) -;; (test #f 2 (rmt:deregister-server *rmt:remote* *toppath* iface port server-key dbname - -(test #f 2 (rmt:get-count-servers *rmt:remote* *toppath*)) - -(test #f "run2" (rmt:get-run-name-from-id 2)) - -;; (exit) - +(test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) + +;; (let loop ((end-time (+ (current-seconds) 61))) + (test #f #t (list? (rmt:get-servers-info *toppath*))) + + (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) + (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) + ;; (print "Got here.") + + (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) + + (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) + ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname + + (test #f 2 (rmt:get-count-servers *remotedat* *toppath*)) + + (test #f "run2" (rmt:get-run-name-from-id 2)) + (test #f #f (rmt:send-receive 'get-test-info-by-id 2 '(2 1))) + + (test #f #t (rmt:general-call 'update-cpuload-diskfree 2 1.5 1e6 1)) +;; (if (< (current-seconds) end-time)(loop end-time))) + +(exit) Index: testsmod.scm ================================================================== --- testsmod.scm +++ testsmod.scm @@ -26,10 +26,11 @@ (declare (uses itemsmod)) (declare (uses rmtmod)) (declare (uses stml2)) (declare (uses dbmod)) (declare (uses tasksmod)) +(declare (uses dbi)) (module testsmod * (import scheme