Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -6,11 +6,11 @@ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ fs-transport.scm http-transport.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ - tree.scm + tree.scm rmt.scm api.scm GUISRCF = dashboard-tests.scm dashboard-guimonitor.scm OFILES = $(SRCFILES:%.scm=%.o) GOFILES = $(GUISRCF:%.scm=%.o) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -205,21 +205,21 @@ (iup:frame #:title "Set fields" (iup:vbox (iup:hbox (iup:label "Comment:") (iup:textbox #:action (lambda (val a b) - (open-run-close db:test-set-state-status-by-id #f test-id #f #f b) + (cdb:remote-run db:test-set-state-status-by-id #f test-id #f #f b) (set! newcomment b)) #:value (db:test-get-comment testdat) #:expand "HORIZONTAL")) (apply iup:hbox (iup:label "STATE:" #:size "30x") (let* ((btns (map (lambda (state) (let ((btn (iup:button state #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) - (open-run-close db:test-set-state-status-by-id #f test-id state #f #f) + (cdb:remote-run db:test-set-state-status-by-id #f test-id state #f #f) (db:test-set-state! testdat state))))) btn)) (list "COMPLETED" "NOT_STARTED" "RUNNING" "REMOTEHOSTSTART" "KILLED" "KILLREQ")))) (vector-set! *state-status* 0 (lambda (state color) @@ -235,11 +235,11 @@ (iup:label "STATUS:" #:size "30x") (let* ((btns (map (lambda (status) (let ((btn (iup:button status #:expand "HORIZONTAL" #:size "50x" #:font "Courier New, -10" #:action (lambda (x) - (open-run-close db:test-set-state-status-by-id #f test-id #f status #f) + (cdb:remote-run db:test-set-state-status-by-id #f test-id #f status #f) (db:test-set-status! testdat status))))) btn)) (list "PASS" "WARN" "FAIL" "CHECK" "n/a" "WAIVED" "SKIP")))) (vector-set! *state-status* 1 (lambda (status color) @@ -255,11 +255,11 @@ ;;====================================================================== ;; ;;====================================================================== (define (examine-test test-id) ;; run-id run-key origtest) - (let* ((testdat (open-run-close db:get-test-info-by-id #f test-id)) + (let* ((testdat (cdb:remote-run db:get-test-info-by-id #f test-id)) (db-path (conc *toppath* "/megatest.db")) (db-mod-time 0) ;; (file-modification-time db-path)) (last-update 0) ;; (current-seconds)) (request-update #t) (db #f)) @@ -266,12 +266,12 @@ (if (not testdat) (begin (debug:print 0 "ERROR: No test data found for test " test-id ", exiting") (exit 1)) (let* ((run-id (if testdat (db:test-get-run_id testdat) #f)) - (keydat (if testdat (open-run-close db:get-key-val-pairs #f run-id) #f)) - (rundat (if testdat (open-run-close db:get-run-info #f run-id) #f)) + (keydat (if testdat (cdb:remote-run db:get-key-val-pairs #f run-id) #f)) + (rundat (if testdat (cdb:remote-run db:get-run-info #f run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-row rundat) (db:get-header rundat) "runname") #f)) ;; These next two are intentional bad values to ensure errors if they should not ;; get filled in properly. @@ -279,11 +279,11 @@ (rundir logfile) (teststeps (if testdat (db:get-compressed-steps test-id work-area: rundir) '())) (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) (testname (if testdat (db:test-get-testname testdat) "n/a")) (testmeta (if testdat - (let ((tm (open-run-close db:testmeta-get-record #f testname))) + (let ((tm (cdb:remote-run db:testmeta-get-record #f testname))) (if tm tm (make-db:testmeta))) (make-db:testmeta))) (keystring (string-intersperse (map (lambda (keyval) @@ -319,11 +319,11 @@ request-update)) (newtestdat (if need-update (handle-exceptions exn (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn)) - (open-run-close db:get-test-info-by-id #f test-id ))))) + (cdb:remote-run db:get-test-info-by-id #f test-id ))))) (cond ((and need-update newtestdat) (set! testdat newtestdat) (set! teststeps (db:get-compressed-steps test-id work-area: rundir)) (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) @@ -496,11 +496,11 @@ (db:test-data-get-tol x) (db:test-data-get-status x) (db:test-data-get-units x) (db:test-data-get-type x) (db:test-data-get-comment x))) - (open-run-close db:read-test-data #f test-id "%"))) + (cdb:remote-run db:read-test-data #f test-id "%"))) "\n"))) (if (not (equal? currval newval)) (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) test-data)) ;;(dashboard:run-controls) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2139,11 +2139,11 @@ ((eq? (db:step-get-event_time a)(db:step-get-event_time b)) (< (db:step-get-id a) (db:step-get-id b))) (else #f))))) res))) -(define (db:get-compressed-steps test-id #!key (work-area #f)) +(define (:get-compressed-steps test-id #!key (work-area #f)) (if (or (not work-area) (file-exists? (conc work-area "/testdat.db"))) (let* ((comprsteps (open-run-close db:get-steps-table #f test-id work-area: work-area))) (map (lambda (x) ;; take advantage of the \n on time->string Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -11,11 +11,11 @@ (require-extension (srfi 18) extras tcp s11n) (use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) (import (prefix sqlite3 sqlite3:)) -(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb spiffy-directory-listing) +(use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) ;; Configurations for server (tcp-buffer-size 2048) (max-connections 2048) @@ -98,10 +98,15 @@ (if (not db)(set! db (open-db))) (let* (($ (request-vars source: 'both)) (dat ($ 'dat)) (res #f)) (cond + ((equal? (uri-path (request-uri (current-request))) + '(/ "api")) + (print "Got api request") + (send-response body: (api:process-request db $) ;; the $ is the request vars proc + headers: '((content-type text/plain)))) ;; This is the /ctrl path where data is handed to the server and ;; responses ((equal? (uri-path (request-uri (current-request))) '(/ "ctrl")) (let* ((packet (db:string->obj dat)) @@ -239,15 +244,79 @@ (let ((match (string-search (regexp "(.*)<.body>") res))) (debug:print-info 11 "match=" match) (let ((final (cadr match))) (debug:print-info 11 "final=" final) final))))))) + +;; Send "cmd" with json payload "params" to serverdat and receive result +;; +(define (http-transport:client-api-send-receive serverdat cmd params #!key (numretries 30)) + (let* ((fullurl (if (list? serverdat) + (cadddr serverdat) ;; this is the uri for /api + (begin + (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info") + (exit 1)))) + (res #f)) + (handle-exceptions + exn + (begin + ;; TODO: Send this output to a log file so it isn't lost when running as daemon + (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 2) + (if (> numretries 0) + (http-transport:client-api-send-receive serverdat cmd params numretries: (- numretries 1)))) + (begin + (debug:print-info 11 "fullurl=" fullurl "\n") + ;; set up the http-client here + (max-retry-attempts 5) + ;; consider all requests indempotent + (retry-request? (lambda (request) + #t)) ;; (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) + ;; (set! numretries (- numretries 1)) + ;; #t)) + ;; send the data and get the response + ;; extract the needed info from the http data and + ;; process and return it. + + ;; (with-input-from-request "http://localhost/echo-service" + ;; '((test . "value")) read-string) + + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + (set! res (with-input-from-request + fullurl + (list (cons 'key "thekey") + (cons 'cmd cmd) + (cons 'params params)) + read-string)) + (close-all-connections!) + (mutex-unlock! *http-mutex*))) + (time-out (lambda () + (thread-sleep! 45) + (if (not res) + (begin + (debug:print 0 "WARNING: communication with the server timed out.") + (mutex-unlock! *http-mutex*) + (http-transport:client-api-send-receive serverdat cmd params numretries: (- numretries 1)) + (if (< numretries 3) ;; on last try just exit + (begin + (debug:print 0 "ERROR: communication with the server timed out. Giving up.") + (exit 1))))))) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) + (debug:print-info 11 "got res=" res) + res))))) (define (http-transport:client-connect iface port) (let* ((login-res #f) (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) - (serverdat (list iface port uri-dat))) + (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) + (serverdat (list iface port uri-dat uri-api-dat))) (set! login-res (client:login serverdat)) (if (and (not (null? login-res)) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -25,10 +25,12 @@ (declare (uses client)) (declare (uses tests)) (declare (uses genexample)) (declare (uses daemon)) (declare (uses db)) +(declare (uses mt)) +(declare (uses api)) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -85,5 +85,10 @@ (define (mt:get-prereqs-not-met run-id waitons ref-item-path #!key (mode 'normal)) (db:get-prereqs-not-met run-id waitons ref-item-path mode: mode)) (define (mt:get-run-stats) (cdb:remote-run db:get-run-stats #f)) + +;;====================================================================== +;; S T E P S +;;====================================================================== + Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -908,10 +908,34 @@ (let ((dparts (string-split dir "/")) (count (if (null? params) 1 (car params)))) (conc "/" (string-intersperse (take dparts (- (length dparts) count)) "/")))) + +(define (runs:recursive-delete-with-error-msg real-dir) + (if (> (system (conc "rm -rf " real-dir)) 0) + (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))) + +(define (runs:safe-delete-test-dir real-dir) + ;; first delete all sub-directories + (directory-fold + (lambda (f x) + (let ((fullname (conc real-dir "/" f))) + (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) + (+ 1 x)) + 0 real-dir) + ;; then files other than *testdat.db* + (directory-fold + (lambda (f x) + (let ((fullname (conc real-dir "/" f))) + (if (not (string-search (regexp "testdat.db") f)) + (runs:recursive-delete-with-error-msg fullname))) + (+ 1 x)) + 0 real-dir) + ;; then the entire directory + (runs:recursive-delete-with-error-msg real-dir)) + ;; Remove runs ;; fields are passing in through ;; action: ;; 'remove-runs ;; 'set-state-status @@ -989,11 +1013,13 @@ ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) - (hash-table-set! test-retry-time test-fulln (current-seconds))) + (begin + ;; want to set to REMOVING BUT CANNOT do it here? + (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin @@ -1006,20 +1032,19 @@ ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin - (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) + (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir) - (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f")) + (runs:safe-delete-test-dir real-dir) (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) (if real-dir (debug:print 0 "WARNING: directory " real-dir " does not exist") (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) (if (symbolic-link? run-dir) @@ -1038,10 +1063,12 @@ (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) + ;; Only delete the records *after* removing the directory. If things fail we have a record + (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f)