Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -1,6 +1,8 @@ # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' +# rm .o ; make install CSCOPTS='-profile' ; ... ; chicken-profile | less + PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -107,11 +107,11 @@ (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) (print-call-chain (current-error-port)) - (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (not (vector? dat)) ;; it is an error to not receive a vector (vector #f #f "remote must be called with a vector") (vector ;; return a vector + the returned data structure #t Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -3053,14 +3053,14 @@ (string-substitute (regexp "=") "_" (base64:base64-encode (z3:encode-buffer (with-output-to-string - (lambda ()(serialize obj))))) + (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. #t)) ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) - (else obj))) + (else obj))) ;; rpc (define (db:string->obj msg #!key (transport 'http)) (case transport ;; ((fs) msg) ((http fs) @@ -3073,11 +3073,11 @@ (lambda ()(deserialize))) (begin (debug:print-error 0 *default-log-port* "reception failed. Received " msg " but cannot translate it.") msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) - (else msg))) + (else msg))) ;; rpc (define (db:test-set-status-state dbstruct run-id test-id status state msg) (let ((dbdat (db:get-db dbstruct run-id))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbdat 'set-test-start-time (list test-id))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -146,11 +146,11 @@ -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname - -transport http|zmq : use http or zmq for transport (default is http) + -transport http|rpc : use http or rpc for transport (default is http) -daemonize : fork into background and disconnect from stdin/out -log logfile : send stdout and stderr to logfile -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all @@ -817,11 +817,11 @@ (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin (debug:print-info 0 *default-log-port* "Attempting to stop server with pid " pid) - (tasks:kill-server status hostname pullport pid transport))))) + (tasks:kill-server hostname pid))))) servers) (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) ;; must do, would have to add checks to many/all calls below (exit)))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1324,11 +1324,11 @@ ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) - ;; (debug:print 0 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running) + (BB> "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -101,10 +101,11 @@ result))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host +;; incidental: rotate logs in logs/ dir. ;; (define (server:run run-id) (let* ((curr-host (get-host-name)) (curr-ip (server:get-best-guess-address curr-host)) (target-host (configf:lookup *configdat* "server" "homehost" )) @@ -116,12 +117,15 @@ "") " -m testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &"))))) (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (if (not (directory-exists? "logs"))(create-directory "logs")) + ;; Rotate logs, logic: - ;; if > 500k and older than 1 week, remove previous compressed log and compress this log + ;; if > 500k and older than 1 week: + ;; remove previous compressed log and compress this log + ;; (directory-fold (lambda (file rem) (if (and (string-match "^.*.log" file) (> (file-size (conc "logs/" file)) 200000)) (let ((gzfile (conc "logs/" file ".gz"))) @@ -141,19 +145,20 @@ (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) + (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) ;; (system cmdln) (pop-directory))) -(define (server:get-client-signature) +(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -229,10 +229,18 @@ (define (tasks:server-force-clean-run-record mdb run-id iface port tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" (conc "defunct" tag) run-id iface port)) + +;; BB> adding missing func for --list-servers +(define (tasks:server-deregister mdb hostname #!key (pullport #f) (pid #f) (action #f)) ;;pullport pid: pid action: 'delete)) + (if (eq? action 'delete) + (sqlite3:execute mdb "DELETE FROM servers WHERE pid=? AND port=? AND hostname=?;" pid pullport hostname) + (sqlite3:execute mdb "UPDATE servers SET state='defunct', heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" + hostname pid))) + (define (tasks:server-delete-records-for-this-pid mdb tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" (conc "defunct" tag) (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id tag)