ADDED mtserve.scm Index: mtserve.scm ================================================================== --- /dev/null +++ mtserve.scm @@ -0,0 +1,266 @@ +;; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +;; (include "common.scm") +;; (include "megatest-version.scm") + +;; fake out readline usage of toplevel-command +(define (toplevel-command . a) #f) + +(use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) + readline apropos json directory-utils typed-records + srfi-18 extras format (prefix pkts pkts:)) + +(declare (uses common)) +(declare (uses megatest-version)) +(declare (uses margs)) +(declare (uses server)) +;; (declare (uses daemon)) + +(declare (uses db)) +(import db) + +(declare (uses portlogger)) +(import portlogger) + +(declare (uses nmsg-transport)) +(import (prefix nmsg-transport nmsg:)) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") +(include "run_records.scm") +(include "megatest-fossil-hash.scm") + +(define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file +(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file + +;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file +;; +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.mtserverrc"))) + (if (file-exists? debugcontrolf) + (load debugcontrolf))) + +;; usage logging, careful with this, it is not designed to deal with all real world challenges! +;; +(if (and *usage-log-file* + (file-write-access? *usage-log-file*)) + (with-output-to-file + *usage-log-file* + (lambda () + (print + (if *usage-use-seconds* + (current-seconds) + (time->string + (seconds->local-time (current-seconds)) + "%Yww%V.%w %H:%M:%S")) + " " + (current-user-name) " " + (current-directory) " " + "\"" (string-intersperse (argv) " ") "\"")) + #:append)) + +;; Disabled help items +;; -rollup : (currently disabled) fill run (set by :runname) with latest test(s) +;; from prior runs with same keys +;; -daemonize : fork into background and disconnect from stdin/out + +(define help (conc " +Megatest, documentation at http://www.kiatoa.com/fossils/megatest + version " megatest-version " + license GPL, Copyright Matt Welland 2006-2017 + +Usage: mtserver [options] + -h : this help + -manual : show the Megatest user manual + -version : print megatest version (currently " megatest-version ") + +Launching and managing runs + -run : run all tests or as specified by -testpatt + -server -|hostname : start the server (reduces contention on megatest.db), use + - to automatically figure out hostname + -log logfile : send stdout and stderr to logfile + -list-servers : list the servers + -kill-servers : kill all servers + -repl : start a repl (useful for extending megatest) + -ping run-id|host:port : ping server, exit with 0 if found + -debug N|N,M,O... : enable debug 0-N or N and M and O ... + +Examples + +# Get test path, use '.' to get a single path or a specific path/file pattern +megatest -test-files 'logs/*.log' -target ubuntu/n%/no% -runname w49% -testpatt test_mt% + +Called as " (string-intersperse (argv) " ") " +Version " megatest-version ", built from " megatest-fossil-hash )) + +;; -gui : start a gui interface +;; -config fname : override the runconfigs file with fname + +;; process args +(define remargs (args:get-args + (argv) + (list "-start-dir" + "-server" + "-port" + "-log" + ) + (list "-h" "-help" "--help" + "-manual" + "-version" + "-list-servers" + "-kill-servers" + "-v" ;; verbose 2, more than normal (normal is 1) + "-q" ;; quiet 0, errors/warnings only + + "-diff-rep" + ) + args:arg-hash + 0)) + +;; Add args that use remargs here +;; +(if (and (not (null? remargs)) + (not (or + (args:get-arg "-runstep") + (args:get-arg "-envcap") + (args:get-arg "-envdelta") + ) + )) + (debug:print-error 0 *default-log-port* "Unrecognised arguments: " (string-intersperse (if (list? remargs) remargs (argv)) " "))) + +;; before doing anything else change to the start-dir if provided +;; +(if (args:get-arg "-start-dir") + (if (file-exists? (args:get-arg "-start-dir")) + (let ((fullpath (common:real-path (args:get-arg "-start-dir")))) + (setenv "PWD" fullpath) + (change-directory fullpath)) + (begin + (debug:print-error 0 *default-log-port* "non-existant start dir " (args:get-arg "-start-dir") " specified, exiting.") + (exit 1)))) + +;; immediately set MT_TARGET if -reqtarg or -target are available +;; +(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) + (if targ (setenv "MT_TARGET" targ))) + +;; The watchdog is to keep an eye on things like db sync etc. +;; + +;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage +(define *watchdog* (make-thread + (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (common:watchdog))) + "Watchdog thread")) +;;====================================================================== +;; Strive for clean exit handling +;;====================================================================== + +(define (server-exit-procedure) + (on-exit (lambda () + ;; close the databases, ensure the pkt is removed! + 0))) + +;; Copied from the SDL2 examples. +;; +;; Schedule quit! to be automatically called when your program exits normally. +(on-exit server-exit-procedure) + +;; Install a custom exception handler that will call quit! and then +;; call the original exception handler. This ensures that quit! will +;; be called even if an unhandled exception reaches the top level. +(current-exception-handler + (let ((original-handler (current-exception-handler))) + (lambda (exception) + (server-exit-procedure) + (original-handler exception)))) + +;;(if (not (args:get-arg "-server")) +;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog +(let* ((no-watchdog-args + '("-list-runs" + "-testdata-csv" + "-list-servers" + "-server" + "-list-disks" + "-list-targets" + "-show-runconfig" + ;;"-list-db-targets" + "-show-runconfig" + "-show-config" + "-show-cmdinfo" + "-cleanup-db")) + (no-watchdog-args-vals (filter (lambda (x) x) + (map args:get-arg no-watchdog-args))) + (start-watchdog (null? no-watchdog-args-vals))) + ;;(BB> "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals) + (if start-watchdog + (thread-start! *watchdog*))) + + +;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions +(define (open-logfile logpath) + (condition-case + (let* ((log-dir (or (pathname-directory logpath) "."))) + (if (not (directory-exists? log-dir)) + (system (conc "mkdir -p " log-dir))) + (open-output-file logpath)) + (exn () + (debug:print-error 0 *default-log-port* "Could not open log file for write: "logpath) + (define *didsomething* #t) + (exit 1)))) + +;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not +;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation +;; where (launch:setup) returns #f? +;; +(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server + (handle-exceptions + exn + (begin + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn)) + ) + (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified + (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name + (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) + (oup (open-logfile logf))) + (if (not (args:get-arg "-log")) + (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log + (debug:print-info 0 *default-log-port* "Sending log output to " logf) + (set! *default-log-port* oup)))) + +(if (or (args:get-arg "-h") + (args:get-arg "-help") + (args:get-arg "--help")) + (begin + (print help) + (exit))) + +(if (args:get-arg "-version") + (begin + (print (common:version-signature)) ;; (print megatest-version) + (exit))) + +(define *didsomething* #f) + ADDED util.scm Index: util.scm ================================================================== --- /dev/null +++ util.scm @@ -0,0 +1,423 @@ +;;====================================================================== +;; Copyright 2018, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; +;;====================================================================== + +;;====================================================================== +;; Tests +;;====================================================================== + +(declare (unit util)) + +(declare (uses common)) + +(module util + * + +(import chicken scheme data-structures extras srfi-13 ports ) + + +;;====================================================================== +;; L O C K I N G M E C H A N I S M S +;;====================================================================== + +;; faux-lock is deprecated. Please use simple-lock below +;; +(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) + (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count + (if (> wait-time 0) + (begin + (thread-sleep! 1) + (if (eq? wait-time 1) ;; only one second left, steal the lock + (begin + (debug:print-info 0 *default-log-port* "stealing lock for " keyname) + (common:faux-unlock keyname force: #t))) + (common:faux-lock keyname wait-time: (- wait-time 1))) + #f) + (begin + (rmt:no-sync-set keyname (conc (current-process-id))) + (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) + +(define (common:faux-unlock keyname #!key (force #f)) + (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) + (begin + (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) + #t) + #f)) + +;; simple lock. improve and converge on this one. +;; +(define (common:simple-lock keyname) + (rmt:no-sync-get-lock keyname)) + +(define (common:run-a-command cmd #!key (with-vars #f)) + (let* ((pre-cmd (dtests:get-pre-command)) + (post-cmd (dtests:get-post-command)) + (fullcmd (if (or pre-cmd post-cmd) + (conc pre-cmd cmd post-cmd) + (conc "viewscreen " cmd)))) + (debug:print-info 02 *default-log-port* "Running command: " fullcmd) + (if with-vars + (common:without-vars cmd) + (common:without-vars fullcmd "MT_.*")))) + +;; ideally put all this info into the db, no need to preserve it across moving homehost +;; +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds + (host-last-update-timeout-seconds 4) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds + (else + (list #f 0 -1) ;; bad host, don't use! + )))) + +(define (get-uname . params) + (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) + (uname #f)) + (if (null? (car uname-res)) + "unknown" + (caar uname-res)))) + +(define (get-unix-df path) + (let* ((df-results (process:cmd-run->list (conc "df " path))) + (space-rx (regexp "([0-9]+)\\s+([0-9]+)%")) + (freespc #f)) + ;; (write df-results) + (for-each (lambda (l) + (let ((match (string-search space-rx l))) + (if match + (let ((newval (string->number (cadr match)))) + (if (number? newval) + (set! freespc newval)))))) + (car df-results)) + freespc)) + +;; given path get free space, allows override in [setup] +;; with free-space-script /path/to/some/script.sh +;; +(define (get-df path) + (if (configf:lookup *configdat* "setup" "free-space-script") + (with-input-from-pipe + (conc (configf:lookup *configdat* "setup" "free-space-script") " " path) + (lambda () + (let ((res (read-line))) + (if (string? res) + (string->number res))))) + (get-unix-df path))) + +(define (std-exit-procedure) + (on-exit (lambda () 0)) + ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) + (let ((no-hurry (if *time-to-exit* ;; hurry up + #f + (begin + (set! *time-to-exit* #t) + #t)))) + (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") + (if (and no-hurry (debug:debug-mode 18)) + (rmt:print-db-stats)) + (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds + (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated + (if *task-db* + (let ((db (cdr *task-db*))) + (if (sqlite3:database? db) + (begin + (sqlite3:interrupt! db) + (sqlite3:finalize! db #t) + ;; (vector-set! *task-db* 0 #f) + (set! *task-db* #f))))) + (http-client#close-all-connections!) + ;; (if (and *runremote* + ;; (remote-conndat *runremote*)) + ;; (begin + ;; (http-client#close-all-connections!))) ;; for http-client + (if (not (eq? *default-log-port* (current-error-port))) + (close-output-port *default-log-port*)) + (set! *default-log-port* (current-error-port))) "Cleanup db exit thread")) + (th2 (make-thread (lambda () + (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...") + (if no-hurry + (begin + (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff + (begin + (thread-sleep! 2))) + (debug:print 4 *default-log-port* " ... done") + ) + "clean exit"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + ) + ) + + 0) + +;; from metadat lookup MEGATEST_VERSION +;; +(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB + (rmt:get-var "MEGATEST_VERSION")) + +(define (common:set-last-run-version) + (rmt:set-var "MEGATEST_VERSION" (common:version-signature))) + +;;====================================================================== +;; D I S K S P A C E +;;====================================================================== + +(define (common:get-disk-space-used fpath) + (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read)) + +(define (common:check-space-in-dir dirpath required) + (let* ((dbspace (if (directory? dirpath) + (get-df dirpath) + 0))) + (list (> dbspace required) + dbspace + required + dirpath))) + +;; check space in dbdir and in megatest dir +;; returns: ok/not dbspace required-space +;; +(define (common:check-db-dir-space) + (let* ((required (string->number + (or (configf:lookup *configdat* "setup" "dbdir-space-required") + "100000"))) + (dbdir (common:get-db-tmp-area)) ;; (db:get-dbdir)) + (tdbspace (common:check-space-in-dir dbdir required)) + (mdbspace (common:check-space-in-dir *toppath* required))) + (sort (list tdbspace mdbspace) (lambda (a b) + (< (cadr a)(cadr b)))))) + +;; check available space in dbdir, exit if insufficient +;; +(define (common:check-db-dir-and-exit-if-insufficient) + (let* ((spacedat (car (common:check-db-dir-space))) ;; look only at worst for now + (is-ok (car spacedat)) + (dbspace (cadr spacedat)) + (required (caddr spacedat)) + (dbdir (cadddr spacedat))) + (if (not is-ok) + (begin + (debug:print-error 0 *default-log-port* "Insufficient space in " dbdir ", require " required ", have " dbspace ", exiting now.") + (exit 1))))) + +;; paths is list of lists ((name path) ... ) +;; +(define (common:get-disk-with-most-free-space disks minsize) + (let ((best #f) + (bestsize 0)) + (for-each + (lambda (disk-num) + (let* ((dirpath (cadr (assoc disk-num disks))) + (freespc (cond + ((not (directory? dirpath)) + (if (common:low-noise-print 300 "disks not a dir " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it.")) + -1) + ((not (file-write-access? dirpath)) + (if (common:low-noise-print 300 "disks not writeable " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it.")) + -1) + ((not (eq? (string-ref dirpath 0) #\/)) + (if (common:low-noise-print 300 "disks not a proper path " disk-num) + (debug:print 0 *default-log-port* "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it.")) + -1) + (else + (get-df dirpath))))) + (if (> freespc bestsize) + (begin + (set! best (cons disk-num dirpath)) + (set! bestsize freespc))))) + (map car disks)) + (if (and best (> bestsize minsize)) + best + #f))) ;; #f means no disk candidate found + +;; see defstruct host at top of file. +;; host: reachable last-update last-used last-cpuload +;; +(define (common:update-host-loads-table hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the +;; [host-rules] section. +;; +(define (common:get-least-loaded-host hosts-raw host-type configdat) + (let* ((rdat (configf:lookup configdat "host-rules" host-type)) + (rules (common:val->alist (or rdat "") convert: #t)) ;; maxnload, maxnjobs, maxjobrate + (maxnload (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load + (maxnjobs (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs + (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second + (hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + ;; (best-host #f) + (get-rec (lambda (hostname) + ;; (print "get-rec hostname=" hostname) + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h))))) + (best-load 99999) + (curr-time (current-seconds)) + (get-hosts-sorted (lambda (hosts) + (sort hosts (lambda (a b) + (let ((a-rec (get-rec a)) + (b-rec (get-rec b))) + ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec)) + ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec)) + (< (host-last-used a-rec) + (host-last-used b-rec)))))))) + (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) + (if (null? hosts) + #f ;; no hosts to select from. All done and giving up now. + (let ((hosts-sorted (get-hosts-sorted hosts))) + (common:update-host-loads-table hosts) + (let loop ((hostname (car hosts-sorted)) + (tal (cdr hosts-sorted)) + (best-host #f)) + (let* ((rec (get-rec hostname)) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec)) + (last-used (host-last-used rec)) + (delta (- curr-time last-used)) + (job-rate (if (> delta 0) + (/ 1 delta) + 999)) ;; jobs per second + (new-best + (cond + ((not reachable) + (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.") + best-host) + ((and (< load maxnload) ;; load is acceptable + (< job-rate maxjobrate)) ;; job rate is acceptable + (set! best-load load) + hostname) + (else best-host)))) + (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." ) + (if new-best + (begin ;; found a host, return it + (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate) + (host-last-used-set! rec curr-time) + new-best) + (if (null? tal) #f (loop (car tal)(cdr tal) best-host))))))))) + +(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)) + (let* ((loadavg (common:get-cpu-load remote-host)) + (numcpus (if (< 1 numcpus-in) ;; not possible + (common:get-num-cpus remote-host) + numcpus-in)) + (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME? + (first (car loadavg)) + (next (cadr loadavg)) + (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1 + (loadjmp (- first next)) + (adjwait (+ (random 10)(/ (- 1000 count) 10) waitdelay))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously + (cond + ((and (> first adjload) + (> count 0)) + (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg "")) + (thread-sleep! adjwait) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) + ((and (> loadjmp numcpus) + (> count 0)) + (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) + (thread-sleep! adjwait) + (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))))) + +(define (common:wait-for-homehost-load maxload msg) + (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. + #f + (common:get-homehost))) + (hh (if hh-dat (car hh-dat) #f)) + (numcpus (common:get-num-cpus hh))) + (common:wait-for-normalized-load maxload msg hh))) + +(define (common:get-num-cpus remote-host) + (let* ((actual-host (or remote-host (get-host-name)))) + (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often! + (let* ((proc (lambda () + (let loop ((numcpu 0) + (inl (read-line))) + (if (eof-object? inl) + (begin + (common:write-cached-info remote-host "num-cpus" numcpu) + numcpu) + (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) + (+ numcpu 1) + numcpu) + (read-line)))))) + (result (if remote-host + (with-input-from-pipe + (conc "ssh " remote-host " cat /proc/cpuinfo") + proc) + (with-input-from-file "/proc/cpuinfo" proc)))) + (common:write-cached-info actual-host "num-cpus" result) + result)))) + +;; wait for normalized cpu load to drop below maxload +;; +(define (common:wait-for-normalized-load maxload msg remote-host) + (let ((num-cpus (common:get-num-cpus remote-host))) + (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) + + +) +