Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -239,10 +239,11 @@ ;; TEST DATA ((read-test-data) (apply db:read-test-data dbstruct params)) ;; MISC + ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) ((login) (apply db:login dbstruct params)) ((general-call) (let ((stmtname (car params)) (run-id (cadr params)) (realparams (cddr params))) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -1143,51 +1143,92 @@ (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) (eq? res 0))) ;; ideally put all this info into the db, no need to preserve it across moving homehost ;; -(define (common:get-least-loaded-host hosts-raw) +;; return list of +;; ( reachable? cpuload update-time ) +(define (common:get-host-info hostname) + (let* ((loadinfo (rmt:get-latest-host-load hostname)) + (load (car loadinfo)) + (load-sample-time (cdr loadinfo)) + (load-sample-age (- (current-seconds) load-sample-time)) + (loadinfo-timeout-seconds 20) + (host-last-update-timeout-seconds 10) + (host-rec (hash-table-ref/default *host-loads* hostname #f)) + ) + (cond + ((< load-sample-age loadinfo-timeout-seconds) + ;;(print "BB> chr - 1") + (list #t + load-sample-time + load)) + ((and host-rec + (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) + ;;(print "BB> chr - 2") + (list #t + (host-last-update host-rec) + (host-last-cpuload host-rec ))) + ((common:unix-ping hostname) + ;;(print "BB> chr - 3 host-rec="host-rec" lu="(if host-rec (- (current-seconds) (host-last-update host-rec)) "None")) + (list #t + (current-seconds) + (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) + (else + (list #f 0 -1))))) + +(define (common:update-host-loads-table hosts-raw) (let* ((hosts (filter (lambda (x) (string-match (regexp "^\\S+$") x)) hosts-raw))) - (if (null? hosts) - #f - ;; - ;; stategy: - ;; sort by last-used and normalized-load - ;; if last-updated > 15 seconds then re-update - ;; take the host with the lowest load with the lowest last-used (i.e. not used for longest time) - ;; - (let ((best-host #f) - (curr-time (current-seconds))) - (for-each - (lambda (hostname) - (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) - (if h - h - (let ((h (make-host))) - (hash-table-set! *host-loads* hostname h) - h)))) - ;; if host hasn't been pinged in 15 sec update it's data - (ping-good (if (< (- curr-time (host-last-update rec)) 15) - (host-reachable rec) - (or (host-reachable rec) - (begin - (host-reachable-set! rec (common:unix-ping hostname)) - (host-last-update-set! rec curr-time) - (host-last-cpuload-set! rec (common:get-normalized-cpu-load hostname)) - (host-reachable rec)))))) - (cond - ((not best-host) - (set! best-host hostname)) - ((and ping-good - (< (alist-ref 'adj-core-load (host-last-cpuload rec)) - (alist-ref 'adj-core-load - (host-last-cpuload (hash-table-ref *host-loads* best-host))))) - (set! best-host hostname))))) - hosts) - best-host)))) + (for-each + (lambda (hostname) + (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (host-info (common:get-host-info hostname)) + (is-reachable (car host-info)) + (last-reached-time (cadr host-info)) + (load (caddr host-info))) + (host-reachable-set! rec is-reachable) + (host-last-update-set! rec last-reached-time) + (host-last-cpuload-set! rec load))) + hosts))) + +(define (common:get-least-loaded-host hosts-raw) + (let* ((hosts (filter (lambda (x) + (string-match (regexp "^\\S+$") x)) + hosts-raw)) + (best-host #f) + (best-load 99999) + (curr-time (current-seconds))) + (common:update-host-loads-table hosts) + (for-each + (lambda (hostname) + (let* ((rec + (let ((h (hash-table-ref/default *host-loads* hostname #f))) + (if h + h + (let ((h (make-host))) + (hash-table-set! *host-loads* hostname h) + h)))) + (reachable (host-reachable rec)) + (load (host-last-cpuload rec))) + (cond + ((not reachable) #f) + ((< (+ load (/ (random 250) 1000)) ;; add a random factor to keep from getting in a rut + (+ best-load (/ (random 250) 1000)) ) + (set! best-load load) + (set! best-host hostname))))) + hosts) + best-host)) + + + (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (first (car loadavg)) (next (cadr loadavg)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -3476,10 +3476,29 @@ (set! res (cons (vector state status count) res))) db "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" run-id testname) res)) + + +(define (db:get-latest-host-load dbstruct raw-hostname) + (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) + (res (cons -1 0)) + (mydb (db:dbdat-get-db (db:get-db dbstruct 0))) + ) + (print "BB> hostname="hostname" raw-hostname="raw-hostname" dbstruct="dbstruct" mydb="mydb) + (db:with-db + dbstruct + 0 + #f + (lambda (db) + (sqlite3:for-each-row + (lambda (cpuload update-time) (set! res (cons cpuload update-time))) + db + "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" + hostname))) res )) + (define (db:set-top-level-from-items dbstruct run-id testname) (let* ((dbdat (db:get-db dbstruct run-id)) (db (db:dbdat-get-db dbdat)) (summ (db:get-state-status-summary db run-id testname)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -318,11 +318,11 @@ ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (get-cpu-load)) (disk-free (get-df (current-directory)))) - (let ((new-cpu-load (let* ((load (get-cpu-load)) + (let ((new-cpu-load (let* ((load (common:get-normalized-cpu-load #f)) (delta (abs (- load cpu-load)))) (if (> delta 0.6) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (get-df (current-directory))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -320,10 +320,15 @@ ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) + +;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host +(define (rmt:get-latest-host-load hostname) + (rmt:send-receive 'get-latest-host-load 0 (list hostname))) + ;; (define (rmt:sync-inmem->db run-id) ;; (rmt:send-receive 'sync-inmem->db run-id '())) (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr