Overview
Comment: | Fixed server kill, reduced noise levels |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
5526a18984498a810bef7972971256a6 |
User & Date: | mrwellan on 2014-10-09 17:09:08 |
Other Links: | branch diff | manifest | tags |
Context
2014-10-10
| ||
10:49 | bumped version check-in: 69da7e37ab user: mrwellan tags: v1.60 | |
2014-10-09
| ||
17:09 | Fixed server kill, reduced noise levels check-in: 5526a18984 user: mrwellan tags: v1.60 | |
00:03 | Merged in refactor-sqlite3-calls. Can refactor in small steps from here on out check-in: c03075b49a user: matt tags: v1.60 | |
Changes
Modified api.scm from [4a3da8fcef] to [859483d7bd].
︙ | ︙ | |||
49 50 51 52 53 54 55 | ;; These are called by the server on recipt of /api calls (define (api:execute-requests dbstruct cmd params) (case (string->symbol cmd) ;; SERVERS ((start-server) (apply server:kind-run params)) | < < < < < < < < < < < | < < < < < | 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | ;; These are called by the server on recipt of /api calls (define (api:execute-requests dbstruct cmd params) (case (string->symbol cmd) ;; SERVERS ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) ;; KEYS ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) ((get-keys) (db:get-keys dbstruct)) ;; TESTS ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) |
︙ | ︙ | |||
111 112 113 114 115 116 117 | ((get-runs) (apply db:get-runs dbstruct params)) ((get-all-run-ids) (db:get-all-run-ids dbstruct)) ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) | | | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | ((get-runs) (apply db:get-runs dbstruct params)) ((get-all-run-ids) (db:get-all-run-ids dbstruct)) ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) ((find-and-mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) ;; STEPS ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) |
︙ | ︙ |
Modified db.scm from [8fe161caf5] to [7eb26e4c3a].
︙ | ︙ | |||
419 420 421 422 423 424 425 | ;; read the source table (sqlite3:for-each-row (lambda (a . b) (set! fromdat (cons (apply vector a b) fromdat))) fromdb full-sel) | | | 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 | ;; read the source table (sqlite3:for-each-row (lambda (a . b) (set! fromdat (cons (apply vector a b) fromdat))) fromdb full-sel) (debug:print-info 2 "found " (length fromdat) " records to sync") ;; read the target table (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) todb full-sel) |
︙ | ︙ | |||
748 749 750 751 752 753 754 | ;;====================================================================== ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); | | > | < < < | | > > | | | | | | | | | | | | 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 | ;;====================================================================== ;; select end_time-now from ;; (select testname,item_path,event_time+run_duration as ;; end_time,strftime('%s','now') as now from tests where state in ;; ('RUNNING','REMOTEHOSTSTART','LAUNCED')); (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime) (let* ((db (db:get-db dbstruct run-id)) (incompleted '()) (oldlaunched '()) (toplevels '()) (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) (deadtime (if (and deadtime-str (string->number deadtime-str)) (string->number deadtime-str) 7200))) ;; two hours (if (number? ovr-deadtime)(set! deadtime ovr-deadtime)) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes ;; ;; HOWEVER: this code in run:test seems to work fine ;; (> (- (current-seconds)(+ (db:test-get-event_time testdat) ;; (db:test-get-run_duration testdat))) ;; 600) ;; (db:delay-if-busy) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (begin (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id)) (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))) db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');" run-id deadtime) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config ;; ;; (db:delay-if-busy) (sqlite3:for-each-row (lambda (test-id run-dir uname testname item-path) (if (and (equal? uname "n/a") (equal? item-path "")) ;; this is a toplevel test ;; what to do with toplevel? call rollup? (set! toplevels (cons (list test-id run-dir uname testname item-path run-id) toplevels)) (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))) db "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');" run-id) (debug:print-info 18 "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.") ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE. ;; ;; (db:delay-if-busy) (let* (;; (min-incompleted (filter (lambda (x) ;; (let* ((testpath (cadr x)) ;; (tdatpath (conc testpath "/testdat.db")) ;; (dbexists (file-exists? tdatpath))) ;; (or (not dbexists) ;; if no file then something wrong - mark as incomplete ;; (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim ;; incompleted)) (min-incompleted-ids (map car incompleted)) ;; do 'em all (all-ids (append min-incompleted-ids (map car oldlaunched)))) (if (> (length all-ids) 0) (begin (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE") (sqlite3:execute db (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" (string-intersperse (map conc all-ids) ",") ");"))))) ;; Now do rollups for the toplevel tests ;; (for-each (lambda (toptest) (let ((test-name (list-ref toptest 3))) ;; (run-id (list-ref toptest 5))) (db:general-call db 'top-test-set-per-pf-counts (list test-name run-id test-name test-name test-name)))) ;; (list run-id test-name)))) toplevels))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; ;; 1. Look at test records either deleted or part of deleted run: |
︙ | ︙ |
Modified http-transport.scm from [7ee6223861] to [6063d55ba0].
︙ | ︙ | |||
373 374 375 376 377 378 379 | (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) | | | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 | (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) ;; ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) (begin (tasks:server-set-state! tdb server-id "dbprep") |
︙ | ︙ |
Modified rmt.scm from [d4f16a53b3] to [7baaea28d7].
︙ | ︙ | |||
81 82 83 84 85 86 87 | (rmt:send-receive cmd run-id params)))) (let ((max-avg-qry (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "800")))) (debug:print-info 4 "no server and read-only query, bypassing normal channel") ;; (if (rmt:write-frequency-over-limit? cmd run-id)(server:kind-run run-id)) (let ((curr-max (rmt:get-max-query-average))) (if (> (cdr curr-max) max-avg-qry) (begin | | | 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | (rmt:send-receive cmd run-id params)))) (let ((max-avg-qry (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "800")))) (debug:print-info 4 "no server and read-only query, bypassing normal channel") ;; (if (rmt:write-frequency-over-limit? cmd run-id)(server:kind-run run-id)) (let ((curr-max (rmt:get-max-query-average))) (if (> (cdr curr-max) max-avg-qry) (begin (debug:print-info 3 "Max average query, " (inexact->exact (round (cdr curr-max))) "ms (" (car curr-max) ") exceeds " max-avg-qry ", try starting server ...") (server:kind-run run-id)))) (rmt:open-qry-close-locally cmd run-id params))))) (define (rmt:update-db-stats rawcmd params duration) (mutex-lock! *db-stats-mutex*) (let* ((cmd (if (eq? rawcmd 'general-call) (car params) rawcmd)) (stat-vec (hash-table-ref/default *db-stats* cmd #f))) |
︙ | ︙ | |||
397 398 399 400 401 402 403 | (define (rmt:update-run-event_time run-id) (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) | | | | | 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 | (define (rmt:update-run-event_time run-id) (rmt:send-receive 'update-run-event_time #f (list run-id))) (define (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit) (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit))) (define (rmt:find-and-mark-incomplete run-id ovr-deadtime) (rmt:send-receive 'find-and-mark-incomplete run-id (list run-id ovr-deadtime))) ;;====================================================================== ;; M U L T I R U N Q U E R I E S ;;====================================================================== ;; Need to move this to multi-run section and make associated changes (define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f)) (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (rmt:find-and-mark-incomplete run-id ovr-deadtime)) run-ids))) ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found ;; ;; Run this at the client end since we have to connect to multiple run-id dbs ;; |
︙ | ︙ |