Overview
Comment: | http transport with try-nanomsg changes completed first pass Validation 100% |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | try-nanomsg |
Files: | files | file ages | folders |
SHA1: |
b260c4c5d969da46f3a01ba6188986d4 |
User & Date: | matt on 2014-11-29 20:33:57 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-29
| ||
21:50 | Added a little time spread on client starting servers - try to avoid startup storms check-in: b3a83a0cec user: matt tags: try-nanomsg | |
20:33 | http transport with try-nanomsg changes completed first pass Validation 100% check-in: b260c4c5d9 user: matt tags: try-nanomsg | |
13:43 | Fixed breakage caused by enabling newdashboard. Sigh. check-in: 2f12af3ef0 user: matt tags: try-nanomsg | |
Changes
Modified client.scm from [eb2cd66f13] to [8f8fe7296f].
︙ | |||
51 52 53 54 55 56 57 | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | - + | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; |
︙ | |||
84 85 86 87 88 89 90 91 92 93 94 95 96 97 | 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | + | (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case *transport-type* ((http)(http-transport:close-connections run-id))) (hash-table-delete! *runremote* run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up |
︙ |
Modified common.scm from [7a1802c2d4] to [5faac0528e].
︙ | |||
64 65 66 67 68 69 70 | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | - + | (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access (define *db-access-mutex* (make-mutex)) ;; SERVER (define *my-client-signature* #f) ;; (define *transport-type* 'nmsg) |
︙ |
Modified db.scm from [e95ebbcbeb] to [ad92b83fd4].
︙ | |||
615 616 617 618 619 620 621 622 623 624 625 626 627 628 | 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 | + | ;; ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync run-ids . options) (let* ((toppath (launch:setup-for-run)) (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) (mtdb (if toppath (db:open-megatest-db))) (allow-cleanup (if run-ids #f #t)) (run-ids (if run-ids run-ids (if toppath (begin (db:delay-if-busy mtdb) (db:get-all-run-ids mtdb))))) (tdbdat (tasks:open-db)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) |
︙ | |||
661 662 663 664 665 666 667 668 | 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 | + + + + + + + + - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + | (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))) (debug:print 0 "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db") (db:replace-test-records dbstruct run-id testrecs) (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-get-rundb dbstruct))))) run-ids))) ;; now ensure all newdb data are synced to megatest.db ;; do not use the run-ids list passed in to the function ;; (if (member 'new2old options) (let* ((maindb (make-dbr:dbstruct path: toppath local: #t)) (src-run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0)))) (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <)) (count 1) (total (length all-run-ids)) (dead-runs '())) |
︙ | |||
991 992 993 994 995 996 997 | 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 | - + | ;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up dbdat) |
︙ | |||
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 | 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + | (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;"))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; ;; 1. Look at test records either deleted or part of deleted run: ;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up-rundb dbdat) ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") (let* ((db (db:dbdat-get-db dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list ;; delete all tests that belong to runs that are 'deleted' ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") ;; delete all tests that are 'DELETED' "DELETE FROM tests WHERE state='DELETED';" )))) (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count before clean: " tot)) count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;"))) ;; Clean out old junk and vacuum the database ;; ;; Ultimately do something like this: ;; ;; 1. Look at test records either deleted or part of deleted run: ;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up-maindb dbdat) ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db") (let* ((db (db:dbdat-get-db dbdat)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list ;; delete all tests that belong to runs that are 'deleted' ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");") ;; delete all tests that are 'DELETED' "DELETE FROM runs WHERE state='deleted';" ))) (dead-runs '())) (sqlite3:for-each-row (lambda (run-id) (set! dead-runs (cons run-id dead-runs))) db "SELECT id FROM runs WHERE state='deleted';") (db:delay-if-busy dbdat) (sqlite3:with-transaction db (lambda () (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count before clean: " tot)) count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) (db:delay-if-busy dbdat) (sqlite3:execute db "VACUUM;") dead-runs)) ;;====================================================================== ;; M E T A G E T A N D S E T V A R S ;;====================================================================== ;; returns number if string->number is successful, string otherwise ;; also updates *global-delta* |
︙ | |||
2321 2322 2323 2324 2325 2326 2327 | 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 | - + | (z3:decode-buffer (base64:base64-decode (string-substitute (regexp "_") "=" msg #t))) (lambda ()(deserialize))) (begin (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.") |
︙ |
Modified http-transport.scm from [fc5c6a3a75] to [4218ba432d].
︙ | |||
281 282 283 284 285 286 287 | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 | - + + | (set! res (vector success (db:string->obj (handle-exceptions exn (begin (set! success #f) |
︙ | |||
311 312 313 314 315 316 317 | 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | - + - + | (thread-join! th1) (thread-terminate! th2) (debug:print-info 11 "got res=" res) (if (vector? res) (if (vector-ref res 0) res (begin ;; note: this code also called in nmsg-transport - consider consolidating it |
︙ |
Modified launch.scm from [34882953c1] to [111f911e42].
︙ | |||
456 457 458 459 460 461 462 463 464 465 466 467 468 469 | 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 | + + + + + + + | (set! *configinfo* (find-and-read-config (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config") environ-patt: "env-override" given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") pathenvvar: "MT_RUN_AREA_HOME")) (set! *configdat* (if (car *configinfo*)(car *configinfo*) #f)) (set! *toppath* (if (car *configinfo*)(cadr *configinfo*) #f)) (let* ((tmptransport (configf:lookup *configdat* "server" "transport")) (transport (if tmptransport (string->symbol tmptransport) 'http))) (if (member transport '(http rpc nmsg)) (set! *transport-type* transport) (begin (debug:print 0 "ERROR: Unrecognised transport " transport) (exit)))) (let ((linktree (configf:lookup *configdat* "setup" "linktree"))) ;; link tree is critical (if linktree (if (not (file-exists? linktree)) (begin (handle-exceptions exn (begin |
︙ |
Modified megatest.scm from [16af3583d8] to [f155a05495].
︙ | |||
281 282 283 284 285 286 287 288 289 290 291 | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | + - + + + | "-q" ;; quiet 0, errors/warnings only ) args:arg-hash 0)) ;; The watchdog is to keep an eye on things like db sync etc. ;; (define *time-zero* (current-seconds)) (define *watchdog* (make-thread (lambda () (thread-sleep! 0.05) ;; delay for startup |
︙ | |||
308 309 310 311 312 313 314 | 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | - - + + + + + + + - + | (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds"))) ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run ;; (begin ;; (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id) ;; (server:kind-run run-id))))) (hash-table-delete! *db-local-sync* run-id))) (mutex-unlock! *db-multi-sync-mutex*)) |
︙ |
Modified rmt.scm from [7ce19b3436] to [fbd1a17708].
︙ | |||
62 63 64 65 66 67 68 69 70 | 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | + - + - + + + + - - - + + + + + + | cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) (client:setup run-id) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected ;; clean out old connections |
︙ | |||
333 334 335 336 337 338 339 | 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | + - + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + | (debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id) (print-call-chain (current-error-port)) '()))) ;; IDEA: Threadify these - they spend a lot of time waiting ... ;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in) (let ((multi-run-mutex (make-mutex)) |
︙ |
Modified tasks.scm from [42971aa928] to [1cac97501f].
︙ | |||
359 360 361 362 363 364 365 | 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 | - + | (define (tasks:need-server run-id) (let ((forced (configf:lookup *configdat* "server" "required")) (maxqry (cdr (rmt:get-max-query-average run-id))) (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10")))) (cond (forced (if (common:low-noise-print 60 run-id "server required is set") |
︙ |
Modified tests/fullrun/megatest.config from [26d5518426] to [ebc04ead74].
︙ | |||
29 30 31 32 33 34 35 | 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | - - - | waivercommentpatt ^WW\d+ [a-z].* incomplete-timeout 1 # yes, anything else is no run-wait yes |
︙ | |||
122 123 124 125 126 127 128 129 130 131 132 133 134 135 | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | + + + + + + | blah MAX_ALLOWED_LOAD 200 # XTERM [system xterm] # RUNDEAD [system exit 56] [server] # Use http instead of direct filesystem access transport http # transport fs # transport nmsg synchronous 0 # If the server can't be started on this port it will try the next port until # it succeeds port 8080 # This server will keep running this number of hours after last access. |
︙ |