Megatest

Changes On Branch v1.65-diet2
Login

Changes In Branch v1.65-diet2 Excluding Merge-Ins

This is equivalent to a diff from 275adb0d10 to 82ee02c9c1

2021-01-24
19:30
deal with empty response Closed-Leaf check-in: 82ee02c9c1 user: matt tags: v1.65-diet2
15:38
Merged in latest check-in: 14db3c2571 user: matt tags: v1.65-diet2
2021-01-17
23:34
Start from low load node and add diet one by one check-in: f462c25d37 user: matt tags: v1.65-diet2
14:29
merged diet branch Closed-Leaf check-in: b6a618cc23 user: matt tags: v1.65-pre-69-diet
2020-10-03
17:58
Bad logic on re-calc of prereq needed. The (runs:testdat-prereqs-not-met testdat) is telling you that this needs recalc as it was previously not met. Thus can bypass if *was* met previously (although why would we reach here if it was met previously?). Closed-Leaf check-in: 4e90b83f0b user: matt tags: v1.65-big-refactor (unpublished)
2020-09-05
21:50
Fixed wrong use of optional that should have been key. Closed-Leaf check-in: ff41f9d1e7 user: matt tags: v1.65-prereq-qry-freq
13:41
Try reduced frequency queries for prereq not met. ==/3.5/0.83/PASS/1201/mars/== check-in: 275adb0d10 user: matt tags: v1.65-prereq-qry-freq
11:17
Merged cleanup branch back to v1.65 ==9.4/2.2/1201/WARN/mars== check-in: 0cbf1a0b26 user: matt tags: v1.65

Modified api.scm from [4fa67bb6bd] to [fc875a3d82].



1
2
3
4
5
6
7


;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
>
>







1
2
3
4
5
6
7
8
9


;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            (foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")







|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            #;(foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381

382
383

384


385
386
387

388
389
390
391
392
393
394
395
396
397
398
399
400
401



402
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))

  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))

	 (params  (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)


	 (resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	 (success (vector-ref resdat 0))
	 (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)

    (if (not success)
	(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
    (if (> *api-process-request-count* *max-api-process-requests*)
	(set! *max-api-process-requests* *api-process-request-count*))
    (set! *api-process-request-count* (- *api-process-request-count* 1))
    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))
    ;;      res 
    ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
    (db:obj->string res transport: 'http)))











|




|












>


>
|
>
>
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>

359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))
  (debug:print 4 *default-log-port* "server-id:"  *server-id*)
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
         (key     ($ 'key))   
	 (params  (db:string->obj paramsj transport: 'http))) ;; incoming data from the POST (or is it a GET?)
    (debug:print 4 *default-log-port* "cmd:" cmd " with params " params "key " key)
    (if (equal? key *server-id*)
	(let* ((resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	       (success (vector-ref resdat 0))
	       (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
	  (debug:print 4 *default-log-port* "res:" res)
	  (if (not success)
	      (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
	  (if (> *api-process-request-count* *max-api-process-requests*)
	      (set! *max-api-process-requests* *api-process-request-count*))
	  (set! *api-process-request-count* (- *api-process-request-count* 1))
	  ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
	  ;; (rmt:dat->json-str
	  ;;  (if (or (string? res)
	  ;;          (list?   res)
	  ;;          (number? res)
	  ;;          (boolean? res))
	  ;;      res 
	  ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
	  (db:obj->string res transport: 'http))
	(begin
	  (debug:print 0 *default-log-port*   "Server refused to process request. Sever id mismatch. recived " key " expected:  " *server-id* ".\nOther arguments recived: cmd=" cmd " params = " params) 
	  (db:obj->string (conc "Server refused to process request server-id mismatch: " key ", " *server-id*) transport: 'http)))))

Modified archive.scm from [f391351322] to [a5f3e3b4ad].

389
390
391
392
393
394
395

396
397
398
399
400
401
402
              (debug:print-info 1 *default-log-port* "creating triggers after updating linktree")   
       (rmt:create-all-triggers)
))  

(define (archive:ls->list  bup-exe archive-dir internal-path)
  (let ((cmd (conc bup-exe " -d " archive-dir  " ls -l " internal-path "| awk '{print $6}' | sort"))
        (res '()))

    (handle-exceptions
     exn
     #f ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let* ((inl (read-lines)))







>







389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
              (debug:print-info 1 *default-log-port* "creating triggers after updating linktree")   
       (rmt:create-all-triggers)
))  

(define (archive:ls->list  bup-exe archive-dir internal-path)
  (let ((cmd (conc bup-exe " -d " archive-dir  " ls -l " internal-path "| awk '{print $6}' | sort"))
        (res '()))
    (debug:print-info 0 *default-log-port* cmd)
    (handle-exceptions
     exn
     #f ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let* ((inl (read-lines)))
410
411
412
413
414
415
416
417
418

419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
(define (seconds->std-time-str sec)
  (time->string 
   (seconds->local-time sec)
   "%Y-%m-%d-%H%M%S"))
 

(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name run-id test-partial-path test-last-update)
    (print (seconds->std-time-str test-last-update)) 
    (let* ((internal-path (conc testsuite-name "-" run-id))

           (ts-list (archive:ls->list  bup-exe archive-dir internal-path))
           (ds-flag (vector-ref (seconds->local-time) 8)))
           (let loop ((hed (car ts-list))
                       (tail (cdr ts-list)))
                   (if (and (null? tail) (equal? hed "latest"))
                        #f
                    (if (and (not (null? tail)) (equal? hed "latest"))
                        (loop (car tail) (cdr tail))
                  (let* ((archive-seconds (time-string->seconds hed ds-flag)))
                    (if (< (abs (- archive-seconds test-last-update)) 120)
                    (let* ((test-list (archive:ls->list  bup-exe archive-dir (conc internal-path "/" hed "/" test-partial-path))))
                           (if (> (length test-list) 0)
                               hed
                               (if (not (null? tail)) 
                                 (loop (car tail) (cdr tail))
                                 #f)))
                       (if (null? tail)







|

>









|







411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
(define (seconds->std-time-str sec)
  (time->string 
   (seconds->local-time sec)
   "%Y-%m-%d-%H%M%S"))
 

(define (archive:get-timestamp-dir bup-exe archive-dir testsuite-name run-id test-partial-path test-last-update)
    (debug:print-info 0 *default-log-port* "Test last update time:" (seconds->std-time-str test-last-update)) 
    (let* ((internal-path (conc testsuite-name "-" run-id))
           (archive-update-delay (string->number (or (configf:lookup *configdat* "archive" "test-update-delay") "900" )))  
           (ts-list (archive:ls->list  bup-exe archive-dir internal-path))
           (ds-flag (vector-ref (seconds->local-time) 8)))
           (let loop ((hed (car ts-list))
                       (tail (cdr ts-list)))
                   (if (and (null? tail) (equal? hed "latest"))
                        #f
                    (if (and (not (null? tail)) (equal? hed "latest"))
                        (loop (car tail) (cdr tail))
                  (let* ((archive-seconds (time-string->seconds hed ds-flag)))
                    (if (< (abs (- archive-seconds test-last-update)) archive-update-delay)
                    (let* ((test-list (archive:ls->list  bup-exe archive-dir (conc internal-path "/" hed "/" test-partial-path))))
                           (if (> (length test-list) 0)
                               hed
                               (if (not (null? tail)) 
                                 (loop (car tail) (cdr tail))
                                 #f)))
                       (if (null? tail)
479
480
481
482
483
484
485

486
487
488
489
490
491
492
	      (include-paths           (args:get-arg "-include"))
	      (exclude-pattern         (args:get-arg "-exclude-rx"))
	      (exclude-file            (args:get-arg "-exclude-rx-from")))
	 (if (not archive-timestamp-dir)
               (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path)
         (begin    
	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children

	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
		  prev-test-physical-path
		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (let* ((base (pathname-directory prev-test-physical-path))
		    (dirn (pathname-file      prev-test-physical-path))
		    (newn (conc base "/." dirn)))
	       (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)







>







481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
	      (include-paths           (args:get-arg "-include"))
	      (exclude-pattern         (args:get-arg "-exclude-rx"))
	      (exclude-file            (args:get-arg "-exclude-rx-from")))
	 (if (not archive-timestamp-dir)
               (debug:print-error 0 *default-log-port* "Archive not found for testsuite" (common:get-testsuite-name) " run/test/itempath" test-partial-path)
         (begin    
	 ;; some sanity checks, move an existing path out of the way - iif it is not a toplevel with children
         (debug:print-info 0 *default-log-port* "Archive time: " archive-timestamp-dir)
	 (if (and (not toplevel/children)  ;; special handling needed for toplevel with children
		  prev-test-physical-path
		  (common:file-exists? prev-test-physical-path)) ;; what to do? abort or clean up or link it in?
	     (let* ((base (pathname-directory prev-test-physical-path))
		    (dirn (pathname-file      prev-test-physical-path))
		    (newn (conc base "/." dirn)))
	       (debug:print-error 0 *default-log-port* "the old directory " prev-test-physical-path ", still exists! Moving it to " newn)

Modified client.scm from [9da8d7475d] to [dc4c7b41e8].

86
87
88
89
90
91
92
93

94
95
96

97





98
99
100
101
102
103
104
105
106
107
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
	    (let ((host  (cadr  server-dat))
		  (port  (caddr server-dat)))

	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))

		  (set! *runremote* (make-remote)))





	      (if (and host port)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))







|
>



>
|
>
>
>
>
>
|

|







86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
	    (let ((host  (cadr  server-dat))
		  (port  (caddr server-dat))
                  (server-id (caddr (cddr server-dat))))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))
                  (begin       
		    (set! *runremote* (make-remote))
                    (let* ((server-info (remote-server-info *runremote*))) 
                      (if server-info
                        (begin
                          (remote-server-url-set! *runremote* (server:record->url server-info))
                          (remote-server-id-set! *runremote* (server:record->id server-info)))))))
	      (if (and host port server-id)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port server-id))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))

Modified common.scm from [a82c407907] to [3a4f8a8942].

294
295
296
297
298
299
300


301
302
303
304
305
306
307
308
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )


  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode







>
>
|







294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    (else "FAIL")))

(define (common:logpro-exit-code->test-status exit-code)
  (status-sym->string (common:logpro-exit-code->status-sym exit-code)))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        #f) ;; (server:check-if-running *toppath*) #f))
  (server-id         #f)
  (server-info       (if *toppath* (server:check-if-running *toppath*)))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  







|







567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
	     (begin
	       (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

(define *common:telemetry-log-state* 'startup)
(define *common:telemetry-log-socket* #f)

(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
             (serverport (configf:lookup-number *configdat* "telemetry" "port"))
             (user (or (get-environment-variable "USER") "unknown"))
             (host (or (get-environment-variable "HOST") "unknown")))
        (set! *common:telemetry-log-state*
              (handle-exceptions
               exn
               (begin
                 (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
                 'broken)
               (if (and serverhost serverport user host)
                   (let* ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (udp-connect! s serverhost serverport)
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin







|
|

|



















|







3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
	     (begin
	       (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

#;(define *common:telemetry-log-state* 'startup)
#;(define *common:telemetry-log-socket* #f)

#;(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
             (serverport (configf:lookup-number *configdat* "telemetry" "port"))
             (user (or (get-environment-variable "USER") "unknown"))
             (host (or (get-environment-variable "HOST") "unknown")))
        (set! *common:telemetry-log-state*
              (handle-exceptions
               exn
               (begin
                 (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
                 'broken)
               (if (and serverhost serverport user host)
                   (let* ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (udp-connect! s serverhost serverport)
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
#;(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
  
(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))








|












3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
  
#;(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))

Modified db.scm from [2f649dc1fb] to [c873dc413f].

1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
         (print "creating trigges from init") 
        (db:create-triggers db)    
     db)) ;; )

;;======================================================================
;; A R C H I V E S
;;======================================================================








|







1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
         ;; (print "creating trigges from init") 
        (db:create-triggers db)    
     db)) ;; )

;;======================================================================
;; A R C H I V E S
;;======================================================================

3966
3967
3968
3969
3970
3971
3972
3973




















































































































3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))





















































































































;; state is the priority rollup of all states
;; status is the priority rollup of all completed statesfu
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|







3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))

;; NOT FINISHED 
(define (db:calc-state-status-toplevel state status tl-state tl-status)
  `(,state ,status))

;;   (match state
;;     (("COMPLETED")
;;      (match `(,tl-state ,tl-status)
;;        (("COMPLETED" "PASS") `(,state ,status))
;;        (("COMPLETED" thestatus)
;; 	(case (string->symbol thestatus)
;; 	  ((ABORT CHECK DEAD)
;; 	   (if `("COMPLETED" ,thestatus))
;; 	(match `(,thestatus ,status)
;; 	  (("FAIL" "ABORT") '("COMPLETED" "ABORT"))
;; 	  (("FAIL" "CHECK") '("COMPLETED" "CHECK"))
;; 	  (("FAIL" "DEAD")  '("COMPLETED" "DEAD"))
;; 	  (("WARN" "FAIL")  '("COMPLETED" "FAIL"))
;; 	  (("WARN" "CHECK") '("COMPLETED" "CHECK"))
;; 	  (("WARN" "DEAD")
       
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  ;; (mutex-lock! *db-transaction-mutex*) ;; why do we need a mutex?
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (no-sync-db   (db:no-sync-db #f))
	 (rollup-flag  #f)
	 (wait-flag    #f)
	 (rollup-lock-key  (conc run-id "-rollup-" test-name))
	 (waiting-lock-key (conc run-id "-waiting-" test-name)))
    (db:test-set-state-status dbstruct run-id test-id state status #f)
    (if (and test-id state status (equal? status "AUTO")) 
	(db:test-data-rollup dbstruct run-id test-id status))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbstruct 'set-test-start-time (list test-id)))

    (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
	(begin
	  ;; is there a rollup lock? If not, take it
	  (sqlite3:with-transaction
	   no-sync-db
	   (lambda ()
	     ;; (debug:print 0 *default-log-port* "EXCEPTION: exn="exn)
	     (let* ((rollup-lock-time  (db:no-sync-get/default no-sync-db rollup-lock-key #f))
		    (waiting-lock-time (db:no-sync-get/default no-sync-db waiting-lock-key #f)))
	       (if rollup-lock-time ;; someone is doing a rollup
		   (if (not waiting-lock-time) ;; no one is waiting
		       (begin
			 (set! wait-flag #t)
			 (set! rollup-flag #t)
			 (db:no-sync-set no-sync-db waiting-lock-key (current-seconds)))) ;; we are going to wait
		   (begin
		     (set! rollup-flag #t)
		     (db:no-sync-set no-sync-db rollup-lock-key (current-seconds)))))))
	  (if wait-flag
	      (let loop ((count 10)) ;; about 20 seconds
		(thread-sleep! 2)
		(if (and (not (db:no-sync-get/default no-sync-db waiting-lock-key #f))
			 (> count 0))
		    (loop (+ count 1))
		    (sqlite3:with-transaction
		     no-sync-db
		     (lambda ()
		       (db:no-sync-set no-sync-db rollup-lock-key (current-seconds))
		       (db:no-sync-del! no-sync-db waiting-lock-key))))))
	  ;; now the rollup
	  (if rollup-flag ;; put this into a thread
	      (begin
		;; (thread-start! (make-thread
		;; 	      (lambda ()
		(db:roll-up-test-state-status dbstruct run-id test-name state status)
		(db:no-sync-del! no-sync-db rollup-lock-key))
	      ;; (conc "thread for run-id: " run-id " test-name: " test-name))))))))
	      )))))
	      
;; I'd like to remove the need for item-path - it is logically not needed here
;; for now we pass in state and status - NOTE: There is a possible race if a test
;; is rapidly re-run while an earlier run is waiting to rollup.
;;
(define (db:roll-up-test-state-status dbstruct run-id test-name state status)
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name "")))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
	 (tl-test-id   (db:test-get-id tl-testdat)))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       ;; NB// Pass the db so it is part fo the transaction
       ;; item-path is used in get-all-state-status counts to exclude current state/status of THIS test
       ;; but with the state/status being set earlier this is not needed any longer
       (let* ((state-status-counts  (db:get-all-state-status-counts-for-testname dbstruct run-id test-name))
	      (state-statuses       (if (null? state-status-counts)
					'()
					(db:roll-up-rules state-status-counts state status)))
	      (newstate             (if (null? state-statuses)
					state
					(car state-statuses)))
	      (newstatus            (if (null? state-statuses)
					status
					(cadr state-statuses))))
	 (if tl-test-id
	     (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))
	 )))
    #t))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed statesfu
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items-orig dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092


4093


4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
		(let* ((running     (length (filter (lambda (x)
                          (member (dbr:counts-state x) *common:running-states*))
                                 state-status-counts)))
           (bad-not-started      (length (filter (lambda (x)
                                      (and (equal? (dbr:counts-state x) "NOT_STARTED") 
                                        (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
																	state-status-counts)))
           (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                    (delete-duplicates
                                      (if (and state (not (member state *common:dont-roll-up-states*)))
                                          (cons state (map dbr:counts-state state-status-counts))
                                          (map dbr:counts-state state-status-counts)))
                                                  *common:std-states* >))
           (all-curr-statuses    (common:special-sort  ;; worst -> best
                                    (delete-duplicates
                                      (if (and state status (not (member state *common:dont-roll-up-states*)))
                                          (cons status (map dbr:counts-status state-status-counts))
                                          (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
           (non-completes        (filter (lambda (x)
							 										 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
						       									all-curr-states))
			     (preq-fails        (filter (lambda (x)
							 								(equal? x "PREQ_FAIL"))
						       							all-curr-statuses))
           (num-non-completes (length non-completes))
 					 (newstate          (cond
															((> running 0)           "RUNNING")            ;; anything running, call the situation running
                              ((> (length preq-fails) 0) "NOT_STARTED")
															((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
															((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
															(else                    (car all-curr-states))))
           (newstatus         (cond
                              ((> (length preq-fails) 0)  "PREQ_FAIL")
                              ((or (> bad-not-started 0)
                                   (and (equal? newstate "NOT_STARTED")
                                      (> num-non-completes 0)))
                                            "STARTED")
                              (else (car all-curr-statuses)))))
 					(debug:print-info 2 *default-log-port*
                                         "\n--> probe db:set-state-status-and-roll-up-items: "
                                         "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
                                         "\n--> running:             "running
                                         "\n--> bad-not-started:     "bad-not-started
                                         "\n--> non-non-completes:   "num-non-completes
                                         "\n--> non-completes:       "non-completes
                                         "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

                        ;; NB// Pass the db so it is part of the transaction
         (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
													(state-stauses (db:roll-up-rules state-status-counts #f #f ))


                          (newstate (car state-stauses))


                          (newstatus (cadr state-stauses))) 
                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
 (let* ((test-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)







|



|
|
|
















|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|




<
|
|


|








|
>
>
|
>
>
|


|







4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193

4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part of the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			    (state-statuses        (db:roll-up-rules state-status-counts state status))
                          (newstate  (car state-statuses))
                          (newstatus (cadr state-statuses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
  (let* ((running              (length (filter (lambda (x)
						 (member (dbr:counts-state x) *common:running-states*))
					       state-status-counts)))
	 (bad-not-started      (length (filter (lambda (x)
						 (and (equal? (dbr:counts-state x) "NOT_STARTED") 
						      (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
					       state-status-counts)))
	 (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
				(delete-duplicates
				 (if (and state (not (member state *common:dont-roll-up-states*)))
				     (cons state (map dbr:counts-state state-status-counts))
				     (map dbr:counts-state state-status-counts)))
				*common:std-states* >))
	 (all-curr-statuses    (common:special-sort  ;; worst -> best
				(delete-duplicates
				 (if (and state status (not (member state *common:dont-roll-up-states*)))
				     (cons status (map dbr:counts-status state-status-counts))
				     (map dbr:counts-status state-status-counts)))
				*common:std-statuses* >))
	 (non-completes        (filter (lambda (x)
					 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
				       all-curr-states))
	 (preq-fails        (filter (lambda (x)
				      (equal? x "PREQ_FAIL"))
				    all-curr-statuses))
	 (num-non-completes (length non-completes))
	 (newstate          (cond
			     ((> running 0)             "RUNNING")            ;; anything running, call the situation running
			     ((> (length preq-fails) 0) "NOT_STARTED")
			     ((> bad-not-started 0)     "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
			     ((> num-non-completes 0)   (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
			     (else                      (car all-curr-states))))
	 (newstatus         (cond
			     ((> (length preq-fails) 0)  "PREQ_FAIL")
			     ((or (> bad-not-started 0)
				  (and (equal? newstate "NOT_STARTED")
				       (> num-non-completes 0)))
			      "STARTED")
			     (else (car all-curr-statuses)))))
    (debug:print-info 2 *default-log-port*
		      "\n--> probe db:set-state-status-and-roll-up-items: "
		      "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
		      "\n--> running:             "running
		      "\n--> bad-not-started:     "bad-not-started
		      "\n--> non-non-completes:   "num-non-completes
		      "\n--> non-completes:       "non-completes
		      "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

    ;; NB// Pass the db so it is part of the transaction
    (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    ;; (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
			  (state-statuses        (db:roll-up-rules state-status-counts #f #f ))
                          (newstate             (if (null? state-statuses)
						    curr-state
						    (car state-statuses)))
			  (newstatus            (if (null? state-statuses)
						    curr-status
						    (cadr state-statuses))))
                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
         ;; (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
 (let* ((test-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)
4146
4147
4148
4149
4150
4151
4152















4153
4154
4155
4156
4157
4158
4159

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))
















;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
;;    run-id test-name))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))

;; full count not including toplevel
;;
(define (db:get-all-state-status-counts-for-testname dbstruct run-id test-name)
  (let* ((test-count-recs (db:with-db
			   dbstruct #f #f
			   (lambda (db)
			     (sqlite3:map-row
			      (lambda (state status count)
				(make-dbr:counts state: state status: status count: count))
			      db
			      "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' GROUP BY state,status;"
                                     run-id test-name)))))
    test-count-recs))


;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
;;    run-id test-name))
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353

4354
4355
4356
4357
4358
4359
4360
4361
			       flush
			       sync
			       set-verbosity
			       killserver
			       ))

(define (db:login dbstruct calling-path calling-version client-signature)
  (cond 
   ((not (equal? calling-path *toppath*))
    (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
   ;; ((not (equal? *run-id* run-id))
   ;;  (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
   ((not (equal? megatest-version calling-version))
    (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version)))

   (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbstruct stmtname params)
  (let ((query (let ((q (alist-ref (if (string? stmtname)
				       (string->symbol stmtname)
				       stmtname)







|






>
|







4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
			       flush
			       sync
			       set-verbosity
			       killserver
			       ))

(define (db:login dbstruct calling-path calling-version client-signature)
   (cond 
   ((not (equal? calling-path *toppath*))
    (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
   ;; ((not (equal? *run-id* run-id))
   ;;  (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
   ((not (equal? megatest-version calling-version))
    (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version)))
   
    (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbstruct stmtname params)
  (let ((query (let ((q (alist-ref (if (string? stmtname)
				       (string->symbol stmtname)
				       stmtname)

Modified dcommon.scm from [0db7864f6b] to [dbcf309f44].

711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)
				 (set! rownum 1)
				 (for-each 
				  (lambda (server)
				    (set! colnum 0)
				    (match-let (((mod-time host port start-time pid)
						 server))
				      (let* ((uptime  (- (current-seconds) mod-time))
					     (runtime (if start-time
							  (- mod-time start-time)
							  0))
					     (vals (list "-"  ;; (vector-ref server 0) ;; Id
							 "-"  ;; (vector-ref server 9) ;; MT-Ver







|







711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)
				 (set! rownum 1)
				 (for-each 
				  (lambda (server)
				    (set! colnum 0)
				    (match-let (((mod-time host port start-time server-id pid)
						 server))
				      (let* ((uptime  (- (current-seconds) mod-time))
					     (runtime (if start-time
							  (- mod-time start-time)
							  0))
					     (vals (list "-"  ;; (vector-ref server 0) ;; Id
							 "-"  ;; (vector-ref server 9) ;; MT-Ver

Modified docs/manual/megatest_manual.html from [4c1fe80b6f] to [a02a70016f].

957
958
959
960
961
962
963





964
965
966
967
968
969
970
<div class="imageblock">
<div class="content">
<img src="megatest-system-architecture.png" alt="Static">
</div>
</div>
</div>
</div>





</div>
<div class="sect1">
<h2 id="_todo_road_map">TODO / Road Map</h2>
<div class="sectionbody">
<div class="paragraph"><p>Note: This road-map is a wish list and not a formal plan. Items are in
rough priority but are subject to change. Development is driven by
user requests, developer "itch" and bug reports. Please contact







>
>
>
>
>







957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
<div class="imageblock">
<div class="content">
<img src="megatest-system-architecture.png" alt="Static">
</div>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_road_map">Road Map</h2>
<div class="sectionbody">
</div>
</div>
<div class="sect1">
<h2 id="_todo_road_map">TODO / Road Map</h2>
<div class="sectionbody">
<div class="paragraph"><p>Note: This road-map is a wish list and not a formal plan. Items are in
rough priority but are subject to change. Development is driven by
user requests, developer "itch" and bug reports. Please contact
1445
1446
1447
1448
1449
1450
1451



1452
1453
1454
1455
1456
1457
1458
1459
</div>
<div class="sect1">
<h2 id="_installation">Installation</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_dependencies">Dependencies</h3>
<div class="paragraph"><p>Chicken scheme and a number of "eggs" are required for building



Megatest. In the v1.66 and beyond assistance to create the build
system is built into the Makefile.</p></div>
<div class="listingblock">
<div class="title">Installation steps (overview)</div>
<div class="content monospaced">
<pre>./configure
make chicken
setup.sh make -j install</pre>







>
>
>
|







1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
</div>
<div class="sect1">
<h2 id="_installation">Installation</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_dependencies">Dependencies</h3>
<div class="paragraph"><p>Chicken scheme and a number of "eggs" are required for building
Megatest. See the script installall.sh in the utils directory of the
source distribution for an automated way to install everything
needed for building Megatest on Linux.</p></div>
<div class="paragraph"><p>Megatest. In the v1.66 and beyond assistance to create the build
system is built into the Makefile.</p></div>
<div class="listingblock">
<div class="title">Installation steps (overview)</div>
<div class="content monospaced">
<pre>./configure
make chicken
setup.sh make -j install</pre>
3415
3416
3417
3418
3419
3420
3421






































3422
3423
3424
3425
3426
3427
3428
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">( key1 key2 &#8230; )</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
</tbody>
</table>
</div>






































</div>
<div class="sect1">
<h2 id="_megatest_internals">Megatest Internals</h2>
<div class="sectionbody">
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png">







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">( key1 key2 &#8230; )</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="sect1">
<h2 id="_test_plan">Test Plan</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_tests">Tests</h3>
<div class="paragraph"><p>itemwait|33</p></div>
<div class="paragraph"><p>rerun-downstream-item|20</p></div>
<div class="paragraph"><p>rerunclean|20</p></div>
<div class="paragraph"><p>fullrun|18</p></div>
<div class="paragraph"><p>goodtests|18</p></div>
<div class="paragraph"><p>kill-rerun|17</p></div>
<div class="paragraph"><p>items-runconfigvars|16</p></div>
<div class="paragraph"><p>ro_test|16</p></div>
<div class="paragraph"><p>runconfig-tests|16</p></div>
<div class="paragraph"><p>env-pollution|13</p></div>
<div class="paragraph"><p>itemmap|11</p></div>
<div class="paragraph"><p>testpatt_envvar|10</p></div>
<div class="paragraph"><p>toprun|10</p></div>
<div class="paragraph"><p>chained-waiton|8</p></div>
<div class="paragraph"><p>skip-on-fileexists|8</p></div>
<div class="paragraph"><p>killrun_preqfail|7</p></div>
<div class="paragraph"><p>subrun|6</p></div>
<div class="paragraph"><p>dependencies|5</p></div>
<div class="paragraph"><p>itemwait-simple|4</p></div>
<div class="paragraph"><p>rollup|4</p></div>
<div class="paragraph"><p>end-of-run|3</p></div>
<div class="paragraph"><p>killrun|3</p></div>
<div class="paragraph"><p>listener|3</p></div>
<div class="paragraph"><p>test2|3</p></div>
<div class="paragraph"><p>testpatt|3</p></div>
<div class="paragraph"><p>env-pollution-usecacheno|2</p></div>
<div class="paragraph"><p>set-values|2
envvars|1
listruns-tests|1
subrun-usecases|1</p></div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_megatest_internals">Megatest Internals</h2>
<div class="sectionbody">
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png">
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.5<br>
Last updated 2020-06-29 08:09:27 PDT
</div>
</div>
</body>
</html>







|




3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.5<br>
Last updated 2020-09-08 08:39:29 PDT
</div>
</div>
</body>
</html>

Modified docs/manual/megatest_manual.txt from [69ab724537] to [cb5cc67576].

114
115
116
117
118
119
120


121
122
123
124
125
126
127
// :leveloffset: 0

include::writing_tests.txt[]

include::howto.txt[]

include::reference.txt[]



Megatest Internals
------------------

["graphviz", "server.png"]
----------------------------------------------------------------------
include::server.dot[]







>
>







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// :leveloffset: 0

include::writing_tests.txt[]

include::howto.txt[]

include::reference.txt[]

include::testplan.txt[]

Megatest Internals
------------------

["graphviz", "server.png"]
----------------------------------------------------------------------
include::server.dot[]

Added docs/manual/testplan.txt version [2f7346adda].



































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// This file is part of Megatest.
// 
//     Megatest is free software: you can redistribute it and/or modify
//     it under the terms of the GNU General Public License as published by
//     the Free Software Foundation, either version 3 of the License, or
//     (at your option) any later version.
// 
//     Megatest is distributed in the hope that it will be useful,
//     but WITHOUT ANY WARRANTY; without even the implied warranty of
//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//     GNU General Public License for more details.
// 
//     You should have received a copy of the GNU General Public License
//     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

// Copyright 2006-2020, Matthew Welland.

Test Plan
---------

Tests
~~~~~

itemwait|33

rerun-downstream-item|20

rerunclean|20

fullrun|18

goodtests|18

kill-rerun|17

items-runconfigvars|16

ro_test|16

runconfig-tests|16

env-pollution|13

itemmap|11

testpatt_envvar|10

toprun|10

chained-waiton|8

skip-on-fileexists|8

killrun_preqfail|7

subrun|6

dependencies|5

itemwait-simple|4

rollup|4

end-of-run|3

killrun|3

listener|3

test2|3

testpatt|3

env-pollution-usecacheno|2

set-values|2
envvars|1
listruns-tests|1
subrun-usecases|1


Modified http-transport.scm from [67489ed9ab] to [c4772ba536].

244
245
246
247
248
249
250
251





252

253
254
255
256
257
258
259
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*)))





       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")

       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 







|
>
>
>
>
>
|
>







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*))
         (server-id   (if (vector? serverdat) 
                           (http-transport:server-dat-get-server-id serverdat)
                           (begin
			     (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			     (exit 1)))))
       (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) 

       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309

310
311
312
313
314
315
316
317
318
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or *server-id* "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)

	 (thread-terminate! th2)
	 (debug:print-info 11 *default-log-port* "got res=" res)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
		 res ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))







|












|


















>

<







277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317

318
319
320
321
322
323
324
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or server-id "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
          (vector-set! res 0 success)
	 (thread-terminate! th2)

	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag?
		 res ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
348
349
350
351
352
353
354
355

356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))


(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 







|
>




















|



|







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
;(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))
(define (http-transport:server-dat-get-server-id     vec)    (vector-ref  vec 6))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print-error 0 *default-log-port* "call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port server-id) 
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
473
474
475
476
477
478
479



480
481
482
483
484
485
486
487
488
489



490
491
492
493
494
495
496
497
      
      (if (not (equal? sdat (list iface port)))
	  (let ((new-iface (car sdat))
		(new-port  (cadr sdat)))
	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	    (set! iface new-iface)
	    (set! port  new-port)



	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      
      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
	  (begin



	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond







>
>
>
|









>
>
>
|







480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
      
      (if (not (equal? sdat (list iface port)))
	  (let ((new-iface (car sdat))
		(new-port  (cadr sdat)))
	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	    (set! iface new-iface)
	    (set! port  new-port)
             (if (not *server-id*)
              (set! *server-id* (server:mk-signature)))
            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	    (flush-output *default-log-port*)))
      
      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
	  (begin
             (if (not *server-id*)
              (set! *server-id* (server:mk-signature)))
            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond

Modified launch.scm from [d0067277fa] to [4cbc0e59d3].

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds







|







209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second







|


















|

|







231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second

Modified megatest-version.scm from [957a43ff94] to [69c8b1f2d8].

16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.6566)







|
16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.6581)

Modified megatest.scm from [0e58f17e0f] to [e69eff1234].

850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* ((server-id     (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":"
	   (host:port     (args:get-arg "-ping")))
      (server:ping (or server-id host:port) do-exit: #t)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup








|







850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* ((server-id     (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":"
	   (host:port     (args:get-arg "-ping")))
      (server:ping (or server-id host:port) #f do-exit: #t)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup

Modified mt.scm from [e9055c2687] to [283ae4be89].

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))








|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))

Modified rmt.scm from [39d97c528a] to [0556a6d556].

98
99
100
101
102
103
104





105
106
107
108
109
110
111
    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))





	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    
    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost







>
>
>
>
>







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
    ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"];
    ;; DOT INIT_RUNREMOTE -> MUTEXLOCK;
    ;; ensure we have a record for our connection for given area
    (if (not runremote)                   ;; can remove this one. should never get here.         
	(begin
	  (set! *runremote* (make-remote))
          (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
			(remote-server-url-set! *runremote* (server:record->url server-info))
			(remote-server-id-set! *runremote* (server:record->id server-info)))))  
	  (set! runremote   *runremote*))) ;; new runremote will come from this on next iteration
    
    ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity
    ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"];
    ;; DOT SET_HOMEHOST -> MUTEXLOCK;
    ;; ensure we have a homehost record
    (if (not (pair? (remote-hh-dat runremote)))  ;; not on homehost
172
173
174
175
176
177
178
179
180





181
182
183
184
185
186
187
     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))





      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};







|

>
>
>
>
>







177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
     ;;DOT CASE6 [label="init\nremote"];
     ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6};
     ;;DOT CASE6 -> "rmt:send-receive";
     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat runremote))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url runremote)             ;; have a server
           (not (server:ping (remote-server-url runremote) (remote-server-id runremote))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      (set! *runremote* (make-remote))
      (let* ((server-info (remote-server-info *runremote*))) 
            (if server-info
		(begin
		  (remote-server-url-set! *runremote* (server:record->url server-info))
                  (remote-server-id-set! *runremote* (server:record->id server-info)))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE7 [label="homehost\nwrite"];
     ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7};
200
201
202
203
204
205
206
207
208

209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
      (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url

	    (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed

	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*))))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params))

     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 







|
|
>
|
>


|



|







210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
     ;;DOT CASE8 -> "rmt:open-qry-close-locally";
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (remote-force-server runremote))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))           ;; have homehost
           (not (remote-server-url runremote))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8")
      (let ((server-info  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-info
	    (begin
              (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed
              (remote-server-id-set! runremote (server:record->id server-info)))  
	    (if (common:force-server?)
		(server:start-and-wait *toppath*)
		(server:kind-run *toppath*)))
      (remote-force-server-set! runremote (common:force-server?))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally cmd 0 params)))

     ;;DOT CASE9 [label="force server\nnot on homehost"];
     ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9};
     ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive";
     ((or (and (remote-force-server runremote)              ;; we are forcing a server and don't yet have a connection to one
	       (not (remote-conndat runremote)))
	  (and (not (cdr (remote-hh-dat runremote)))        ;; not on a homehost 
656
657
658
659
660
661
662

663
664
665
666
667
668
669
670
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))


(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))

(define (rmt:get-not-completed-cnt run-id)
  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))







>
|







668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))

;; NOTE: rmt functions can NEVER have key params as they might be called as local
(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))

(define (rmt:get-not-completed-cnt run-id)
  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))

Modified runs.scm from [7d76065ab9] to [4de72ed2f2].

831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!optional (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
  (if (and (runs:testdat-prereqs-not-met testdat)
	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
      (runs:testdat-prereqs-not-met testdat)
      (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps)))
		    (if (list? res)
			res
			(begin
			  (debug:print 0 *default-log-port*
				       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
				       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
			  '())))))







|



|







831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
  (if (and (runs:testdat-prereqs-not-met testdat)
	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
      (runs:testdat-prereqs-not-met testdat)
      (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode itemmaps)))
		    (if (list? res)
			res
			(begin
			  (debug:print 0 *default-log-port*
				       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
				       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
			  '())))))
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  #;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '())))
	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))







<
<
<
<
<
<
<
<

<







860
861
862
863
864
865
866








867

868
869
870
871
872
873
874
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))








	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))

	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))
	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))







<
<







1106
1107
1108
1109
1110
1111
1112


1113
1114
1115
1116
1117
1118
1119
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))


	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))







<







1505
1506
1507
1508
1509
1510
1511

1512
1513
1514
1515
1516
1517
1518
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps

                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))

Modified server.scm from [39f1590bf7] to [d960326e6f].

91
92
93
94
95
96
97

98
99
100
101
102
103
104
	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)

					  (argv)))))))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)







>







91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
                                          (current-process-id)
					  (argv)))))))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
198
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

;; given a path to a server log return: host port startseconds

;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
    (handle-exceptions
	exn
      (begin
	(print "failed to get server info from " logf ", exn=" exn)
	(list #f #f #f)) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match rx inl)))
		  (if (not mlst)
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
			  (list #f #f #f))
		      (let ((dat  (cdr mlst)))
			(list (car dat) ;; host
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))))))

		(list #f #f #f))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))







>
|

|




|










|



|
>
|







162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 

(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)"))) ;; SERVER STARTED: host:port AT timesecs server id
    (handle-exceptions
	exn
      (begin
	(print "failed to get server info from " logf ", exn=" exn)
	(list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
      (with-input-from-file
	  logf
	(lambda ()
	  (let loop ((inl  (read-line))
		     (lnum 0))
	    (if (not (eof-object? inl))
		(let ((mlst (string-match rx inl)))
		  (if (not mlst)
		      (if (< lnum 500) ;; give up if more than 500 lines of server log read
			  (loop (read-line)(+ lnum 1))
			  (list #f #f #f #f))
		      (let ((dat  (cdr mlst)))
			(list (car dat) ;; host
			      (string->number (cadr dat)) ;; port
			      (string->number (caddr dat))
                              (cadr (cddr dat))))))
		(list #f #f #f #f))))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
  (let ((fname-rx    (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
	(day-seconds (* 24 60 60)))
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time  (handle-exceptions
				   exn
				   (begin
				     (print "failed to get modification time on " hed ", exn=" exn)
				     (current-seconds)) ;; 0
				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
		       (down-time (- (current-seconds) mod-time))
		       (serv-dat  (if (or (< num-serv-logs 10)
				  	  (< down-time 900)) ;; day-seconds))
				      (server:logf-get-start-info hed)
				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (if (null? serv-dat)
				     res
				     (cons (append serv-rec (list pid)) res))))
		  (if (null? tal)
		      (if (and limit
			       (> (length new-res) limit))
			  new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			  new-res)
		      (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
       (match-let (((mod-time host port start-time pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)







|












|











|







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time  (handle-exceptions
				   exn
				   (begin
				     (debug:print 0 *default-log-port* "failed to get modification time on " hed ", exn=" exn)
				     (current-seconds)) ;; 0
				   (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
		       (down-time (- (current-seconds) mod-time))
		       (serv-dat  (if (or (< num-serv-logs 10)
				  	  (< down-time 900)) ;; day-seconds))
				      (server:logf-get-start-info hed)
				      '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (if (null? serv-dat)
				     res
				     (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let 
		  (if (null? tal)
		      (if (and limit
			       (> (length new-res) limit))
			  new-res ;; (take new-res limit)  <= need intelligent sorting before this will work
			  new-res)
		      (loop (car tal)(cdr tal) new-res)))))))))

(define (server:get-num-alive srvlst)
  (let ((num-alive 0))
    (for-each
     (lambda (server)
       (match-let (((mod-time host port start-time server-id pid)
		    server))
	 (let* ((uptime  (- (current-seconds) mod-time))
		(runtime (if start-time
			     (- mod-time start-time)
			     0)))
	   (if (< uptime 5)(set! num-alive (+ num-alive 1))))))
     srvlst)
303
304
305
306
307
308
309






310
311
312
313
314
315
316
317
318
319
    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (random len)))
	  (list-ref srvrs idx))
	#f)))








(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*







>
>
>
>
>
>


|







306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
    (if (and (list? srvrs)
	     (not (null? srvrs)))
	(let* ((len (length srvrs))
	       (idx (random len)))
	  (list-ref srvrs idx))
	#f)))

(define (server:record->id servr)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if server-id
	server-id
	#f)))

(define (server:record->url servr)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427

428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
		  (common:simple-file-release-lock lock-file)))
	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-url (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-url
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  server-url
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))
    ;; (print "servers: " servers " ns: " ns)
    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                res
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))

         (res        (case *transport-type*
                       ((http)(server:ping server-url))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))







|

|

|




















<










|








>

|
















|







385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
		  (common:simple-file-release-lock lock-file)))
	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))

;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-info
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  (server:record->url server-info)
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
		(server:kind-run areapath))
	    (thread-sleep! 5)
	    (loop (server:check-if-running areapath)
		  (+ try-num 1)))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:get-num-servers #!key (numservers 2))
  (let ((ns (string->number
	     (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
    (or ns numservers)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))

    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                hed
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (server-id (server:record->id server-record)) 
         (res        (case *transport-type*
                       ((http)(server:ping server-url server-id))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in server-id #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)







|







474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port server-id))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
583
584
585
586
587
588
589

590
591
592




593

594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
                                 (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))

                               (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
                                   (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
		               (delete-file* staging-file)
		               (let* ((start-time (current-milliseconds))
                                      (res (system sync-cmd))

                                      (res2 
                                       (cond
                                        ((eq? 0 res)




		                         (delete-file* (conc mtdbfile ".backup"))

                                         (if (eq? 0 (file-size sync-log))
                                             (delete-file sync-log))
		                         (system (conc "/bin/mv " staging-file " " mtdbfile))
                                         
                                         (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
                                         (set! off-time (calculate-off-time
                                                         last-sync-seconds
                                                         (cond
                                                          ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
                                                           duty-cycle)
                                                          (else
                                                           (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid.  Should be a number between 0 and 1, but "duty-cycle" was specified.  Using default value: "default-duty-cycle)
                                                           default-duty-cycle))))
                                         
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
                                         'sync-completed)
                                        (else
                                         (system (conc "/bin/cp "sync-log" "sync-log".fail"))
                                         (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
                                         (if (file-exists? (conc mtdbfile ".backup"))
                                             (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
                                         #f))))
                                 (common:simple-file-release-lock lockfile)







>


|
>
>
>
>
|
>

|














|







592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
                                 (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))

                               (if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
                                   (common:snapshot-file mtdbfile subdir: ".db-snapshot"))
		               (delete-file* staging-file)
		               (let* ((start-time (current-milliseconds))
                                      (res (system sync-cmd))
                                      (dbbackupfile (conc mtdbfile ".backup"))
                                      (res2 
                                       (cond
                                        ((eq? 0 res )
                                         (handle-exceptions
                                            exn
                                            #f
                                         (if (file-exists? dbbackupfile)
		                           (delete-file* dbbackupfile)
                                         )
                                         (if (eq? 0 (file-size sync-log))
                                             (delete-file* sync-log))
		                         (system (conc "/bin/mv " staging-file " " mtdbfile))
                                         
                                         (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
                                         (set! off-time (calculate-off-time
                                                         last-sync-seconds
                                                         (cond
                                                          ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
                                                           duty-cycle)
                                                          (else
                                                           (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid.  Should be a number between 0 and 1, but "duty-cycle" was specified.  Using default value: "default-duty-cycle)
                                                           default-duty-cycle))))
                                         
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
                                         (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
                                         'sync-completed))
                                        (else
                                         (system (conc "/bin/cp "sync-log" "sync-log".fail"))
                                         (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
                                         (if (file-exists? (conc mtdbfile ".backup"))
                                             (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
                                         #f))))
                                 (common:simple-file-release-lock lockfile)

Modified sretrieve.scm from [e7efdf8d00] to [c73e7e987b].

635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
           (let* ((parent-dir target-path)
                  (last-dir-name (if  (pathname-extension target-path)  
                                      (conc(pathname-file target-path) "." (pathname-extension target-path))
                                      (pathname-file target-path)))
                  (curr-dir (current-directory))   
                  (start-dir (conc (current-directory) "/" last-dir-name))
                  (execlude (make-exclude-pattern (string-split restrictions ",")))
                   (tmpfile (conc "/tmp/" (current-user-name) "/my-pipe-" (current-process-id))))
                    (if  (file-exists? start-dir)
                    (begin
                         (sauth:print-error (conclast-dir-name " already exist in your work dir."))
                         (sauth:print-error  "Nothing has been retrieved!!  "))
                     (begin
                   ;    (sretrieve:do-as-calling-user
                   ; (lambda ()
                    
                  (if (not (file-exists?  (conc "/tmp/" (current-user-name)))) 
		      (create-directory (conc "/tmp/" (current-user-name)) #t))
                          (change-directory parent-dir)
                            (create-fifo  tmpfile)
                                  (process-fork 
    				   (lambda()
                                       (sleep 1) 
       					(with-output-to-file tmpfile
         				(lambda ()







|







|
|
|







635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
           (let* ((parent-dir target-path)
                  (last-dir-name (if  (pathname-extension target-path)  
                                      (conc(pathname-file target-path) "." (pathname-extension target-path))
                                      (pathname-file target-path)))
                  (curr-dir (current-directory))   
                  (start-dir (conc (current-directory) "/" last-dir-name))
                  (execlude (make-exclude-pattern (string-split restrictions ",")))
                   (tmpfile (conc "/tmp/my-pipe-" (current-process-id))))
                    (if  (file-exists? start-dir)
                    (begin
                         (sauth:print-error (conclast-dir-name " already exist in your work dir."))
                         (sauth:print-error  "Nothing has been retrieved!!  "))
                     (begin
                   ;    (sretrieve:do-as-calling-user
                   ; (lambda ()
                  ; (print tmpfile) 
                  ;(if (not (file-exists?  (conc "/tmp/" (current-user-name)))) 
		  ;    (create-directory (conc "/tmp/" (current-user-name)) #t))
                          (change-directory parent-dir)
                            (create-fifo  tmpfile)
                                  (process-fork 
    				   (lambda()
                                       (sleep 1) 
       					(with-output-to-file tmpfile
         				(lambda ()

Modified tasks.scm from [b621e9649f] to [2899fc0540].

753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772

773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
	       (state      (db:get-value-by-header row header "state"))
	       (status     (db:get-value-by-header row header "status"))
	       (owner      (db:get-value-by-header row header "owner"))
	       (event-time (db:get-value-by-header row header "event_time"))
	       (comment    (db:get-value-by-header row header "comment"))
	       (fail-count (db:get-value-by-header row header "fail_count"))
	       (pass-count (db:get-value-by-header row header "pass_count"))
         (db-contour (db:get-value-by-header row header "contour"))
	       (contour    (if (args:get-arg "-prepend-contour") 
                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
                                           (begin 
                                            (debug:print-info 1 *default-log-port*  "db-contour") 
 						db-contour)
					    (args:get-arg "-contour"))))
         (run-tag (if (args:get-arg "-run-tag")
                            (args:get-arg "-run-tag")
									""))
         (last-update (db:get-value-by-header row header "last_update"))
	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform

	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
	       (spec-id    (pgdb:get-ttype dbh keytarg))
	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
                            event-time
                           (current-seconds))) 
	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id)))
         (if new-run-id
	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
		        (hash-table-set! runs-ht run-id new-run-id)
		;; ensure key fields are up to date
     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
		(pgdb:refresh-run-info
		 dbh
		 new-run-id
		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
     (if (not (equal? run-tag ""))
      (task:add-run-tag dbh new-run-id run-tag))
		new-run-id) 
      
	      (if (equal? state "deleted")
          (begin 
          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
          (if (handle-exceptions
		        exn
		        (begin (print-call-chain)
              (print ((condition-property-accessor 'exn 'message) exn))     
			      #f)







|



|


|


|


>

|




|













|




|







753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
	       (state      (db:get-value-by-header row header "state"))
	       (status     (db:get-value-by-header row header "status"))
	       (owner      (db:get-value-by-header row header "owner"))
	       (event-time (db:get-value-by-header row header "event_time"))
	       (comment    (db:get-value-by-header row header "comment"))
	       (fail-count (db:get-value-by-header row header "fail_count"))
	       (pass-count (db:get-value-by-header row header "pass_count"))
               (db-contour (db:get-value-by-header row header "contour"))
	       (contour    (if (args:get-arg "-prepend-contour") 
                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
                                           (begin 
                                            (debug:print-info 10 *default-log-port*  "db-contour" db-contour) 
 						db-contour)
					    (args:get-arg "-contour"))))
               (run-tag (if (args:get-arg "-run-tag")
                            (args:get-arg "-run-tag")
									""))
               (last-update (db:get-value-by-header row header "last_update"))
	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
               (base-target      (rmt:get-target run-id))
	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) base-target) base-target))                 ;; e.g. v1.63/a3e1/ubuntu
	       (spec-id    (pgdb:get-ttype dbh keytarg))
	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
                            event-time
                           (current-seconds))) 
	       (new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f)))
         (if new-run-id
	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
		        (hash-table-set! runs-ht run-id new-run-id)
		;; ensure key fields are up to date
     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
		(pgdb:refresh-run-info
		 dbh
		 new-run-id
		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
     (debug:print-info 4 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
     (if (not (equal? run-tag ""))
      (task:add-run-tag dbh new-run-id run-tag))
		new-run-id) 
      
	      (if (or (not state) (equal? state "deleted"))
          (begin 
          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
          (if (handle-exceptions
		        exn
		        (begin (print-call-chain)
              (print ((condition-property-accessor 'exn 'message) exn))     
			      #f)
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
                          #f)))
    (if step-id
      (begin  
        (if pgdb-test-id
           (begin 
                (if  pgdb-step-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
                     (if (or (not smallest-time) (< last-update smallest-time))
        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
                (hash-table-set! step-ht step-id pgdb-step-id ))
           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	







|





|







859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
                          #f)))
    (if step-id
      (begin  
        (if pgdb-test-id
           (begin 
                (if  pgdb-step-id
                   (begin
                    (debug:print-info 4 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
                    (begin
 		      (debug:print-info 4 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
                     (if (or (not smallest-time) (< last-update smallest-time))
        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
                (hash-table-set! step-ht step-id pgdb-step-id ))
           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
                                  #f)))
    (if data-id
      (begin
        (if pgdb-test-id
           (begin 
                (if  pgdb-data-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
                       (if (handle-exceptions
		      exn
		      (begin (print-call-chain)
                              (print ((condition-property-accessor 'exn 'message) exn))     
			#f)
                     
                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))







|





|







905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
                                  #f)))
    (if data-id
      (begin
        (if pgdb-test-id
           (begin 
                (if  pgdb-data-id
                   (begin
                    (debug:print-info 4 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
                    (begin
 		      (debug:print-info 4 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
                       (if (handle-exceptions
		      exn
		      (begin (print-call-chain)
                              (print ((condition-property-accessor 'exn 'message) exn))     
			#f)
                     
                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
968
969
970
971
972
973
974


975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
				(begin
                                  ;(print pgdb-run-id)    
                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
                                 #f)))
	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"


         (if pgdb-run-id
           (begin
	   (if pgdb-test-id ;; have a record
	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
	       (debug:print-info 0 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
	     (begin 
           (debug:print-info 0 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
            (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
           (hash-table-set! test-ht test-id pgdb-test-id))
           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
     test-ids)))







>
>




|





|







969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
				(begin
                                  ;(print pgdb-run-id)    
                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
                                 #f)))
	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
         (if (or (not item-path) (string-null? item-path))
             (debug:print-info 0 *default-log-port* "Working on Run id : " run-id "and test name : " test-name)) 
         (if pgdb-run-id
           (begin
	   (if pgdb-test-id ;; have a record
	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
	       (debug:print-info 4 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
	     (begin 
           (debug:print-info 4 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
            (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
           (hash-table-set! test-ht test-id pgdb-test-id))
           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
     test-ids)))
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
	   #f)
           (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0)  (vector-ref area-info 0)))  
	   (pgdb:insert-area-tag  dbh   (vector-ref tag-info 0)  (vector-ref area-info 0))))))

(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
  (for-each
     (lambda (run-id)
      (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
       (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
run-ids))


;; get runs changed since last sync
;; (define (tasks:sync-test-data dbh cached-info area-info)
;;   (let* ((







|







1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
	   #f)
           (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0)  (vector-ref area-info 0)))  
	   (pgdb:insert-area-tag  dbh   (vector-ref tag-info 0)  (vector-ref area-info 0))))))

(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
  (for-each
     (lambda (run-id)
      (debug:print-info 4 *default-log-port*   "Check if run with " run-id " needs to be synced" )
       (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
run-ids))


;; get runs changed since last sync
;; (define (tasks:sync-test-data dbh cached-info area-info)
;;   (let* ((

Modified tests.scm from [0094b671e6] to [6adeda2896].

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
;;======================================================================

;;======================================================================
;; Tests
;;======================================================================

(declare (unit tests))
(declare (uses lock-queue))
(declare (uses db))
(declare (uses tdb))
(declare (uses common))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
;;======================================================================

;;======================================================================
;; Tests
;;======================================================================

(declare (unit tests))
;;(declare (uses lock-queue))
(declare (uses db))
(declare (uses tdb))
(declare (uses common))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))