Megatest

Changes On Branch v2.0001-matt-test-edits
Login

Changes In Branch v2.0001-matt-test-edits Excluding Merge-Ins

This is equivalent to a diff from 3d2d201a06 to 1b4e76c0e0

2022-02-10
12:19
changed the config hash key for toppath from empty string to toppath check-in: 366b1b75fd user: mmgraham tags: v2.0001
2022-02-09
11:18
Added some expiration handling for run servers Closed-Leaf check-in: 1b4e76c0e0 user: mrwellan tags: v2.0001-matt-test-edits
09:56
Added back use of mutex for transactions check-in: 297a374249 user: mrwellan tags: v2.0001-matt-test-edits
2022-02-03
18:05
tweak waits in runconfigs check-in: 6c303b59b4 user: mrwellan tags: v2.0001-matt-test-edits
2022-02-02
18:07
corrected *configdat* to *runconfigdat* check-in: 3d2d201a06 user: mmgraham tags: v2.0001
16:08
changed to send unquoted cmd to runconfigs-get. When quoted, configf:lookup could not find the entry check-in: 2896749a24 user: mmgraham tags: v2.0001

Modified commonmod.scm from [875119b082] to [bcea10d67c].

381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
*db-write-access*
*db-last-sync*
*db-sync-in-progress*
*db-multi-sync-mutex*
*task-db*
*db-access-allowed*
*db-access-mutex*
*db-transaction-mutex*
*db-cache-path*
*db-with-db-mutex*
*db-api-call-time*
*didsomething*
*no-sync-db*
*my-signature*
*transport-type*







|







381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
*db-write-access*
*db-last-sync*
*db-sync-in-progress*
*db-multi-sync-mutex*
*task-db*
*db-access-allowed*
*db-access-mutex*
;; *db-transaction-mutex*
*db-cache-path*
*db-with-db-mutex*
*db-api-call-time*
*didsomething*
*no-sync-db*
*my-signature*
*transport-type*
960
961
962
963
964
965
966

967
968
969
970
971
972
973
974
975
976
977
978
979
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another

(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-transaction-mutex* (make-mutex))
(define *db-cache-path*       #f)
(define *db-with-db-mutex*    (make-mutex))
(define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
;; no sync db
(define *no-sync-db*          #f)

;; SERVER







>





|







960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
;; multi-sync mutex used in both dbmod and launchmod
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
;; (define *db-transaction-mutex* (make-mutex))
(define *db-cache-path*       #f)
(define *db-with-db-mutex*    (make-mutex))
(define *db-api-call-time*    (make-hash-table)) ;; hash of command => (list of times)
;; no sync db
(define *no-sync-db*          #f)

;; SERVER
4436
4437
4438
4439
4440
4441
4442
4443


4444
4445
4446
4447
4448
4449
4450
;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60))) ;; default is one minute



(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)








|
>
>







4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	;; 60 ;; default is one minute
	5
	)))

(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

Modified dashboard.scm from [d302c30c66] to [5c46200846].

875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
                            (iup:attribute-set! *tim* "TIME" new-val))))


                      )
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)
    (dboard:update-tree tabdat runs-hash header tb)))







|
|
|
|
|
|
<
<
<







875
876
877
878
879
880
881
882
883
884
885
886
887



888
889
890
891
892
893
894
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
		      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
		      (let* ((old-val (iup:attribute *tim* "TIME"))
			     (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
			(if (< (string->number new-val) 5000)
			    (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			    (iup:attribute-set! *tim* "TIME" new-val))))



		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)
    (dboard:update-tree tabdat runs-hash header tb)))

Modified dbmod.scm from [8c09a0af38] to [0dbe869d65].

343
344
345
346
347
348
349
350


351
352
353
354
355
356
357
(defstruct dbr:dbdat
  (db          #f)    ;; should rename this to oddb for on disk db
  (inmem       #f)
  (last-sync   0)
  (last-write  (current-seconds))
  (run-id      #f)
  (fname       #f))
  


;; Returns the dbdat for a particular dbfile inside the area
;;
(define (dbr:dbstruct-get-dbdat dbstruct dbfile)
  (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))

(define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
  (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))







|
>
>







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
(defstruct dbr:dbdat
  (db          #f)    ;; should rename this to oddb for on disk db
  (inmem       #f)
  (last-sync   0)
  (last-write  (current-seconds))
  (run-id      #f)
  (fname       #f))

(define *db-transaction-mutex* (make-mutex))

;; Returns the dbdat for a particular dbfile inside the area
;;
(define (dbr:dbstruct-get-dbdat dbstruct dbfile)
  (hash-table-ref/default (dbr:dbstruct-dbdats dbstruct) dbfile #f))

(define (dbr:dbstruct-dbdat-put! dbstruct dbfile db)
  (hash-table-set! (dbr:dbstruct-dbdats dbstruct) dbfile db))
524
525
526
527
528
529
530

531
532
533
534
535
536
537


538
539
540
541
542
543
544
	 (res (proc dbh dbfile)))
    ;; (sqlite3:finalize! dbh)
    res))

;; called before db is open?
;;
(define (db:get-iam-server-lock dbh dbfname host port)

  (sqlite3:with-transaction
   dbh
   (lambda ()
     (let* ((locker (db:get-locker dbh dbfname)))
       (if locker
	   locker
	   (db:take-lock dbh dbfname port))))))


	     
;; (exn sqlite3) 
(define (db:get-locker dbh dbfname)
  (condition-case
   (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname)
   (exn (sqlite3) #f)))








>
|
|
|
|
|
|
|
>
>







526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
	 (res (proc dbh dbfile)))
    ;; (sqlite3:finalize! dbh)
    res))

;; called before db is open?
;;
(define (db:get-iam-server-lock dbh dbfname host port)
  (mutex-lock! *db-transaction-mutex*)
  (let ((res (sqlite3:with-transaction
	      dbh
	      (lambda ()
		(let* ((locker (db:get-locker dbh dbfname)))
		  (if locker
		      locker
		      (db:take-lock dbh dbfname port)))))))
    (mutex-unlock! *db-transaction-mutex*)
    res))
	     
;; (exn sqlite3) 
(define (db:get-locker dbh dbfname)
  (condition-case
   (sqlite3:first-row dbh "SELECT owner_pid,owner_host,owner_port,event_time FROM locks WHERE lockname=?;" dbfname)
   (exn (sqlite3) #f)))

1007
1008
1009
1010
1011
1012
1013

1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034

1035
1036
1037
1038
1039
1040
1041
				   #f)) 
	   (stmth  (sqlite3:prepare db full-ins)))
      ;; (db:delay-if-busy targdb) ;; NO WAITING
      (if (member "last_update" field-names)
	  (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 
      (for-each
       (lambda (fromdat-lst)

	 (sqlite3:with-transaction
	  db
	  (lambda ()
	    (for-each ;; 
	     (lambda (fromrow)
	       (let* ((a    (vector-ref fromrow 0))
		      (curr (hash-table-ref/default todat a #f))
		      (same #t))
		 (let loop ((i 0))
		   (if (or (not curr)
			   (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
		       (set! same #f))
		   (if (and same
			    (< i (- num-fields 1)))
		       (loop (+ i 1))))
		 (if (not same)
		     (begin
		       (debug:print 0 *default-log-port* "applying data "fromrow"to table "tablename", numrecs="numrecs)
		       (apply sqlite3:execute stmth (vector->list fromrow))
		       (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
	     fromdat-lst))))

       fromdats)
      (sqlite3:finalize! stmth)
      (if (member "last_update" field-names)
	  (db:create-trigger db tablename)))))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are sqlite3 handles







>




















|
>







1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
				   #f)) 
	   (stmth  (sqlite3:prepare db full-ins)))
      ;; (db:delay-if-busy targdb) ;; NO WAITING
      (if (member "last_update" field-names)
	  (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 
      (for-each
       (lambda (fromdat-lst)
	 (mutex-lock! *db-transaction-mutex*)
	 (sqlite3:with-transaction
	  db
	  (lambda ()
	    (for-each ;; 
	     (lambda (fromrow)
	       (let* ((a    (vector-ref fromrow 0))
		      (curr (hash-table-ref/default todat a #f))
		      (same #t))
		 (let loop ((i 0))
		   (if (or (not curr)
			   (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
		       (set! same #f))
		   (if (and same
			    (< i (- num-fields 1)))
		       (loop (+ i 1))))
		 (if (not same)
		     (begin
		       (debug:print 0 *default-log-port* "applying data "fromrow"to table "tablename", numrecs="numrecs)
		       (apply sqlite3:execute stmth (vector->list fromrow))
		       (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
	     fromdat-lst)))
	 (mutex-unlock! *db-transaction-mutex*))
       fromdats)
      (sqlite3:finalize! stmth)
      (if (member "last_update" field-names)
	  (db:create-trigger db tablename)))))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are sqlite3 handles
1513
1514
1515
1516
1517
1518
1519

1520
1521
1522
1523
1524
1525
1526
  (assert *configinfo* "ERROR: db:initialize-db called before configfiles loaded. This is fatal.")
  (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (keys:config-get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
	 (fieldstr (keys:make-key/field-string configdat))
	 #;(db       (dbr:dbdat-db dbdat)))

    (for-each (lambda (key)
		(let ((keyn key))
		  (if (member (string-downcase keyn)
			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
				    "pass_count" "contour"))
		      (begin
			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")







>







1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
  (assert *configinfo* "ERROR: db:initialize-db called before configfiles loaded. This is fatal.")
  (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (keys:config-get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
	 (fieldstr (keys:make-key/field-string configdat))
	 #;(db       (dbr:dbdat-db dbdat)))
    (mutex-lock! *db-transaction-mutex*)
    (for-each (lambda (key)
		(let ((keyn key))
		  (if (member (string-downcase keyn)
			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
				    "pass_count" "contour"))
		      (begin
			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
1639
1640
1641
1642
1643
1644
1645

1646
1647
1648
1649
1650
1651
1652
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
                                  CONSTRAINT metadat_constraint UNIQUE (var));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
	;; Must do this *after* running patch db !! No more. 
	;; cannot use db:set-var since it will deadlock, hardwire the code here
	(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature))

	(debug:print-info 11 *default-log-port* "db:initialize END") ;; ))))

	;;======================================================================
	;; R U N   S P E C I F I C   D B 
	;;======================================================================
	
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests 







>







1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
                                  CONSTRAINT metadat_constraint UNIQUE (var));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
	;; Must do this *after* running patch db !! No more. 
	;; cannot use db:set-var since it will deadlock, hardwire the code here
	(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature))
	(mutex-unlock! *db-transaction-mutex*)
	(debug:print-info 11 *default-log-port* "db:initialize END") ;; ))))

	;;======================================================================
	;; R U N   S P E C I F I C   D B 
	;;======================================================================
	
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests 
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
	"SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;"
	run-id))))

;; Update run_stats for given run_id
;; input data is a list (state status count)
;;
(define (db:update-run-stats dbstruct run-id stats)
  ;; (mutex-lock! *db-transaction-mutex*)
  (db:with-db
   dbstruct
   run-id
   #f

   (lambda (db)
     ;; remove previous data







|







2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
	"SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;"
	run-id))))

;; Update run_stats for given run_id
;; input data is a list (state status count)
;;
(define (db:update-run-stats dbstruct run-id stats)
  (mutex-lock! *db-transaction-mutex*)
  (db:with-db
   dbstruct
   run-id
   #f

   (lambda (db)
     ;; remove previous data
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
		(for-each
		 (lambda (dat)
		   (sqlite3:execute stmt1 run-id (car dat)(cadr dat))
		   (apply sqlite3:execute stmt2 run-id dat))
		 stats)))))
       (sqlite3:finalize! stmt1)
       (sqlite3:finalize! stmt2)
       ;; (mutex-unlock! *db-transaction-mutex*)
       res))))

(define (db:get-main-run-stats dbstruct run-id)
  (db:with-db
   dbstruct
   #f ;; this data comes from main
   #f







|







2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
		(for-each
		 (lambda (dat)
		   (sqlite3:execute stmt1 run-id (car dat)(cadr dat))
		   (apply sqlite3:execute stmt2 run-id dat))
		 stats)))))
       (sqlite3:finalize! stmt1)
       (sqlite3:finalize! stmt2)
       (mutex-unlock! *db-transaction-mutex*)
       res))))

(define (db:get-main-run-stats dbstruct run-id)
  (db:with-db
   dbstruct
   #f ;; this data comes from main
   #f
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (vector #f #f #f #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;;  "area_id"))
	 (header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    







|







2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (make-vector 11 #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;;  "area_id"))
	 (header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    

Modified runsmod.scm from [727372ff23] to [26ea23059a].

1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
	    (runs:too-soon-delay (conc "loop delay " hed) 1 1)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))







|







1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.6)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched 
    (rmt:set-var run-id (conc "launch-complete-" run-id) "yes")  
        
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))







|







1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched 
    (rmt:set-var run-id (conc "launch-complete-" run-id) "yes")  
        
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
    (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle
    
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))

Modified ulex-dual/dbmgr.scm from [53b181f4c9] to [9a6a086d09].

331
332
333
334
335
336
337

338

339
340
341
342
343
344
345




346
347
348
349
350
351
352

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")

    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex

           ;; then send-receive using the ulex layer to host-port stored in cdat
           (res      (send-receive uconn (conndat-hostport cdat) cmd params)))
      ;; since we accessed the server we can bump the expires time up
      (conndat-expires-set! cdat (+ (current-seconds)
                                    (server:expiration-timeout)
                                    -2)) ;; two second margin for network time misalignments etc.
      res)))





;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname







>
|
>
|
|
|
|
|
|
|
>
>
>
>







331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
    (condition-case
     (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
	    (hostport (conndat-hostport cdat))
            ;; then send-receive using the ulex layer to host-port stored in cdat
            (res      (send-receive uconn hostport cmd params)))
       ;; since we accessed the server we can bump the expires time up
       (conndat-expires-set! cdat (+ (current-seconds)
                                     (server:expiration-timeout)
                                     -2)) ;; two second margin for network time misalignments etc.
       res)
     ((exn i/o net)
      (debug:print-info 0 *default-log-port* "IO failure in connection to "hostport
			", resetting connection.")
      

;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname

Modified ulex-dual/ulex.scm from [ba1e2ab076] to [12cecae7cb].

258
259
260
261
262
263
264













265
266
267
268
269
270
271
272
	 (dat          (list `(host-port . ,my-host-port)
			     `(qrykey . qrykey)
			     `(cmd . ,cmd)
			     `(params . ,params))))
    (cond
     (isme (do-work udata dat)) ;; no transmission needed
     (else













      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
       exn
       (begin
         (print "ULEX send-receive: "cmd", "params", exn="exn)
         (message exn))
       (begin
	 ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	 (let-values (((inp oup)(tcp-connect host port)))







>
>
>
>
>
>
>
>
>
>
>
>
>
|







258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
	 (dat          (list `(host-port . ,my-host-port)
			     `(qrykey . qrykey)
			     `(cmd . ,cmd)
			     `(params . ,params))))
    (cond
     (isme (do-work udata dat)) ;; no transmission needed
     (else
      (let-values (((inp oup)(tcp-connect host port)))
	(let ((res (if (and inp oup)
		       (begin
			 (write (obj->string dat) oup)
			 (close-output-port oup)
			 (string->obj (read inp)))
		       (begin
			 (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			 #f))))
	  (close-input-port inp)))
	
	
	     
      #;(handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
       exn
       (begin
         (print "ULEX send-receive: "cmd", "params", exn="exn)
         (message exn))
       (begin
	 ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	 (let-values (((inp oup)(tcp-connect host port)))

Modified ulex-simple/dbmgr.scm from [3233b20f1f] to [59496cc87c].

330
331
332
333
334
335
336






337
338
339

340






341
342
343
344
345
346
347
348
349
350
351
352
    (rmt:send-receive-real sinfo apath dbname cmd params)))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))






    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
           ;; then send-receive using the ulex layer to host-port stored in cdat

           (res      (send-receive uconn (conndat-hostport cdat) cmd params)))






      ;; since we accessed the server we can bump the expires time up
      (conndat-expires-set! cdat (+ (current-seconds)
                                    (server:expiration-timeout)
                                    -2)) ;; two second margin for network time misalignments etc.
      res)))

;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname







>
>
>
>
>
>
|
|
|
>
|
>
>
>
>
>
>
|
|
|
|
|







330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
    (rmt:send-receive-real sinfo apath dbname cmd params)))

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (if (> (current-seconds)(conndat-expires cdat))
	(begin
	  (debug:print-info 0 *default-log-port* "Connection to "apath"/"dbname" expired, reconnecting.")
	  (rmt:drop-conn sinfo apath dbname)
	  (rmt:send-receive-real sinfo apath dbname cmd params))
	(begin
	  (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
	  (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
		 ;; then send-receive using the ulex layer to host-port stored in cdat
		 (res      (condition-case
			    (send-receive uconn (conndat-hostport cdat) cmd params)
			    ((exn i/o net timeout)
			     ;; here we need to close and reconnect
			     (rmt:drop-conn sinfo apath dbname)
			     (rmt:general-open-connection sinfo apath dbname)
			     (rmt:send-receive-real sinfo apath dbname cmd params)
			     ))))
	    ;; since we accessed the server we can bump the expires time up
	    (conndat-expires-set! cdat (+ (current-seconds)
					  (server:expiration-timeout)
					  -2)) ;; two second margin for network time misalignments etc.
	    res)))))

;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname