Megatest

Check-in [1bf5c0bd20]
Login
Overview
Comment:Merged in v1.64 changes
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: 1bf5c0bd2017622e9d5cad94683a7f00b3a7985d
User & Date: mrwellan on 2017-07-17 14:16:15
Other Links: branch diff | manifest | tags
Context
2017-07-31
08:19
Merged v1.64 into v1.65 check-in: 69ed787cf9 user: matt tags: v1.65
2017-07-17
14:16
Merged in v1.64 changes check-in: 1bf5c0bd20 user: mrwellan tags: v1.65
13:00
Re-enabled install of nanomsg check-in: cd95146b0a user: mrwellan tags: v1.64
2017-07-13
17:08
enabled config override usinf -config switch for tlisten check-in: d823b9ddae user: pjhatwal tags: v1.65
Changes

Modified api.scm from [1f6842e15f] to [c4438e36a1].

212
213
214
215
216
217
218

219
220
221
222
223
224
225
                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))

		   ;; NO SYNC DB
		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))

		 
                   ;; ARCHIVES
                   ;; ((archive-get-allocations)   
                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))








>







212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))

		   ;; NO SYNC DB
		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
		   ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
		 
                   ;; ARCHIVES
                   ;; ((archive-get-allocations)   
                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                   ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

Modified common.scm from [3f62499b26] to [c3e712597c].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix srfi-18 extras
     pkts (prefix dbi dbi:))

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit common))











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use srfi-1 data-structures posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix srfi-18 extras
     pkts (prefix dbi dbi:))

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit common))
40
41
42
43
44
45
46








47
48
49
50
51
52
53
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))









;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))







>
>
>
>
>
>
>
>







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))

(define (get-file-descriptor-count #!key  (pid (current-process-id )))
  (list
    (length (glob (conc "/proc/" pid "/fd/*")))
    (length  (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*")))))
  )
)


;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
;; (define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)







|







123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:get-timeout)) ;; default from server:get-timeout
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

;; launching and hosts
(defstruct host
  (reachable    #f)







|







164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

;; launching and hosts
(defstruct host
  (reachable    #f)
498
499
500
501
502
503
504

505
506
507
508
509
510
511
512
513
    (1 "n/a")
    (2 "PASS")
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")

    (8 "FAIL")
    (9 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))








>
|
|







506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
    (1 "n/a")
    (2 "PASS")
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")
    (10 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

2077
2078
2079
2080
2081
2082
2083






2084
2085
2086
2087
2088
2089
2090
2091
  (string-intersperse 
   (map (lambda (x)
          (number->string x 16))
        (map string->number
             (string-split instr)))
   "/"))







(define (common:faux-lock keyname #!key (wait-time 8))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
	  (begin
	    (thread-sleep! 1)
	    (if (eq? wait-time 1) ;; only one second left, steal the lock
		(begin
		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)







>
>
>
>
>
>
|







2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
  (string-intersperse 
   (map (lambda (x)
          (number->string x 16))
        (map string->number
             (string-split instr)))
   "/"))

;;======================================================================
;; L O C K I N G   M E C H A N I S M S 
;;======================================================================

;; faux-lock is deprecated. Please use simple-lock below
;;
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
	  (begin
	    (thread-sleep! 1)
	    (if (eq? wait-time 1) ;; only one second left, steal the lock
		(begin
		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
2099
2100
2101
2102
2103
2104
2105




2106




2107
2108
2109
2110
2111
2112
2113
(define (common:faux-unlock keyname #!key (force #f))
  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
      (begin
        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
        #t)
      #f))





  




(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))

(define (common:get-color-from-status status)
  (cond
   ((equal? status "PASS")    "green")
   ((equal? status "FAIL")    "red")







>
>
>
>
|
>
>
>
>







2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
(define (common:faux-unlock keyname #!key (force #f))
  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
      (begin
        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
        #t)
      #f))

;; simple lock. improve and converge on this one.
;;
(define (common:simple-lock keyname)
  (rmt:no-sync-get-lock keyname))

;;======================================================================
;;
;;======================================================================

(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))

(define (common:get-color-from-status status)
  (cond
   ((equal? status "PASS")    "green")
   ((equal? status "FAIL")    "red")

Modified db.scm from [43a8d2d12e] to [5cc0abc545].

1046
1047
1048
1049
1050
1051
1052



1053
1054




1055


1056
1057
1058
1059

1060
1061


1062
1063
1064
1065
1066
1067
1068
1069
	 (tmpdb       (db:get-db dbstruct))
	 (refndb      (dbr:dbstruct-refndb dbstruct)))
    (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps



(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) 
  (let* ((start-time         (current-seconds))




	 (last-update        (if no-sync-db


				 (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
				 0)) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
	 (sync-needed        (> (- start-time last-update) 6))
	 (res                (if sync-needed ;; don't sync if a sync already occurred in the past 6 seconds

				 (begin
				   (if no-sync-db


				       (db:no-sync-set no-sync-db "LAST_UPDATE" start-time))
				   (db:tmp->megatest.db-sync dbstruct last-update))
				 0))
	 (sync-time           (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
          (if sync-needed
              (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))







>
>
>


>
>
>
>
|
>
>
|
|

|
>


>
>
|







1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
	 (tmpdb       (db:get-db dbstruct))
	 (refndb      (dbr:dbstruct-refndb dbstruct)))
    (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
;;
;;  NB// no-sync-db is the db handle, not a flag!
;;
(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) 
  (let* ((start-time         (current-seconds))
	 (last-full-update   (if no-sync-db
				 (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0)
				 0))
	 (full-sync-needed   (> (- start-time last-full-update) 3600)) ;; every hour do a full sync
	 (last-update        (if full-sync-needed
				 0
				 (if no-sync-db
				     (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
				     0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
	 (sync-needed        (> (- start-time last-update) 6))
	 (res                (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds
				     full-sync-needed)
				 (begin
				   (if no-sync-db
				       (begin
					 (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time))
					 (db:no-sync-set no-sync-db "LAST_UPDATE" start-time)))
				   (db:tmp->megatest.db-sync dbstruct last-update))
				 0))
	 (sync-time           (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
          (if sync-needed
              (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
1847
1848
1849
1850
1851
1852
1853

1854
1855


1856
1857

1858
1859
1860
1861
1862
1863
1864

1865
1866
1867
1868
1869


1870
1871
1872
1873
1874
1875
1876
;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (db:dbfile-path))
	 (dbname (conc dbpath "/no-sync.db"))

	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))


    (sqlite3:execute db "PRAGMA synchronous = 0;")
    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")

    db))

;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
(define (db:no-sync-db db-in)

  (if db-in
      db-in
      (let ((db (db:open-no-sync-db)))
	(set! *no-sync-db* db)
	db)))



(define (db:no-sync-set db var val)
  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))








>


>
>
|
|
>







>
|
|
|
|
|
>
>







1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (db:dbfile-path))
	 (dbname (conc dbpath "/no-sync.db"))
	 (db-exists (common:file-exists? dbname))
	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
    (if (not db-exists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
	  (sqlite3:execute db "PRAGMA journal_mode=WAL;")))
    db))

;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
(define (db:no-sync-db db-in)
  (mutex-lock! *db-access-mutex*)
  (let ((res (if db-in
                 db-in
                 (let ((db (db:open-no-sync-db)))
                   (set! *no-sync-db* db)
                   db))))
    (mutex-unlock! *db-access-mutex*)
    res))

(define (db:no-sync-set db var val)
  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))

1889
1890
1891
1892
1893
1894
1895




















1896
1897
1898
1899
1900
1901
1902
          (if newres
              newres
              res))
        res)))

(define (db:no-sync-close-db db)
  (db:safely-close-sqlite3-db db))





















;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change

;; why get the keys from the db? why not get from the *configdat*
;; using keys:config-get-fields?







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
          (if newres
              newres
              res))
        res)))

(define (db:no-sync-close-db db)
  (db:safely-close-sqlite3-db db))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db-in keyname)
  (let ((db (db:no-sync-db db-in)))
    (sqlite3:with-transaction
     db
     (lambda ()
       (handle-exceptions
	   exn
	   (let ((lock-time (current-seconds)))
	     (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
	     `(#t . ,lock-time))
	 `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))))



;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change

;; why get the keys from the db? why not get from the *configdat*
;; using keys:config-get-fields?

Modified http-transport.scm from [3d5c1d714d] to [bddce99bac].

54
55
56
57
58
59
60
61


62

63
64
65
66
67
68
69
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))


    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)

    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 







|
>
>

>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area))
	 (start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 
101
102
103
104
105
106
107

108
109
110
111
112
113
114
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "hey"))
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  (else (continue))))))))

    (http-transport:try-start-server ipaddrstr start-port)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))







>







104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "hey"))
				   (send-response body: "hey there!\n"
						  headers: '((content-type text/plain))))
				  (else (continue))))))))
    (with-output-to-file start-file (lambda ()(print (current-process-id))))
    (http-transport:try-start-server ipaddrstr start-port)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))
	(config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes")))
236
237
238
239
240
241
242

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
					      exn
					      (let ((call-chain (get-call-chain))
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
						(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
						(debug:print 0 *default-log-port* " message: " msg)
						(debug:print 0 *default-log-port* " cmd: " cmd " params: " params)

						(if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key "thekey")
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)







>











|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
					      exn
					      (let ((call-chain (get-call-chain))
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
						(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
						(debug:print 0 *default-log-port* " message: " msg)
						(debug:print 0 *default-log-port* " cmd: " cmd " params: " params)
                                                (debug:print 0 *default-log-port* " call-chain: " call-chain)
						(if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or *server-id* "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
298
299
300
301
302
303
304

305
306
307
308
309
310
311
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
	    (close-connection! api-dat)

	    #t))
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))







>







303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (handle-exceptions
	    exn
	    (begin
	      (print-call-chain *default-log-port*)
	      (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn)))
	    (close-connection! api-dat)
            (close-idle-connections!)
	    #t))
	#f)))


(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
345
346
347
348
349
350
351


352
353
354
355
356
357
358
359
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")


  (let* ((server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)







>
>
|







351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
374
375
376
377
378
379
380
381
382
383



384
385
386
387
388
389
390
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:get-timeout))
	 (server-going  #f)
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server



    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	  (begin







|


>
>
>







382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-going  #f)
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (with-output-to-file started-file (lambda ()(print (current-process-id))))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	  (begin
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
	  (begin
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600))
	     (adjusted-timeout (if (> hrs-since-start 1)  ;; never used!
				   (- server-timeout (inexact->exact (round (* hrs-since-start 60))))  ;; subtract 60 seconds per hour
				   server-timeout)))
	(if (common:low-noise-print 120 "server timeout")
	    (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout))
	(cond
         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds))
	       (< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour.
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
		  (if (not *server-overloaded*)







|
<
<
<
<
<



|
<







439
440
441
442
443
444
445
446





447
448
449
450

451
452
453
454
455
456
457
	  (begin
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))





	(cond
         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))

          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
		  (if (not *server-overloaded*)
489
490
491
492
493
494
495













496
497
498
499
500
501
502
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)













  ;; lets not even bother to start if there are already three or more server files ready to go
  (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
    (if (> num-alive 3)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")
	  (exit))))
  (let* ((th2 (make-thread (lambda ()







>
>
>
>
>
>
>
>
>
>
>
>
>







494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area))
	 (server-start        (conc tmp-area "/.server-start"))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5)))
    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
	     (not server-starting))
	(begin
	  (debug:print-info 0 *default-log-port* "NOT starting server, there is either a recently started server or a server in process of starting")
	  (exit))))
  ;; lets not even bother to start if there are already three or more server files ready to go
  (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
    (if (> num-alive 3)
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")
	  (exit))))
  (let* ((th2 (make-thread (lambda ()
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
    (thread-start! th2)
    (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
    (thread-start! th3)
    (set! *didsomething* #t)
    (thread-join! th2)
    (exit)))

(define (http-transport:server-signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print 0 *default-log-port* " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (thread-sleep! 1))
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 *default-log-port* "       Done.")
			     (exit 4))
			   "exit on ^C timer")))
     (thread-start! th2)
     (thread-start! th1)
     (thread-join! th2))))

;;======================================================================
;; web pages
;;======================================================================

(define (http-transport:main-page)
  (let ((linkpath (root-path)))







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
    (thread-start! th2)
    (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
    (thread-start! th3)
    (set! *didsomething* #t)
    (thread-join! th2)
    (exit)))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()
;; 			     (thread-sleep! 1))
;; 			   "eat response"))
;; 	 (th2 (make-thread (lambda ()
;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
;; 			     (debug:print 0 *default-log-port* "       Done.")
;; 			     (exit 4))
;; 			   "exit on ^C timer")))
;;      (thread-start! th2)
;;      (thread-start! th1)
;;      (thread-join! th2))))

;;======================================================================
;; web pages
;;======================================================================

(define (http-transport:main-page)
  (let ((linkpath (root-path)))

Modified launch.scm from [0fd6309282] to [8499d7ea91].

462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	  ;; On NFS it can be slow and unreliable to get needed startup information.
	  ;;  i. Check if we are on the homehost, if so, proceed
	  ;; ii. Check if host and port passed in via CMDINFO are valid and if
	  ;;     possible use them.
	  (let ((bestadrs (server:get-best-guess-address (get-host-name)))
		(needcare #f))
	    (if (equal? homehost bestadrs) ;; we are likely on the homehost
		(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
		(let ((host-port (if serverurl (string-split serverurl ":") #f)))
		  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
		  (if (string? homehost)
		      (if (and host-port
			       (> (length host-port) 1))
			  (let* ((host      (car host-port))
                                 (port      (cadr host-port))
                                 (start-res (http-transport:client-connect host port))
                                 (ping-res  (rmt:login-no-auto-client-setup start-res)))
			    (if (and start-res
				     ping-res)
				;; (begin ;; let ((url  (http-transport:server-dat-make-url start-res)))
				(begin
				  (remote-conndat-set! *runremote* start-res)
				  ;; (remote-server-url-set! *runremote* url)
				  ;; (if (server:ping url)
				  (debug:print-info 0 *default-log-port* "connected to " host ":" port " using CMDINFO data."))
				(begin
				  (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " host ":" port)
				  (set! *runremote* #f))
				  ;; (remote-conndat-set! *runremote* #f))
				))
			  (begin
			    (set! *runremote* #f)
			    (debug:print-info 0 *default-log-port* (if host-port
								       (conc "received invalid host-port information " host-port)
								       "no host-port information received"))
			    ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
			    (set! needcare #t)))
		      (begin
			(set! *runremote* #f)
			(debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.")
			(set! needcare #t)))))
	    (if needcare  ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host
		(let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory
		  (handle-exceptions
		      exn
		      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
		    (create-directory logdir #t)))))
		  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	;;    ;; On NFS it can be slow and unreliable to get needed startup information.
	;;    ;;  i. Check if we are on the homehost, if so, proceed
	;;    ;; ii. Check if host and port passed in via CMDINFO are valid and if
	;;    ;;     possible use them.
	;;    (let ((bestadrs (server:get-best-guess-address (get-host-name)))
	;;    	(needcare #f))
	;;      (if (equal? homehost bestadrs) ;; we are likely on the homehost
	;;    	(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
	;;    	(let ((host-port (if serverurl (string-split serverurl ":") #f)))
	;;    	  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
	;;    	  (if (string? homehost)
	;;    	      (if (and host-port
	;;    		       (> (length host-port) 1))
	;;    		  (let* ((host      (car host-port))
        ;;                           (port      (cadr host-port))
        ;;                           (start-res (http-transport:client-connect host port))
        ;;                           (ping-res  (rmt:login-no-auto-client-setup start-res)))
	;;    		    (if (and start-res
	;;    			     ping-res)
	;;    			;; (begin ;; let ((url  (http-transport:server-dat-make-url start-res)))
	;;    			(begin
	;;    			  (remote-conndat-set! *runremote* start-res)
	;;    			  ;; (remote-server-url-set! *runremote* url)
	;;    			  ;; (if (server:ping url)
	;;    			  (debug:print-info 0 *default-log-port* "connected to " host ":" port " using CMDINFO data."))
	;;    			(begin
	;;    			  (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " host ":" port)
	;;    			  (set! *runremote* #f))
	;;    			  ;; (remote-conndat-set! *runremote* #f))
	;;    			))
	;;    		  (begin
	;;    		    (set! *runremote* #f)
	;;    		    (debug:print-info 0 *default-log-port* (if host-port
	;;    							       (conc "received invalid host-port information " host-port)
	;;    							       "no host-port information received"))
	;;    		    ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
	;;    		    (set! needcare #t)))
	;;    	      (begin
	;;    		(set! *runremote* #f)
	;;    		(debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.")
	;;    		(set! needcare #t)))))
	;;      (if needcare  ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host
	;;    	(let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory
	;;    	  (handle-exceptions
	;;    	      exn
	;;    	      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
	;;    	    (create-directory logdir #t)))))
	;;    	  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
1233
1234
1235
1236
1237
1238
1239













1240
1241
1242
1243
1244
1245
1246
1247
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex













  (let* ((item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))







>
>
>
>
>
>
>
>
>
>
>
>
>
|







1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
1409
1410
1411
1412
1413
1414
1415

1416
1417
1418
1419
1420
1421
1422
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.

	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)







>







1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)

Modified megatest.config from [0d1fb252ac] to [83ee488a74].

48
49
50
51
52
53
54


localhost:12345  contact=matt@kiatoa.com
localhost:54321  contact=matt@kiatoa.com

[listener]
script nbfake echo











>
>
48
49
50
51
52
53
54
55
56
localhost:12345  contact=matt@kiatoa.com
localhost:54321  contact=matt@kiatoa.com

[listener]
script nbfake echo


[server]
timeout 1

Modified megatest.scm from [0541df5d54] to [d0fc7271ce].

49
50
51
52
53
54
55





56
57
58



















59
60
61
62
63
64
65

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")






(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))




















;; Disabled help items
;;  -rollup                 : (currently disabled) fill run (set by :runname)  with latest test(s)
;;                            from prior runs with same keys
;;  -daemonize              : fork into background and disconnect from stdin/out

(define help (conc "







>
>
>
>
>



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "megatest-fossil-hash.scm")

(define *usage-log-file* #f)    ;; put path to file for logging usage in this var in the ~/.megatestrc file
(define *usage-use-seconds* #f) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))

;; usage logging, careful with this, it is not designed to deal with all real world challenges!
;;
(if (and (common:file-exists? *usage-log-file*)
	   (file-write-access? *usage-log-file*))
      (with-output-to-file
	  *usage-log-file*
	(lambda ()
	  (print
           (if *usage-use-seconds*
               (current-seconds)
               (time->string
                (seconds->local-time (current-seconds))
                "%Yww%V.%w %H:%M:%S"))
           " "
           (current-user-name) " "
           (current-directory) " "
	    "\"" (string-intersperse (argv) " ") "\""))
	#:append))

;; Disabled help items
;;  -rollup                 : (currently disabled) fill run (set by :runname)  with latest test(s)
;;                            from prior runs with same keys
;;  -daemonize              : fork into background and disconnect from stdin/out

(define help (conc "

Modified rmt.scm from [e5e9042aee] to [07c2956559].

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138

139
140
141
142
143
144
145
146
147
148
149
150
151
      (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
      #f)

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; ;;DOT CASE4 [label="reset\nconnection"];
     ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;; ;;DOT CASE4 -> "rmt:send-receive";
     ;; ;; reset the connection if it has been unused too long
     ;; ((and runremote
     ;;       (remote-conndat runremote)
     ;; 	   (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 10)))) ;; Subtract or add the random value? Seems like it should be substract but Neither fixes the "WARNING: failure in with-input-from-request to #<request>.\n message: Server closed connection before sending response"
     ;; 	     (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))

     ;;  (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
     ;;  (http-transport:close-connections area-dat: runremote)
     ;;  (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
     ;;DOT CASE5 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read







|
|
|
|
|
|
|
|
>
|
|
|
|
|
|







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
      (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
      #f)

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;;DOT CASE4 [label="reset\nconnection"];
     ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4}
     ;;DOT CASE4 -> "rmt:send-receive";
     ;; reset the connection if it has been unused too long
     ((and runremote
           (remote-conndat runremote)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (remote-conndat runremote))
		 (remote-server-timeout runremote))))
      (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.")
      (http-transport:close-connections area-dat: runremote)
      (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
      (mutex-unlock! *rmt-mutex*)
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     
     ;;DOT CASE5 [label="local\nread"];
     ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5};
     ;;DOT CASE5 -> "rmt:open-qry-close-locally";
     ;; on homehost and this is a read
     ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required
	   (cdr (remote-hh-dat runremote))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248

249
250

251
252
253
254
255
256
257


258
259
260
261
262
263
264
265
266
267
268

269
270
271

272
273
274
275
276
277
278
279

     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      (mutex-lock! *rmt-mutex*)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (and (vector? conninfo) (> 5 (vector-length conninfo)))
            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
            (begin

              (set! conninfo #f)
              (remote-conndat-set! runremote #f))) 

	;; (mutex-unlock! *rmt-mutex*)
        (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	(mutex-unlock! *rmt-mutex*)
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.


		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)

	      (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)

	      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)







|

|











|

|
>

|
>







>
>











>
|


>
|







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

     ;;DOT CASE11 [label="send_receive"];
     ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11};
     ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"];
     ;;DOT CASE11 -> "RESULT" [label="call succeeded"];
     ;; not on homehost, do server query
     (else
      ;; (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9")
      ;; (mutex-lock! *rmt-mutex*)
      (let* ((conninfo (remote-conndat runremote))
	     (dat      (case (remote-transport runremote)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
			  (exit))))
	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (and (vector? conninfo) (< 5 (vector-length conninfo)))
            (http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	    (begin
              (debug:print 0 *default-log-port* "INFO: Should not get here! conninfo=" conninfo)
              (set! conninfo #f)
              (remote-conndat-set! *runremote* #f)
              (http-transport:close-connections  area-dat: runremote)))
	;; (mutex-unlock! *rmt-mutex*)
        (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
	(mutex-unlock! *rmt-mutex*)
	(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
	    (if (and (vector? res)
		     (eq? (vector-length res) 2)
		     (eq? (vector-ref res 1) 'overloaded)) ;; since we are looking at the data to carry the error we'll use a fairly obtuse combo to minimise the chances of some sort of collision.
                ;; this is the case where the returned data is bad or the server is overloaded and we want
                ;; to ease off the queries
		(let ((wait-delay (+ attemptnum (* attemptnum 10))))
		  (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
		  (mutex-lock! *rmt-mutex*)
		  (http-transport:close-connections area-dat: runremote)
		  (set! *runremote* #f) ;; force starting over
		  (mutex-unlock! *rmt-mutex*)
		  (thread-sleep! wait-delay)
		  (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		res) ;; All good, return res
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
	      (mutex-lock! *rmt-mutex*)
              (remote-conndat-set!    runremote #f)
	      (http-transport:close-connections area-dat: runremote)
	      (remote-server-url-set! runremote #f)
	      (mutex-unlock! *rmt-mutex*)
              (debug:print-info 12 *default-log-port* "rmt:send-receive, case  9.1")
	      ;; (if (not (server:check-if-running *toppath*))
	      ;; 	  (server:start-and-wait *toppath*))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

    ;;DOT }
    
;; (define (rmt:update-db-stats run-id rawcmd params duration)
859
860
861
862
863
864
865



866
867
868
869
870
871
872

(define (rmt:no-sync-get/default var default)
  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))

(define (rmt:no-sync-del! var)
  (rmt:send-receive 'no-sync-del! #f `(,var)))




;;======================================================================
;; A R C H I V E S
;;======================================================================

(define (rmt:archive-get-allocations  testname itempath dneeded)
  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))








>
>
>







866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882

(define (rmt:no-sync-get/default var default)
  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))

(define (rmt:no-sync-del! var)
  (rmt:send-receive 'no-sync-del! #f `(,var)))

(define (rmt:no-sync-get-lock keyname)
  (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))

;;======================================================================
;; A R C H I V E S
;;======================================================================

(define (rmt:archive-get-allocations  testname itempath dneeded)
  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))

Modified runs.scm from [f6cff660ce] to [bb40df52d8].

323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server
    (debug:print 0 *default-log-port* "waiting on server...")
    (server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))







|
|
|







323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
    ;;(debug:print 0 *default-log-port* "waiting on server...")
    ;;(server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))
593
594
595
596
597
598
599

600
601
602
603
604
605
606
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)


(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*







>







593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.  
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
1268
1269
1270
1271
1272
1273
1274

1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)


	;; every 15 minutes verify the server is there for this run
	(if (and (common:low-noise-print 240 "try start server"  run-id)
		 (not (or (and *runremote*
			       (remote-server-url *runremote*)
			       (server:ping (remote-server-url *runremote*)))
			  (server:check-if-running *toppath*))))
	    (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))







>

|
|
|
|
|
|







1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

        ;; -- removed BB 17ww28 - no longer needed.
	;; every 15 minutes verify the server is there for this run
	;; (if (and (common:low-noise-print 240 "try start server"  run-id)
	;; 	 (not (or (and *runremote*
	;; 		       (remote-server-url *runremote*)
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test







|







1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
1958
1959
1960
1961
1962
1963
1964
1965















1966
1967
1968
1969
1970
1971
1972

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all)))















    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all))
         (test-id       (db:test-get-id test))
        ;; (lock-key      (conc "test-" test-id))
        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
	 )
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
1998
1999
2000
2001
2002
2003
2004
2005


2006
2007
2008
2009
2010
2011
2012
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))



;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code







|
>
>







2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
    ;; (rmt:no-sync-del! lock-key)
    ))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code

Modified server.scm from [1cc5f8ba9a] to [90c7962d1c].

441
442
443
444
445
446
447
448

449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))

;; timeout is in hours

(define (server:get-timeout)
  (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (string->number tmo))
	(* 60 60 (string->number tmo))
	;; (* 3 24 60 60) ;; default to three days
	;;(* 60 60 1)     ;; default to one hour
	(* 60 5)          ;; default to five minutes
	)))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))







|
>
|
|

|
|
<
<
<
|







441
442
443
444
445
446
447
448
449
450
451
452
453
454



455
456
457
458
459
460
461
462
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))

;; timeout is hms string: 1h 5m 3s, default is 1 minute
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo))
        (* 3600 (string->number tmo))



	60)))

(define (server:get-best-guess-address hostname)
  (let ((res #f))
    (for-each 
     (lambda (adr)
       (if (not (eq? (u8vector-ref adr 0) 127))
	   (set! res adr)))
480
481
482
483
484
485
486
487
488



489
490
491
492
493
494
495
496
497
498

499







500



501

502
503
504





505
506
507
508
509
510




511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
        (sync-duration 0) ;; run time of the sync in milliseconds
        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb     (dbr:dbstruct-mtdb dbstruct))
	       (mtpath   (db:dbdat-get-path mtdb)))



	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (start-time       (current-seconds))

		   (mt-mod-time      (file-modification-time mtpath))







		   (recently-synced  (< (- start-time mt-mod-time) 4))



		   (will-sync        (and (or need-sync should-sync)

					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress="sync-in-progress" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync)





	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let ((sync-start (current-milliseconds)))




                    (if (< sync-duration 300)
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
                        ;; TODO: factor this next routine out into a function
                        (with-input-from-pipe ;; this should not block other threads but need to verify this
                         "megatest -sync-to-megatest.db"
                         (lambda ()
                           (let loop ((inl (read-line))
                                      (res #f))
                             (if (eof-object? inl)
                                 (begin
                                   (set! sync-duration (- (current-milliseconds) sync-start))
                                   (cond







|
|
>
>
>








|

>

>
>
>
>
>
>
>
|
>
>
>
|
>


|
>
>
>
>
>






>
>
>
>
|











|







478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
        (sync-duration 0) ;; run time of the sync in milliseconds
        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
		   (start-time       (current-seconds))
                   (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
		   (mt-mod-time      (file-modification-time mtpath))
		   (last-sync-start  (if (common:file-exists? start-file)
					 (file-modification-time start-file)
					 0))
		   (last-sync-end    (if (common:file-exists? end-file)
					 (file-modification-time end-file)
					 10))
                   (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
		   (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
					  (< mt-mod-time last-sync-start)))
		   (sync-done        (<= last-sync-start last-sync-end))
		   (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
                                          (or need-sync should-sync)
					  sync-done
					  (not sync-in-progress)
					  (not recently-synced))))
              (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
				" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
				" sync-done=" sync-done " sync-period=" sync-period)
              (if (and (> sync-period 5)
                       (common:low-noise-print 30 "sync-period"))
                  (debug:print-info 0 *default-log-port* "Increased sync period due to load: " sync-period))
	      ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
                  (let ((sync-start (current-milliseconds)))
		    (with-output-to-file start-file (lambda ()(print (current-process-id))))
		    
		    ;; put lock here
		    
                    (if (< sync-duration 3000) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
                        (let ((res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
                          (set! sync-duration (- (current-milliseconds) sync-start))
                          (if (> res 0) ;; some records were transferred, keep the db alive
                              (begin
                                (mutex-lock! *heartbeat-mutex*)
                                (set! *db-last-access* (current-seconds))
                                (mutex-unlock! *heartbeat-mutex*)
                                (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
                              (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
                        ;; TODO: factor this next routine out into a function
                        (with-input-from-pipe ;; this should not block other threads but need to verify this
                         (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
                         (lambda ()
                           (let loop ((inl (read-line))
                                      (res #f))
                             (if (eof-object? inl)
                                 (begin
                                   (set! sync-duration (- (current-milliseconds) sync-start))
                                   (cond
541
542
543
544
545
546
547




548
549
550
551
552
553
554
                                   (loop (read-line)
                                         (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)




		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    







>
>
>
>







563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
                                   (loop (read-line)
                                         (or num-synced res))))))))))
	      (if will-sync
		  (begin
		    (mutex-lock! *db-multi-sync-mutex*)
		    (set! *db-sync-in-progress* #f)
		    (set! *db-last-sync* start-time)
		    (with-output-to-file end-file (lambda ()(print (current-process-id))))

		    ;; release lock here

		    (mutex-unlock! *db-multi-sync-mutex*)))
	      (if (and debug-mode
		       (> (- start-time last-time) 60))
		  (begin
		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    

Modified tcmt.scm from [22014ba117] to [06a53b1301].

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	       ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat)
	       (if (or (not prevstat)
		       (not (equal? prevstat newstat)))
		   (begin
		     (case (string->symbol newstat)
		       ((UNK)       ) ;; do nothing
		       ((RUNNING)   (print "##teamcity[testStarted name='" tctname "']"))
		       ((PASS SKIP) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]"))
		       (else
			(print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]")))
		     (flush-output)
		     (hash-table-set! data testn newstat)))))
	   tests)))
      run-ids))
    now))







|







80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	       ;; (print "DEBUG: testn=" testn " state=" state " status=" status " prevstat=" prevstat " newstat=" newstat)
	       (if (or (not prevstat)
		       (not (equal? prevstat newstat)))
		   (begin
		     (case (string->symbol newstat)
		       ((UNK)       ) ;; do nothing
		       ((RUNNING)   (print "##teamcity[testStarted name='" tctname "']"))
		       ((PASS SKIP WARN WAIVED) (print "##teamcity[testFinished name='" tctname "' duration='" (* 1e3 duration) "'" cmtstr details " ]"))
		       (else
			(print "##teamcity[testFailed name='" tctname "' " cmtstr details " ]")))
		     (flush-output)
		     (hash-table-set! data testn newstat)))))
	   tests)))
      run-ids))
    now))

Modified tests/fullrun/megatest.config from [a290fb1cb4] to [9422e2422a].

252
253
254
255
256
257
258


259
260
261
262
263
264
265
                         ((sleeprunner) "sleeprunner") \
                         (else "nbfake"))}

# launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log 

# launcher #{ shell if which bsub > /dev/null;then echo bsub -q priority -o openlava.log;else echo sleeprunner;fi}
# launcher nbfake



[configf:settings trim-trailing-spaces yes]

# Override the rollup for specific tests
[testrollup]
runfirst ls








>
>







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
                         ((sleeprunner) "sleeprunner") \
                         (else "nbfake"))}

# launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log 

# launcher #{ shell if which bsub > /dev/null;then echo bsub -q priority -o openlava.log;else echo sleeprunner;fi}
# launcher nbfake
maxload 1.1
maxhomehostload 1.1

[configf:settings trim-trailing-spaces yes]

# Override the rollup for specific tests
[testrollup]
runfirst ls

Modified utils/installall.sh from [5277c0ad22] to [00369a2aae].

15
16
17
18
19
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77



78
79
80
81
82
83
84
#  PURPOSE.

if [[ $OPTION=="" ]]; then
    export OPTION=std
fi

echo You may need to do the following first:
echo sudo apt-get install libreadline-dev
echo sudo apt-get install libwebkitgtk-dev 
echo sudo apt-get install libpangox-1.0-0 zlib1g-dev libfreetype6-dev cmake
echo sudo apt-get install libssl-dev  uuid-dev
echo sudo apt-get install libmotif3 -OR- set KTYPE=26g4

echo
echo Set OPTION to std, currently OPTION=$OPTION
echo
echo Additionally, if you want mysql-client, you will need to make sure
echo mysql_config is in your path
echo for postgres to install dbi libpq-dev
echo
echo You are using PREFIX=$PREFIX
echo You are using proxy="$proxy"
echo 
echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :"

SYSTEM_TYPE=$(lsb_release -irs |tr ' ' '_' |tr '\n' '-')$(uname -i)-$OPTION

CHICKEN_VERSION=4.11.0
CHICKEN_BASEVER=4.11.0

# Set up variables
#
case $SYSTEM_TYPE in
Ubuntu-16.04-x86_64-std)
	KTYPE=32
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11
	CHICKEN_VERSION=4.12.0
	CHICKEN_BASEVER=4.12.0
	;;
Ubuntu-16.04-i686-std)
	KTYPE=32
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11


	;;
SUSE_LINUX_11-x86_64-std)
  KTYPE=26g4 
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11
  ;;
CentOS_5.11-x86_64-std)
  KTYPE=24g3 
  CDVER=5.4.1
  IUPVER=3.5
  IMVER=3.6.3
  ;; 
esac


echo KTYPE=$KTYPE			  
echo CDVER=$CDVER
echo IUPVER=$IUPVER
echo IMVER=$IMVER	



# NOTES:
#
# Centos with security setup may need to do commands such as following as root:
#
# NB// fix the paths first
#
# for a in /localdisk/chicken/4.8.0/lib/*.so;do chcon -t textrel_shlib_t $a; done 







|
|
|
|
|
>













>



















>
>















>




>
>
>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#  PURPOSE.

if [[ $OPTION=="" ]]; then
    export OPTION=std
fi

echo You may need to do the following first:
echo sudo apt install libreadline-dev
echo sudo apt install libwebkitgtk-dev 
echo sudo apt install libpangox-1.0-0 zlib1g-dev libfreetype6-dev cmake
echo sudo apt install libssl-dev  uuid-dev
echo sudo apt install libmotif3 -OR- set KTYPE=26g4
echo sudo apt install cmake
echo
echo Set OPTION to std, currently OPTION=$OPTION
echo
echo Additionally, if you want mysql-client, you will need to make sure
echo mysql_config is in your path
echo for postgres to install dbi libpq-dev
echo
echo You are using PREFIX=$PREFIX
echo You are using proxy="$proxy"
echo 
echo "Set additional_libpath to help find gtk or other libraries, don't forget a leading :"

SYSTEM_TYPE=$(lsb_release -irs |tr ' ' '_' |tr '\n' '-')$(uname -i)-$OPTION

CHICKEN_VERSION=4.11.0
CHICKEN_BASEVER=4.11.0

# Set up variables
#
case $SYSTEM_TYPE in
Ubuntu-16.04-x86_64-std)
	KTYPE=32
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11
	CHICKEN_VERSION=4.12.0
	CHICKEN_BASEVER=4.12.0
	;;
Ubuntu-16.04-i686-std)
	KTYPE=32
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11
        CHICKEN_VERSION=4.12.0
        CHICKEN_BASEVER=4.12.0
	;;
SUSE_LINUX_11-x86_64-std)
  KTYPE=26g4 
	CDVER=5.10
	IUPVER=3.17
	IMVER=3.11
  ;;
CentOS_5.11-x86_64-std)
  KTYPE=24g3 
  CDVER=5.4.1
  IUPVER=3.5
  IMVER=3.6.3
  ;; 
esac

echo SYSTEM_TYPE=$SYSTEM_TYPE
echo KTYPE=$KTYPE			  
echo CDVER=$CDVER
echo IUPVER=$IUPVER
echo IMVER=$IMVER	
echo CHICKEN_VERSION=$CHICKEN_VERSION
echo CHICKEN_BASEVER=$CHICKEN_BASEVER

# NOTES:
#
# Centos with security setup may need to do commands such as following as root:
#
# NB// fix the paths first
#
# for a in /localdisk/chicken/4.8.0/lib/*.so;do chcon -t textrel_shlib_t $a; done 
99
100
101
102
103
104
105

106
107
108
109
110
111
112
sleep 5

if [[ $proxy == "" ]]; then 
  echo 'Please set the environment variable "proxy" to host.com:port (e.g. foo.com:1234) to use a proxy'
  echo PROX=""
else
  export http_proxy=http://$proxy

  export PROX="-proxy $proxy"
fi

if [[ $KTYPE == "" ]]; then
  echo 'Using KTYPE=26'
  export KTYPE=26g4
else







>







107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
sleep 5

if [[ $proxy == "" ]]; then 
  echo 'Please set the environment variable "proxy" to host.com:port (e.g. foo.com:1234) to use a proxy'
  echo PROX=""
else
  export http_proxy=http://$proxy
  export https_proxy=http://$proxy
  export PROX="-proxy $proxy"
fi

if [[ $KTYPE == "" ]]; then
  echo 'Using KTYPE=26'
  export KTYPE=26g4
else
151
152
153
154
155
156
157

158
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
    cd chicken-${CHICKEN_VERSION}
    # make PLATFORM=linux PREFIX=$PREFIX spotless
    make PLATFORM=linux PREFIX=$PREFIX
    make PLATFORM=linux PREFIX=$PREFIX install
    cd $BUILDHOME
fi
cd $BUILDHOME

#wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz 
#mv 1.0.0 1.0.0.tar.gz

# if ! [[ -e $PREFIX/lib64/libnanomsg.so.1.0.0 ]]; then
#         wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz 
#         mv 1.0.0 1.0.0.tar.gz
# 	tar xf 1.0.0.tar.gz 
# 	cd nanomsg-1.0.0
# 	./configure --prefix=$PREFIX
# 	make
# 	make install
# fi
# cd $BUILDHOME

export SQLITE3_VERSION=3090200
if ! [[ -e $PREFIX/bin/sqlite3 ]]; then
	echo Install sqlite3
	sqlite3_tgz=sqlite-autoconf-$SQLITE3_VERSION.tar.gz
	if ! [[ -e tgz/$sqlite3_tgz ]]; then
	    wget http://www.sqlite.org/2015/$sqlite3_tgz







>
|
|
>
|
|
|
|
|
|
|
|
|
|







160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    cd chicken-${CHICKEN_VERSION}
    # make PLATFORM=linux PREFIX=$PREFIX spotless
    make PLATFORM=linux PREFIX=$PREFIX
    make PLATFORM=linux PREFIX=$PREFIX install
    cd $BUILDHOME
fi
cd $BUILDHOME
if [[ ! -e 1.0.0.tar.gz ]];then
  wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz 
  mv 1.0.0 1.0.0.tar.gz
fi
if ! [[ -e $PREFIX/lib64/libnanomsg.so.1.0.0 ]]; then
        wget --no-check-certificate https://github.com/nanomsg/nanomsg/archive/1.0.0.tar.gz 
        mv 1.0.0 1.0.0.tar.gz
	tar xf 1.0.0.tar.gz 
	cd nanomsg-1.0.0
	./configure --prefix=$PREFIX
	make
	make install
fi
cd $BUILDHOME

export SQLITE3_VERSION=3090200
if ! [[ -e $PREFIX/bin/sqlite3 ]]; then
	echo Install sqlite3
	sqlite3_tgz=sqlite-autoconf-$SQLITE3_VERSION.tar.gz
	if ! [[ -e tgz/$sqlite3_tgz ]]; then
	    wget http://www.sqlite.org/2015/$sqlite3_tgz
354
355
356
357
358
359
360

361
362



363
364
365
366
367
368
369
370
371
372
# iup:1.0.2 
CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX -D no-library-checks canvas-draw
# CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -deploy -prefix $DEPLOYTARG canvas-draw

cd $BUILDHOME  

# install ducttape

cd ../ducttape
$CHICKEN_INSTALL




cd $BUILDHOME
echo You may need to add $LD_LIBRARY_PATH to your LD_LIBRARY_PATH variable, a setup-chicken4x.sh 
echo file can be found in the current directory which should work for setting up to run chicken4x


echo Testing iup
$PREFIX/bin/csi -b -eval '(use iup)(print "Success")'









>
|
|
>
>
>










365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# iup:1.0.2 
CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX -D no-library-checks canvas-draw
# CSC_OPTIONS="-I$PREFIX/include -L$CSCLIBS" $CHICKEN_INSTALL $PROX -D no-library-checks -deploy -prefix $DEPLOYTARG canvas-draw

cd $BUILDHOME  

# install ducttape
if [[ -e ../ducttape ]];then
  cd ../ducttape
  $CHICKEN_INSTALL
else
  echo "ducttape egg not found at ../ducttape. You will need to cd into the ducttape directory in the megatest distribution and run \"chicken-install\""
fi

cd $BUILDHOME
echo You may need to add $LD_LIBRARY_PATH to your LD_LIBRARY_PATH variable, a setup-chicken4x.sh 
echo file can be found in the current directory which should work for setting up to run chicken4x


echo Testing iup
$PREFIX/bin/csi -b -eval '(use iup)(print "Success")'


Added utils/watch-close-wait.sh version [76c32422d7].

















>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
psline=$(ps -F -u $USER | grep "mtest" |grep " -run " | egrep " -(target|reqtarg) "| head -1)
id=$(echo $psline|awk '{print $2}')
echo "Watching process for command line: $psline"
echo "  with PID=$id"
while true;do
  echo "CLOSE_WAIT: $(lsof -n | grep CLOSE_WAIT | grep $id | wc -l) ALL OPEN: $(lsof -n |grep $id|wc -l) ALL CLOSE_WAIT: $(netstat -ap 2> /dev/null| grep -i close_wait| wc -l)"
  sleep 1
done