Megatest

Check-in [231a14dc2d]
Login
Overview
Comment:Added code to do rerun-cleans for subruns, and to check again for the number of cpus on a host when it registers 1 cpu (for ssh failures)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65 | v1.6529
Files: files | file ages | folders
SHA1: 231a14dc2dd954ba25edc51a72e662daa53a192e
User & Date: jmoon18 on 2019-05-31 14:17:13
Other Links: branch diff | manifest | tags
Context
2019-05-31
16:08
Updated version tag check-in: e03d5c75f8 user: jmoon18 tags: v1.65, v1.6529
14:17
Added code to do rerun-cleans for subruns, and to check again for the number of cpus on a host when it registers 1 cpu (for ssh failures) check-in: 231a14dc2d user: jmoon18 tags: v1.65, v1.6529
2019-04-12
16:55
Added log dir output to mtutil run calls check-in: ec43e24ac4 user: jmoon18 tags: v1.65, v1.6528
Changes

Modified common.scm from [b1d85b703a] to [18d6f81858].

1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
		    (host-last-used-set! rec curr-time)
		    new-best)
		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))

(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (< 1 numcpus-in) ;; not possible
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))







|







1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
		    (host-last-used-set! rec curr-time)
		    new-best)
		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))

(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))

Modified db.scm from [09f2a4da9d] to [b676e109a8].

228
229
230
231
232
233
234




235

236







237
238
239
240
241
242
243
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             (if (not file-exists)
                 (begin




                   

                   (if (and (configf:lookup *configdat* "setup" "use-wal")







                            (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                       (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                       (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
                   (initproc db)))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)







>
>
>
>
|
>
|
>
>
>
>
>
>
>







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             (if (not file-exists)
                 (begin
                   (if (and (configf:lookup *configdat* "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                      (begin
                       (print "DEBUG: Setting tmp_mode for " fname) 
                       (sqlite3:execute db (configf:lookup *configdat* "setup" "tmp_mode"))
                      )
                   )  
                   (if (and (configf:lookup *configdat* "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                       (begin
                        (print "DEBUG: Setting nfs_mode for " fname)
                        (sqlite3:execute db (configf:lookup *configdat* "setup" "nfs_mode"))
                       )
                   )  
                   (if (and (not (or (configf:lookup *configdat* "setup" "tmp_mode") (configf:lookup *configdat* "setup" "nfs_mode")))  
                            (configf:lookup *configdat* "setup" "use-wal")
                            (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                       (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                       (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
                   (initproc db)))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)

Modified mtut.scm from [0941039f2d] to [1e2f303d96].

1022
1023
1024
1025
1026
1027
1028
1029

1030
1031
1032
1033
1034
1035
1036
1037
1038
                             ;;        (if (args:get-arg "-target") 
                             ;;             (if (string= (args:get-arg "-target") runkey) (area-allowed? area "area-needs-to-be-run" runkey contour #f) #f) 
                             ;;             (area-allowed? area "area-needs-to-be-run" runkey contour #f))))
			    ;;(print "Area Allowed: " (area-allowed? area "area-needs-to-be-run" runkey contour #f))
;Add code to check whether area is valid
			     (if 
                   ;; This code checks whether the target has been passed in via argument, and only runs the specified target
                   (and (< packets-generated (or (string->number (if (configf:lookup mtconf "setup" "max_packets_per_run") (configf:lookup mtconf "setup" "max_packets_per_run") "10000" )) 10000))  (if (args:get-arg "-target") 

                     (if (string= (args:get-arg "-target") runkey) (area-allowed? area "area-needs-to-be-run" runkey contour #f) #f) 
                     (area-allowed? area "area-needs-to-be-run" runkey contour #f)))
       
			     (let* ((script (car cmd))
				(params (cdr cmd))
				(cmd    (conc script " " contour " " area " " runkey " " std-runname " " action " " params))
				(res    (handle-exceptions
					    exn
					    #f







|
>
|
|







1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
                             ;;        (if (args:get-arg "-target") 
                             ;;             (if (string= (args:get-arg "-target") runkey) (area-allowed? area "area-needs-to-be-run" runkey contour #f) #f) 
                             ;;             (area-allowed? area "area-needs-to-be-run" runkey contour #f))))
			    ;;(print "Area Allowed: " (area-allowed? area "area-needs-to-be-run" runkey contour #f))
;Add code to check whether area is valid
			     (if 
                   ;; This code checks whether the target has been passed in via argument, and only runs the specified target
                   (and (< packets-generated (or (string->number (if (configf:lookup mtconf "setup" "max_packets_per_run") (configf:lookup mtconf "setup" "max_packets_per_run") "10000" )) 10000))  
                        (if (args:get-arg "-target") 
                            (if (string= (args:get-arg "-target") runkey) (area-allowed? area "area-needs-to-be-run" runkey contour #f) #f) 
                                (area-allowed? area "area-needs-to-be-run" runkey contour #f)))
       
			     (let* ((script (car cmd))
				(params (cdr cmd))
				(cmd    (conc script " " contour " " area " " runkey " " std-runname " " action " " params))
				(res    (handle-exceptions
					    exn
					    #f

Modified subrun.scm from [ad3bd444b3] to [a4306ac505].

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
;; Build and return the megatest command line (a string) that launches the
;; subrun associated with test-run-dir.
;;
;;   test-run-dir - passed to the subrun:* helpers below; NOTE(review):
;;                  presumably the run directory of the parent test - confirm.
;;
;; Side effect: when subrun:subrun-removed? answers true for test-run-dir,
;; subrun:unset-subrun-removed is called on it before the command is built.
(define (subrun:launch-cmd test-run-dir)
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))      

  (let* ((log-prefix "run")   ;; handed to subrun:selector+log-switches as its log prefix
         (switches (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait #t)        ;; constant: "-run-wait " is therefore always appended
         (cmd      (conc "megatest -run "switches" "
                         (if run-wait "-run-wait " ""))))
    cmd))


;; Return a copy of inpath in which every character matched by the pattern
;; below is replaced by "_" (the trailing #t is the substitution mode argument;
;; NOTE(review): presumably "replace all matches" - confirm against the regex egg).
;;
;; NOTE(review): the negated class starts with "[^[", so "[" itself appears to be
;; part of the allowed set and is left untouched - looks like a typo for
;; "[^a-zA-Z0-9_\\-]"; confirm intent.
(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]")))
    (regex#string-substitute insane-pattern "_" inpath #t)))







|







133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
;; Assemble the megatest command line (returned as a string) used to launch
;; the subrun for test-run-dir with -rerun-clean.
;;
;;   test-run-dir - handed unchanged to the subrun:* helpers used here.
;;
;; Before the command is assembled, subrun:unset-subrun-removed is invoked on
;; test-run-dir whenever subrun:subrun-removed? reports true for it.
(define (subrun:launch-cmd test-run-dir)
  (when (subrun:subrun-removed? test-run-dir)
    (subrun:unset-subrun-removed test-run-dir))
  (let ((wait-for-run?     #t) ;; fixed: the -run-wait switch is always emitted
        (selector-switches (subrun:selector+log-switches test-run-dir "run")))
    (conc "megatest -rerun-clean " selector-switches " "
          (if wait-for-run? "-run-wait " ""))))


;; Return a copy of inpath in which every character that is not an ASCII
;; letter, a digit, "_" or "-" has been replaced by "_".
;;
;;   inpath - string to sanitize.
;;
;; The negated character class must not contain a "[": inside a class irregex
;; takes a bracket literally, which would put "[" in the allowed set and let
;; it pass through unsanitized.
(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^a-zA-Z0-9_\\-]")))
    ;; mode #t: substitute every match, not just the first one
    (regex#string-substitute insane-pattern "_" inpath #t)))