Megatest

Diff
Login

Differences From Artifact [ddd005b3a5]:

To Artifact [94b42101fa]:


1002
1003
1004
1005
1006
1007
1008






































1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047

1048
1049
1050
1051
1052
1053
1054
  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
    ;; (mutex-unlock! *db-open-mutex*)
    dbdat))

(define dbfile:db-init-proc (make-parameter #f))







































;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (jfile     (conc fname"-journal"))
	 #;(subdb     (if have-struct
			(dbfile:get-subdb dbstruct run-id)
			#f))
	 ) ;; was 25
    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname)
    (if (file-exists? jfile)
	(begin
	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
	  (thread-sleep! 0.2)))
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
			  (current-process-id))) ;;  ", throttling access"))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
	  (let ((res (apply proc dbdat db params))) ;; the actual call is here.
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat
		(dbfile:add-dbdat dbstruct run-id dbdat))

	    res))
      (exn (io-error)
	   (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
      (exn (corrupt)
	   (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
      (exn (busy)
	   (db:generic-error-printout exn "ERROR: database " fname







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

















|
<
|
<
<

















>







1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064

1065


1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
  (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit)))
    (set! *db-write-access* (not (dbr:dbdat-read-only dbdat)))
    ;; (mutex-unlock! *db-open-mutex*)
    dbdat))

(define dbfile:db-init-proc (make-parameter #f))

;; create a dropping near the db file in a qif dir
;; use count of such files to gate queries (queries in flight)
;;
(define (dbfile:wait-for-qif fname run-id params)
  (let* ((thedir  (pathname-directory fname))
	 (destdir (conc thedir"/qif"))
	 (uniqn   (get-area-path-signature (conc run-id params)))
	 (crumbn  (conc destdir"/"(current-seconds)"-"uniqn"."(current-process-id))))
    (if (not (file-exists? destdir))(create-directory destdir #t))
    (let loop ((count 0))
      (let* ((currlks (glob (conc destdir"/*")))
	     (numqrys (length currlks))
	     (delayval (cond
			((> numqrys 50)
			 (if (> numqrys 50)
			     (for-each
			      (lambda (f)
				(if (> (- (current-seconds)
					  (file-modification-time f))
				       10)
				    (begin
				      (dbfile:print-err "Removing qif file "f" older than 10 seconds")
				      (delete-file* f))))
			      currlks))
			 1)
			((> numqrys 25) 0.25)
			((> numqrys 10) 0.1)
			(else #f))))
	(if (and delayval
		 (< count 5))
	    (begin
	      (thread-sleep! delayval)
	      (loop (+ count 1))))))
    (with-output-to-file crumbn
      (lambda ()
	(print fname" "run-id" "params)))
    crumbn))
  
;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db dbstruct run-id r/w proc . params)
  (assert dbstruct "FATAL: db:with-db called with dbstruct "#f)
  (assert (dbr:dbstruct? dbstruct) "FATAL: dbstruct is "dbstruct)
  (let* ((use-mutex (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (jfile     (conc fname"-journal")))

	 ;; (crumbfile (dbfile:wait-for-qif fname run-id params)))


    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db", fname="fname)
    (if (file-exists? jfile)
	(begin
	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
	  (thread-sleep! 0.2)))
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(dbfile:print-err *api-process-request-count* " parallel api requests being processed in process "
			  (current-process-id))) ;;  ", throttling access"))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
	  (let ((res (apply proc dbdat db params))) ;; the actual call is here.
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat
		(dbfile:add-dbdat dbstruct run-id dbdat))
	    ;; (delete-file* crumbfile)
	    res))
      (exn (io-error)
	   (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
      (exn (corrupt)
	   (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed."))
      (exn (busy)
	   (db:generic-error-printout exn "ERROR: database " fname
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200









1201











































  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
	(assert #t "FATAL: simple file lock never got a lock."))))
  
(define (db:get-cache-stmth dbdat run-id db stmt)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
	 ;; (stmth       (db:hoh-get stmt-cache db stmt))
	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
	  (hash-table-set! stmt-cache stmt newstmth)
	  newstmth))))










)


















































|










>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
  (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time)))
    (if gotlock
	(let ((res (proc)))
	  (dbfile:simple-file-release-lock fname)
	  res)
	(assert #t "FATAL: simple file lock never got a lock."))))
  
(define (db:get-cache-stmth dbdat db stmt)
  (let* (;; (dbdat       (dbfile:get-dbdat dbstruct run-id))
	 (stmt-cache  (dbr:dbdat-stmt-cache dbdat))
	 ;; (stmth       (db:hoh-get stmt-cache db stmt))
	 (stmth       (hash-table-ref/default stmt-cache stmt #f)))
    (or stmth
	(let* ((newstmth (sqlite3:prepare db stmt)))
	  ;; (db:hoh-set! stmt-cache db stmt newstmth)
	  (hash-table-set! stmt-cache stmt newstmth)
	  newstmth))))

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 ;; (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime  (or ovr-deadtime 72000))) ;; twenty hours
    (db:with-db
     dbstruct run-id #f
     (lambda (dbdat db)
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels)))
                ;; (print-info "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');")
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        (db:get-cache-stmth dbdat db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');")
        run-id)
       
       ;; (print-info "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))


)