Megatest

Check-in [7bb000c05a]
Login
Overview
Comment:Split find and mark incomplete to iterate over runs, fixed logic bug in select statements for find and mark.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 7bb000c05a4ba70ed5dd8ea2714420c09215caef
User & Date: matt on 2013-10-22 23:10:55
Other Links: branch diff | manifest | tags
Context
2013-10-22
23:51
Upped the timeout on marking INCOMPLETE in the fullrun test check-in: 5b7a69fb1d user: matt tags: v1.55
23:10
Split find and mark incomplete to iterate over runs, fixed logic bug in select statements for find and mark. check-in: 7bb000c05a user: matt tags: v1.55
2013-10-21
20:25
Added ability to control deadtime from config. Set default deadtime to 1800 seconds check-in: aba5b97c7d user: mrwellan tags: v1.55
Changes

Modified db.scm from [a74f9a32c4] to [0e0b9ca4ad].

462
463
464
465
466
467
468






469
470
471
472
473
474





475


476
477
478
479
480

481


482
483
484
485
486


487
488

489
490
491
492
493
494
495
      ((< mver megatest-version)
       (db:set-var db "MEGATEST_VERSION" megatest-version))))))

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================







(define (db:find-and-mark-incomplete db)
  (let* ((incompleted '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)





			   1800)))


    (sqlite3:for-each-row 
     (lambda (test-id)
       (set! incompleted (cons test-id incompleted)))
     db
     "SELECT id FROM tests WHERE event_time<? AND state IN ('RUNNING','REMOTEHOSTSTART');"

     (- (current-seconds) deadtime)) ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes


    (sqlite3:for-each-row
     (lambda (test-id)
       (set! incompleted (cons test-id incompleted)))
     db
     "SELECT id FROM tests WHERE event_time<? AND state IN ('LAUNCHED');"


     (- (current-seconds)(* 60 60 24))) ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.

    (if (> (length incompleted) 0)
	(begin
	  (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE")
	  (sqlite3:execute 
	   db
	   (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
		 (string-intersperse (map conc incompleted) ",")







>
>
>
>
>
>
|





>
>
>
>
>
|
>
>
|
|
|
|
|
>
|
>
>
|
|
|
|
|
>
>
|

>







462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
      ((< mver megatest-version)
       (db:set-var db "MEGATEST_VERSION" megatest-version))))))

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));


(define (db:find-and-mark-incomplete db #!key (ovr-deadtime #f))
  (let* ((incompleted '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200)) ;; two hours
	 (run-ids      (db:get-run-ids db))) ;; iterate over runs to divy up the calls
    (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
    (for-each
     (lambda (run-id)

       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       (sqlite3:for-each-row 
	(lambda (test-id)
	  (set! incompleted (cons test-id incompleted)))
	db
	"SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('RUNNING','REMOTEHOSTSTART');"
	run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       (sqlite3:for-each-row
	(lambda (test-id)
	  (set! incompleted (cons test-id incompleted)))
	db
	"SELECT id FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time - run_duration) > ? AND state IN ('LAUNCHED');"
	run-id (* 60 60 24)))
     run-ids)
       
    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
    ;;
    (if (> (length incompleted) 0)
	(begin
	  (debug:print 0 "WARNING: Marking test(s); " (string-intersperse (map conc incompleted) ", ") " as INCOMPLETE")
	  (sqlite3:execute 
	   db
	   (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
		 (string-intersperse (map conc incompleted) ",")
875
876
877
878
879
880
881








882
883
884
885
886
887
888
			    "unlocked"
			    "locked")))) ;; semi-failsafe
    (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
    (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);"
		     user (conc newlockval " " run-id))
    (debug:print-info 1 "" newlockval " run number " run-id)))









;;======================================================================
;; K E Y S
;;======================================================================

;; get key val pairs for a given run-id
;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... )
(define (db:get-key-val-pairs db run-id)







>
>
>
>
>
>
>
>







894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
			    "unlocked"
			    "locked")))) ;; semi-failsafe
    (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
    (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);"
		     user (conc newlockval " " run-id))
    (debug:print-info 1 "" newlockval " run number " run-id)))

(define (db:get-run-ids db)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res (cons id res)))
     db 
     "SELECT id FROM runs;")))

;;======================================================================
;; K E Y S
;;======================================================================

;; get key val pairs for a given run-id
;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... )
(define (db:get-key-val-pairs db run-id)

Modified runs.scm from [31ee3d42e6] to [e64acf4e6f].

721
722
723
724
725
726
727





728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
		    ))))))))

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))





  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 610)))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every five minutes
      (if (> (current-seconds)(+ last-time-incomplete 300))
	  (begin
	    (set! last-time-incomplete (current-seconds))
	    (cdb:remote-run db:find-and-mark-incomplete #f)))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))







>
>
>
>
>












|



















|
|







721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
		    ))))))))

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  (cdb:remote-run db:find-and-mark-incomplete #f)

  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (current-seconds)))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
      (if (> (current-seconds)(+ last-time-incomplete 900))
	  (begin
	    (set! last-time-incomplete (current-seconds))
	    (cdb:remote-run db:find-and-mark-incomplete #f)))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))