Megatest

Check-in [59034f6b4d]
Login
Overview
Comment:Monitor based runs partially working
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 59034f6b4ddd9342bad1f09e33d616de74e5d37c
User & Date: matt on 2011-10-23 19:10:12
Other Links: manifest | tags
Context
2011-10-23
20:42
Lots of little bugs fixed related to monitor. Note that now multiple runs must be accomodated from the same launcher check-in: 98de2c2f8d user: matt tags: trunk
19:10
Monitor based runs partially working check-in: 59034f6b4d user: matt tags: trunk
16:42
Blindly duplicated run logic to use in monitor based launching check-in: f97980cf8c user: matt tags: trunk
Changes

Modified launch.scm from [9df8e123db] to [aa7b36aced].

362
363
364
365
366
367
368


369







370
371
372
373
374
375
376
	(launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	(runscript  (config-lookup test-conf   "setup"        "runscript"))
	(ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	(diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	(memory     (config-lookup test-conf   "requirements" "memory"))
	(hosts      (config-lookup *configdat* "jobtools"     "workhosts"))
	(remote-megatest (config-lookup *configdat* "setup" "executable"))


	(local-megatest  (car (argv)))







	(test-sig   (conc "=" test-name ":" (item-list->path itemdat) "=")) ;; test-path is the full path including the item-path
	(work-area  #f)
	(toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	(diskpath   #f)
	(cmdparms   #f)
	(fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	(mt-bindir-path #f))







>
>
|
>
>
>
>
>
>
>







362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
	(launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	(runscript  (config-lookup test-conf   "setup"        "runscript"))
	(ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	(diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	(memory     (config-lookup test-conf   "requirements" "memory"))
	(hosts      (config-lookup *configdat* "jobtools"     "workhosts"))
	(remote-megatest (config-lookup *configdat* "setup" "executable"))
	;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	;;                allow running from dashboard 
	(local-megatest  (let* ((lm  (car (argv)))
				(dir (pathname-directory lm))
				(exe (pathname-strip-directory lm)))
			   (conc dir "/"
				 (case (string->symbol exe)
				   ((dboard) "megatest")
				   ((dashboard) "megatest")
				   (else exe)))))
	(test-sig   (conc "=" test-name ":" (item-list->path itemdat) "=")) ;; test-path is the full path including the item-path
	(work-area  #f)
	(toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	(diskpath   #f)
	(cmdparms   #f)
	(fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	(mt-bindir-path #f))

Modified runs.scm from [fe2d069603] to [28791daf01].

774
775
776
777
778
779
780
781






























782
783
784
785
786
787
788
789





























790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836

837
838
839
840
841
842
843
	(loop (hash-table-keys *waiting-queue*)))))))

;;======================================================================
;; New methodology. These routines will replace the above in time. For
;; now the code is duplicated. This stuff is initially used in the monitor
;; based code.
;;======================================================================































;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests.
;; keyvals
(define (runs:run-tests db target runname testpatts itempatts flags)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys:target->keyval keys target))
	 (run-id      (register-run db keys))  ;;  test-name)))
	 (deferred    '()) ;; delay running these since they have a waiton clause
	 (keepgoing   (hash-table-ref/default flags "-keepgoing")))





























    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (and (eq? *passnum* 0)
	     keepgoing)
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (db:delete-tests-in-state db run-id "NOT_STARTED")
	  (db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))
    (set! *passnum* (+ *passnum* 1))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (if (runs:can-run-more-tests db)
	     (run-one-test db run-id test-name keyvallst)
	     ;; add some delay 
	     ;(sleep 2)
	     ))
       (tests:sort-by-priority-and-waiton test-names))
      ;; (run-waiting-tests db)
      (if keepgoing
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (and (> estrem 0)
		     (eq? *globalexitstatus* 0))
		(begin
		  (debug:print 1 "Keep going, estimated " estrem " tests remaining to run, will continue in 3 seconds ...")
		  (sleep 3)
		  (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))

(define (run-one-test db run-id runname test-name keyvallst item-patts flags)
  (debug:print 1 "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   runname)
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name)) ;; could use test:get-testconfig here ...
	 (test-configf (conc test-path "/testconfig"))
	 (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
	 (test-conf    (if testexists (read-config test-configf #f #t) (make-hash-table)))
	 (waiton       (let ((w (config-lookup test-conf "requirements" "waiton")))
			 (if (string? w)(string-split w)'())))
	 (force        (hash-table-ref/default flags "-force"))
	 (rerun        (hash-table-ref/default flags "-rerun"))

	 ;; Are these tags still used? I don't think so...
	 ;;(tags         (let ((t (config-lookup test-conf "setup" "tags")))
	 ;;       	 ;; we want our tags to be separated by commas and fully delimited by commas
	 ;;       	 ;; so that queries with "like" can tie to the commas at either end of each tag
	 ;;       	 ;; while also allowing the end user to freely use spaces and commas to separate tags
	 ;;       	 (if (string? t)(string-substitute (regexp "[,\\s]+") "," (conc "," t ",") #t)
	 ;;       	     '()))))








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


|


|

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
















|
<
<













|












|
|
>







774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865


866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	(loop (hash-table-keys *waiting-queue*)))))))

;;======================================================================
;; New methodology. These routines will replace the above in time. For
;; now the code is duplicated. This stuff is initially used in the monitor
;; based code.
;;======================================================================

;; register a test run with the db
(define (runs:register-run db keys keyvallst runname state status user)
  (let* ((keystr    (keys->keystr keys))
	 (comma     (if (> (length keys) 0) "," ""))
	 (andstr    (if (> (length keys) 0) " AND " ""))
	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
	 (keyvals   (map cadr keyvallst))
	 (allvals   (append (list runname state status user) keyvals))
	 (qryvals   (append (list runname) keyvals))
	 (key=?str  (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND ")))
    (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals)
    (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run")
    (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
	(let ((res #f))
	  (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");")
		 allvals)
	  (apply sqlite3:for-each-row 
	   (lambda (id)
	     (set! res id))
	   db
	   (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
	     ;(debug:print 4 "qry: " qry) 
	     qry)
	   qryvals)
	  (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res)
	  res) 
	(begin
	  (debug:print 0 "ERROR: Called without all necessary keys")
	  #f))))

;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests.
;; keyvals
(define (runs:run-tests db target runname test-patts item-patts user flags)
  (let* ((keys        (db-get-keys db))
	 (keyvallst   (keys:target->keyval keys target))
	 (run-id      (runs:register-run db keys keyvallst runname "new" "n/a" user))  ;;  test-name)))
	 (deferred    '()) ;; delay running these since they have a waiton clause
	 (keepgoing   (hash-table-ref/default flags "-keepgoing" #f))
	 (test-names  '()))
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)
    (for-each 
     (lambda (patt)
       (let ((tests (glob (conc *toppath* "/tests/" (string-translate patt "%" "*")))))
	 (set! tests (filter (lambda (test)(file-exists? (conc test "/testconfig"))) tests))
	 (set! test-names (append test-names 
				  (map (lambda (testp)
					 (last (string-split testp "/")))
				       tests)))))
     (string-split test-patts ","))

    ;; now remove duplicates
    (set! test-names (delete-duplicates test-names))

    (debug:print 0 "INFO: test names " test-names)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;; could cache all these since they need to be read again ...
    ;; FIXME SOMEDAY
    (for-each 
     (lambda (test-name)
       (let* ((config (test:get-testconfig test-name #f))
	      (waiton (config-lookup config "requirements" "waiton")))
	 (if (and waiton (not (member waiton test-names)))
	     (set! test-names (append test-names (list waiton))))))
     test-names)

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (and (eq? *passnum* 0)
	     keepgoing)
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (db:delete-tests-in-state db run-id "NOT_STARTED")
	  (db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))
    (set! *passnum* (+ *passnum* 1))
    (let loop ((numtimes 0))
      (for-each 
       (lambda (test-name)
	 (if (runs:can-run-more-tests db)
	     (run:test db run-id runname test-name keyvallst item-patts flags)


	     ))
       (tests:sort-by-priority-and-waiton test-names))
      ;; (run-waiting-tests db)
      (if keepgoing
	  (let ((estrem (db:estimated-tests-remaining db run-id)))
	    (if (and (> estrem 0)
		     (eq? *globalexitstatus* 0))
		(begin
		  (debug:print 1 "Keep going, estimated " estrem " tests remaining to run, will continue in 3 seconds ...")
		  (sleep 3)
		  (run-waiting-tests db)
		  (loop (+ numtimes 1)))))))))

(define (run:test db run-id runname test-name keyvallst item-patts flags)
  (debug:print 1 "Launching test " test-name)
  ;; All these vars might be referenced by the testconfig file reader
  (setenv "MT_TEST_NAME" test-name) ;; 
  (setenv "MT_RUNNAME"   runname)
  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
  (change-directory *toppath*)
  (let* ((test-path    (conc *toppath* "/tests/" test-name)) ;; could use test:get-testconfig here ...
	 (test-configf (conc test-path "/testconfig"))
	 (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
	 (test-conf    (if testexists (read-config test-configf #f #t) (make-hash-table)))
	 (waiton       (let ((w (config-lookup test-conf "requirements" "waiton")))
			 (if (string? w)(string-split w)'())))
	 (force        (hash-table-ref/default flags "-force" #f))
	 (rerun        (hash-table-ref/default flags "-rerun" #f))
	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	 ;; Are these tags still used? I don't think so...
	 ;;(tags         (let ((t (config-lookup test-conf "setup" "tags")))
	 ;;       	 ;; we want our tags to be separated by commas and fully delimited by commas
	 ;;       	 ;; so that queries with "like" can tie to the commas at either end of each tag
	 ;;       	 ;; while also allowing the end user to freely use spaces and commas to separate tags
	 ;;       	 (if (string? t)(string-substitute (regexp "[,\\s]+") "," (conc "," t ",") #t)
	 ;;       	     '()))))
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
		    (debug:print 4 "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
		    (case (if force-run ;; (args:get-arg "-force")
			      'NOT_STARTED
			      (if testdat
				  (string->symbol (test:get-state testdat))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (debug:print 0 "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED)







|







973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
				    (loop (car tal)(cdr tal)))))))
		    (change-directory test-path)
		    ;; this block is here only to inform the user early on
		    (if (file-exists? runconfigf)
			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
			(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
		    (debug:print 4 "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
		    (case (if force ;; (args:get-arg "-force")
			      'NOT_STARTED
			      (if testdat
				  (string->symbol (test:get-state testdat))
				  'failed-to-insert))
		      ((failed-to-insert)
		       (debug:print 0 "ERROR: Failed to insert the record into the db"))
		      ((NOT_STARTED COMPLETED)

Modified tasks.scm from [b3487ed7af] to [3e542e1d23].

227
228
229
230
231
232
233
234








235
236
  (sqlite3:execute db "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

(define (tasks:start-run db task)
  ;; Starting run #(3 run matt reset ubuntu/afs/tmp ww44 % % 1319368208.0 1319386680.0)
  ;; Starting run #(5 run matt reset centos/nfs/nada ww42 all all 1319371306.0 1319386801.0)










  (print "Starting run " task))







|
>
>
>
>
>
>
>
>
|
<
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

  (sqlite3:execute db "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

(define (tasks:start-run db task)
  ;; Starting run #(3 run matt reset ubuntu/afs/tmp ww44 % % 1319368208.0 1319386680.0)
  ;; Starting run #(5 run matt reset centos/nfs/nada ww42 all all 1319371306.0 1319386801.0)
  (print "Starting run " task)
  ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
  (runs:run-tests db
		  (tasks:task-get-target task)
		  (tasks:task-get-name   task)
		  (tasks:task-get-test   task)
		  (tasks:task-get-item   task)
		  (tasks:task-get-owner  task)
		  (make-hash-table))
  )