Megatest

Changes On Branch 213021e02d0365af
Login

Changes In Branch v1.6596-reload-tests-paths Excluding Merge-Ins

This is equivalent to a diff from 2769e4b7c9 to 213021e02d

2021-03-01
17:42
Manually patched in the new view check-in: f5206150ee user: mrwellan tags: v1.6569-new-view
2021-01-26
14:00
Fix for the > crash. Maybe... Leaf check-in: 5a05fc04ff user: matt tags: v1.6569-gt-crash-fix
2021-01-25
12:03
rebased lazy-queue rollup check-in: 07ab120544 user: matt tags: v1.65-lazyqueue-items-rollup
2021-01-15
22:46
begin diet check-in: badd71f3b3 user: matt tags: v1.6569-diet
21:34
eval-string-in-environment if was disabled, re-enabled check-in: 9564772564 user: matt tags: v1.6569-reenable-eval-if
2021-01-08
11:42
enable custom value for max delay between archive time and test last update time Leaf check-in: 86a3d1148e user: pjhatwal tags: v1.6569-refactor
2020-11-25
14:12
Merged the reload configs feature for Robert check-in: 3436dd042e user: mrwellan tags: v1.65
12:00
Fixed issues in server gating code Leaf check-in: 063273e8cb user: mrwellan tags: v1.6569-server-gate-fix
2020-11-24
22:27
Added support for resetting run - allows to reload tests-paths to add tests to a run part way though. Just run megatest -clean-cache -runname $MT_RUNNAME Leaf check-in: 213021e02d user: mrwellan tags: v1.6596-reload-tests-paths
2020-10-13
16:46
Changed version from 69 to 76. No other changes. Will compile with chicken 13 check-in: 87ca35010f user: mmgraham tags: v1.65, v1.6576
2020-10-12
16:49
Reduced message from failed to info. Reverted a delay which seems to help pass full stack ext-tests. Leaf check-in: 9e35b1252c user: mrwellan tags: v1.65-minor-patch
10:18
Safe vector access in rmt. check-in: 58bb6d997a user: mrwellan tags: v1.65-side2
2020-10-11
22:46
Patched forward adjutant code. check-in: f936717bfa user: matt tags: v1.65-adjutant-again
2020-10-05
22:49
Do not exit on failure to create directory - race conditons on NFS cause false fail scenarios - just keep going and cross your fingers... (cherrypicked from v1.6572) check-in: 05b253a452 user: matt tags: v1.65-sidework
22:46
run duration testdat check-in: 4a0b43f3c6 user: matt tags: v1.65-test-rundat2
2020-09-21
15:36
merged in 1.65-test-rundat branch ==/FAIL/orion,mars/== check-in: cfd25d66e9 user: mmgraham tags: v1.6571, v1.65-failed-testdat
07:00
Added get-testsuite-name all over launch:setup and still not set when needed! This did NOT work. Closed-Leaf check-in: 2efe8ad422 user: mrwellan tags: v1.65-get-testsuitename
2020-09-19
04:21
Start moving test_rundat to no-sync db. ==/20/2/WARN/1203/mars/== check-in: abfabdb839 user: matt tags: v1.65-test-rundat
2020-09-18
17:30
added check for file existence before file delete ==/14/1.9/WARN/orion,mars/== NOTE: This is the last v1.65 before the split off. I.e code from before this point IS in the far future v1.65 branch. Code from this point to that branch might NOT be in the branch. check-in: 2769e4b7c9 user: mmgraham tags: v1.65, v1.6569
12:27
cherry picked 2 fixes, changed version to 1.6569 ==/7.2/2.0/PASS/1201/mars/== check-in: d145d0eb02 user: mmgraham tags: v1.65

Modified launch.scm from [d0067277fa] to [480ecd81f4].

802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
              (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
               (if (not (null? tal))
				  (loop (car tal) (cdr tal) (+ kill-cnt flag))
                 (+ kill-cnt flag))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (or (args:get-arg "-run")
	       (args:get-arg "-runtests")
	       (args:get-arg "-execute")))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target exit-if-bad: #t))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")
			   (getenv "MT_RUNNAME")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
	    (begin
	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
	      (if (not (common:file-exists? fulldir))
		  (create-directory fulldir #t)) ;; need to protect with exception handler 
	      (if (and target
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))


;; gather available information, if legit read configs in this order:
;;
;;   if have cache;
;;      read it a return it
;;   else
;;     megatest.config     (do not cache)







|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







802
803
804
805
806
807
808
809


































810
811
812
813
814
815
816
              (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
               (if (not (null? tal))
				  (loop (car tal) (cdr tal) (+ kill-cnt flag))
                 (+ kill-cnt flag))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
;; (launch:cache-config) moved to attic below



































;; gather available information, if legit read configs in this order:
;;
;;   if have cache;
;;      read it a return it
;;   else
;;     megatest.config     (do not cache)
864
865
866
867
868
869
870
















871
872
873
874
875
876
877
878
879
880
881
      (begin
	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
	(mutex-unlock! *launch-setup-mutex*)
	*toppath*)
      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
	(mutex-unlock! *launch-setup-mutex*)
	res)))

















;; return paths depending on what info is available.
;;
(define (launch:get-cache-file-paths areapath toppath target mtconfig)
  (let* ((use-cache (common:use-cache?))
         (runname  (common:args-get-runname))
         (linktree (common:get-linktree))
         (testname (common:get-full-test-name))
         (rundir   (if (and runname target linktree)
                       (common:directory-writable? (conc linktree "/" target "/" runname))
                       #f))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|







830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
      (begin
	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
	(mutex-unlock! *launch-setup-mutex*)
	*toppath*)
      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
	(mutex-unlock! *launch-setup-mutex*)
	res)))

(define (launch:cache-files-changed? cache-files ref-seconds)
  (let* ((changed #f))
    (if (or (not cache-files)
	    (null? cache-files))
	(set! changed #t) ;; yep, they've changed
	(for-each
	 (lambda (fname)
	   (if (not fname)
	       (set! changed #t)
	       (if (not (file-exists? fname))
		   (set! changed #t)
		   (if (> (file-modification-time fname) ref-seconds)
		       (set! changed #t)))))
	 cache-files))
    changed))

;; return paths depending on what info is available.
;;
(define (launch:get-cache-file-paths areapath toppath target)
  (let* ((use-cache (common:use-cache?))
         (runname  (common:args-get-runname))
         (linktree (common:get-linktree))
         (testname (common:get-full-test-name))
         (rundir   (if (and runname target linktree)
                       (common:directory-writable? (conc linktree "/" target "/" runname))
                       #f))
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
	     (toppath  (common:get-toppath areapath))
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
	     (rccachef   (if (null? cachefiles)
			     #f
			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))







|







884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
	     (toppath  (common:get-toppath areapath))
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
             (cachefiles (launch:get-cache-file-paths areapath toppath target))
	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
	     (rccachef   (if (null? cachefiles)
			     #f
			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
					pathenvvar: "MT_RUN_AREA_HOME"))
			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
					 (for-each (lambda (kt)
						     (setenv (car kt) (cadr kt)))
						   key-vals)
					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
						      sections: sections)))
                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
                         (mtcachef     (car cachefiles))
                         (rccachef     (cdr cachefiles)))
                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                    ;; TODO - consider 1) using simple-lock to bracket cache write
                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.

		    (if rccachef







|







965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
					pathenvvar: "MT_RUN_AREA_HOME"))
			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
					 (for-each (lambda (kt)
						     (setenv (car kt) (cadr kt)))
						   key-vals)
					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
						      sections: sections)))
                         (cachefiles   (launch:get-cache-file-paths areapath toppath target))
                         (mtcachef     (car cachefiles))
                         (rccachef     (cdr cachefiles)))
                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                    ;; TODO - consider 1) using simple-lock to bracket cache write
                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.

		    (if rccachef
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))







|







1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
1618
1619
1620
1621
1622
1623
1624






































		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
		     #f)))
    ;; now wait on that process if all is correct
    ;; periodically update the db with runtime
    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
    ;; state/status appropriately
    (process-wait pid)))













































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
		     #f)))
    ;; now wait on that process if all is correct
    ;; periodically update the db with runtime
    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
    ;; state/status appropriately
    (process-wait pid)))

;;======================================================================
;; Attic
;;======================================================================
#;(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (or (args:get-arg "-run")
	       (args:get-arg "-runtests")
	       (args:get-arg "-execute")))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target exit-if-bad: #t))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")
			   (getenv "MT_RUNNAME")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
	    (begin
	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
	      (if (not (common:file-exists? fulldir))
		  (create-directory fulldir #t)) ;; need to protect with exception handler 
	      (if (and target
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))

Modified runs.scm from [030b929939] to [259766d723].

462
463
464
465
466
467
468
469


470
471
472
473
474
475
476
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f))



    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
      (exit 1))

    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error







|
>
>







462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f)
	 (cache-files        (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target))
	 (runstart-time      (current-seconds)))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
      (exit 1))

    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
756
757
758
759
760
761
762





763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)





	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                  (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   







>
>
>
>
>








|
|







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)
	    (if (launch:cache-files-changed? cache-files runstart-time)
		(begin ;; force a start-over
		  (launch:setup force-reread: #t)
		  (runs:run-tests target runname test-patts user flags run-count: 0)))
	    
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))