@@ -736,20 +736,46 @@ (if (equal? (args:get-arg "-dumpmode") "csv") (task:print-testtime test-times ",") (task:print-testtime test-times " "))))) - +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gets mtpg-run-id and syncs the record if different ;; +;; This function takes parameters including a database handle (dbh), cached information (cached-info), a run ID (run-id), area information (area-info), and the smallest last update time (smallest-last-update-time). + +;; The function first tries to retrieve information about the given run ID from the cached information. If it is already cached, the function returns the cached information. +;; Otherwise, the function retrieves information about the run from the megatest database using various functions such as "rmt:get-run-info" and "db:get-value-by-header". + +;; The function then uses this retrieved information to create a new run in the pgdb database, or update an existing one, using functions such as "pgdb:get-ttype", "pgdb:get-run-id", and "pgdb:insert-run". +;; It also sets various fields such as "state", "status", "owner", and "event_time". + +;; The smallest-last-update-time variable is a single item hash table that is used to keep track of the smallest (earliest) "last_update" time among all the runs that are processed by calls to this function. + +;; The "last_update" time is obtained from the megatest database for each run that needs to be processed. Then, if the "last_update" time for a particular run is greater (later) than the "last_update" time of the +;; corresponding run in the PostgreSQL database, the "last_update" time of that run is stored in the "smallest-last-update-time" hash table. + +;; This is done to ensure that only the runs that have been updated since the last time the synchronization was performed are updated in the PostgreSQL database. This is because updating all the runs, even those +;; that have not been modified since the last synchronization, can be a time-consuming process. + +;; At the end of the function, if a new run is successfully inserted into the PostgreSQL database, the "last_update" time of that run is compared with the current value of "smallest-time" in the "smallest-last-update-time" +;; hash table. If the "last_update" time is smaller than the current value of "smallest-time" or if "smallest-time" does not exist, "last_update" time is stored as the new "smallest-time". This ensures that the smallest +;; "last_update" time among all the runs processed by the function is always stored in the "smallest-last-update-time" hash table. + +;; The smallest-last-update-time hash is referenced in calling functions and is used in the call to "pgdb:write-sync-time dbh area-info smallest-time" in tasks:sync-to-postgres. + + +;; If a new entry was successfully created or updated, the function returns the ID of the new entry. If there was an error, the function returns false. + + (define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) (let* ((runs-ht (hash-table-ref cached-info 'runs)) (runinf (hash-table-ref/default runs-ht run-id #f)) (area-id (vector-ref area-info 0))) (if runinf - runinf ;; already cached - (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > + runinf ;; already cached + (let* ((run-dat (rmt:get-run-info run-id)) ;; NOTE: get-run-info returns a vector < row header > (run-name (rmt:get-run-name-from-id run-id)) (row (db:get-rows run-dat)) ;; yes, this returns a single row (header (db:get-header run-dat)) (state (db:get-value-by-header row header "state")) (status (db:get-value-by-header row header "status")) @@ -777,45 +803,56 @@ (spec-id (pgdb:get-ttype dbh keytarg)) (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime") event-time (current-seconds))) (new-run-id (if (and run-name base-target) (pgdb:get-run-id dbh spec-id target run-name area-id) #f))) - (if new-run-id + (if new-run-id (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id)) - (hash-table-set! runs-ht run-id new-run-id) - ;; ensure key fields are up to date - ;; if last_update == pgdb_last_update do not update smallest-last-update-time - (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) - (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:refresh-run-info - dbh - new-run-id - state status owner event-time comment fail-count pass-count area-id last-update publish-time) - (debug:print-info 4 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id ) - (if (not (equal? run-tag "")) - (task:add-run-tag dbh new-run-id run-tag)) - new-run-id) - + (hash-table-set! runs-ht run-id new-run-id) + ;; ensure key fields are up to date + ;; if last_update == pgdb_last_update do not update smallest-last-update-time + (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id)) + (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) + (hash-table-set! smallest-last-update-time "smallest-time" last-update)) + ) + (pgdb:refresh-run-info dbh new-run-id state status owner event-time comment fail-count pass-count area-id last-update publish-time) + (debug:print-info 4 *default-log-port* "Working on run-id " run-id " pgdb-id " new-run-id ) + (if (not (equal? run-tag "")) + (task:add-run-tag dbh new-run-id run-tag) + ) + new-run-id + ) + ;; if no pgdb run id was found (if (or (not state) (equal? state "deleted")) - (begin - (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) - (if (handle-exceptions - exn - (begin (print-call-chain) - (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) - #f) - - (pgdb:insert-run - dbh - spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time)) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) - #f))))))) + (begin + (debug:print-info 1 *default-log-port* "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f) + (if (handle-exceptions + exn + (begin (print-call-chain) + (debug:print 0 *default-log-port* ((condition-property-accessor 'exn 'message) exn)) + #f + ) + + (pgdb:insert-run dbh spec-id target run-name state status owner event-time comment fail-count pass-count area-id last-update publish-time) + ) + (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update) + ) + ;; call this function recursively to get the pgdb run id + ;; TODO: Why not just call pgdb:get-run-id here to get the id? + (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time) + ) + #f + ) + ) + ) + ) + ) + ) +) (define (task:add-run-tag dbh run-id tag) (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) (if (not tag-info) (begin @@ -946,11 +983,29 @@ (debug:print-info 1 *default-log-port* "Error: Test not in pgdb")))) (debug:print-info 1 *default-log-port* "Error: Could not get test data info for data id " test-data-id )))) ;; this is a wierd senario need to debug test-data-ids))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; This function synchronizes test data between the megatest db and the pgdb. It takes several parameters, including a pgdb database handle (dbh), +;; a hash table of cached information (cached-info), a list of test IDs (test-ids), and other data related to the specific area being synced (area-info and smallest-last-update-time). + +;; The function first retrieves the cached test hash table (test-ht) from cached-info and sets the run-id-in variable to false. It then iterates over each test ID in test-ids using +;; a for-each loop. Within the loop, it sets run-id-in to the second element of the test ID pair (which is assumed to be a cons cell), and sets test-id to the first element of the pair. + +;; The function then uses rmt:get-test-info-by-id to retrieve information about the test specified by run-id-in and test-id. It extracts various properties from this +;; information, such as the run-id, test-name, item-path, state, status, host, and so on. + +;; If item-path is null or empty, the function prints a message to the log. Otherwise, it checks if pgdb-run-id is truthy, meaning that a corresponding test record was found in the +;; remote database. If pgdb-test-id is also truthy, it means that the test record exists in the remote database and needs to be updated. If pgdb-test-id is falsey, it means that the +;; test record needs to be inserted into the remote database. + +;; If the last-update timestamp of the local test record is greater than the last-update timestamp of the remote test record, the function updates the remote test record with the new +;; data. If the last-update timestamp is less than or equal to the smallest-time value in smallest-last-update-time, the function updates the smallest-time value to the new last-update +;; timestamp. If the remote test record does not exist, the function inserts a new test record into the remote database. +;; After each test ID is processed, the function updates the test-ht hash table with the corresponding pgdb-test-id. (define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) (let ((test-ht (hash-table-ref cached-info 'tests)) (run-id-in #f)) (for-each @@ -990,26 +1045,39 @@ ;; "run_duration" "final_logf" "comment" "shortdir" "attemptnum" "archived" (if (or (not item-path) (string-null? item-path)) (debug:print-info 0 *default-log-port* "Working on Run id : " run-id " and test name : " test-name)) (if pgdb-run-id (begin - (if pgdb-test-id ;; have a record - (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path))) - (debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) - (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) - (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. - (hash-table-set! smallest-last-update-time "smallest-time" last-update))) - (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)) - (begin - (debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) - (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) - (if (or (not smallest-time) (< last-update smallest-time)) - (hash-table-set! smallest-last-update-time "smallest-time" last-update)) - (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) - (hash-table-set! test-ht test-id pgdb-test-id)) - (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync.")))) - test-ids))) + (if pgdb-test-id ;; have a record + (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path))) + (debug:print-info 4 *default-log-port* "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id " pgdb-test-id " pgdb-test-id) + (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id))) + (if (and (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time. + (hash-table-set! smallest-last-update-time "smallest-time" last-update) + ) + ) + (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + ) + (begin + (debug:print-info 4 *default-log-port* "Inserting test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id) + (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid) + (if (or (not smallest-time) (< last-update smallest-time)) + (hash-table-set! smallest-last-update-time "smallest-time" last-update) + ) + (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)) + ) + ) + (hash-table-set! test-ht test-id pgdb-test-id)) + (debug:print-info 1 *default-log-port* "WARNING: Skipping run with run-id:" run-id ". This run was created after previous sync and removed before this sync.") + ) + ) + ) + test-ids + ) + ) +) + (define (task:add-area-tag dbh area-info tag) (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag))) (if (not tag-info) (begin @@ -1036,16 +1104,38 @@ (debug:print-info 4 *default-log-port* "Check if run with " run-id " needs to be synced" ) (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) run-ids)) -;; get runs changed since last sync -;; (define (tasks:sync-test-data dbh cached-info area-info) -;; (let* (( +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; This function takes two arguments "configdat" and "dest". The purpose of this function is to synchronize data from a source database to a PostgreSQL database. + +;; Here's what this function does: + +;; Opens a connection to the PostgreSQL database using the "pgdb:open" function. +;; Retrieves information about the last sync of *toppath*, using pgdb:get-area-by-path, and stores it in the "area-info" vector variable. (id, area_name, area_path, last_sync) +;; Creates a hash table named "cached-info" which will contain a start time, and megatest to pgdb ID mapping tables for runs, targets, tests, steps, and data. The targets table does not seem to be used. (TODO: remove it?) +;; Checks if the user has provided EITHER both the "target" and "run-name" arguments or NEITHER. If not, it prints an error message and exits. +;; Sets the "start" variable to the current time. +;; Creates hash tables for five data types: runs, targets, tests, steps, and data, and enters them in the cached-info table.. +;; Sets the "start" time in the "cached-info" hash table. + +;; If area-info was found, +;; Retrieves the last synchronization time from the area-info vector or from the megatest "-since" argument. +;; Creates the smallest-last-update-time hash table. +;; Retrieves records that have been changed since the last synchronization time using the "rmt:get-changed-record-ids" or "rmt:get-run-record-ids" function, depending on whether the user has provided the "-target" and "-runname" arguments. +;; Retrieves the IDs of runs, tests, test steps, test data, and run stats that have been changed. +;; Retrieves the area tag from the user-provided "-area-tag" or "-area" argument or sets it to the default area tag. +;; Adds the area tag to the area record in the PostgreSQL database. +;; synchronizes the runs, tests, test steps, and test data using their respective "tasks:sync-" functions. +;; Writes the synchronization time to the PostgreSQL database from smallest-last-update-time.. +;; If the area info was not found, it sets the area using tasks:set-area. +;; If the "tasks:set-area" function returns true, the "tasks:sync-to-postgres" function is called again recursively. Otherwise, an error message is printed and the function returns false. +;; TODO: just set the area-info when it is not found, instead of doing recursion here. + (define (tasks:sync-to-postgres configdat dest) - ;; (print "In sync") (let* ((dbh (pgdb:open configdat dbname: dest)) (area-info (pgdb:get-area-by-path dbh *toppath*)) (cached-info (make-hash-table)) (start (current-seconds)) (test-patt (if (args:get-arg "-testpatt") @@ -1063,12 +1153,10 @@ (exit 1))) (if (and (not target) run-name) (begin (debug:print 0 *default-log-port* "Error: Provide target") (exit 1))) - ;(print "123") - ;(exit 1) (for-each (lambda (dtype) (hash-table-set! cached-info dtype (make-hash-table))) '(runs targets tests steps data)) (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this (if area-info @@ -1085,19 +1173,24 @@ (area-tag (if (args:get-arg "-area-tag") (args:get-arg "-area-tag") (if (args:get-arg "-area") (args:get-arg "-area") "")))) + + (debug:print-info 0 *default-log-port* "changed records since " (time->string (seconds->local-time last-sync-time) "%m/%d %H:%M") ": " changed) + + (debug:print-info 0 *default-log-port* "last sync time: " last-sync-time) (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0)))) (set! area-tag *default-area-tag*)) (if (not (equal? area-tag "")) (task:add-area-tag dbh area-info area-tag)) (if (not (null? run-ids)) (begin (debug:print-info 0 *default-log-port* "syncing runs: " run-ids) (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) ) + ) (if (not (null? test-ids)) (begin (debug:print-info 0 *default-log-port* "syncing tests: " test-ids) (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time) @@ -1105,16 +1198,27 @@ (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time) (debug:print-info 0 *default-log-port* "syncing test data") (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time) ) ) - (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds)))) - (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) - (if (not (and target run-name)) - (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0))) - (pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed + (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" (current-seconds)))) + (debug:print-info 0 "smallest-time :" smallest-time " last-sync-time " last-sync-time) + (if (not (and target run-name)) + (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0))) + (pgdb:write-sync-time dbh area-info smallest-time)) + ) + ) + ) ;;this needs to be changed + + ;; If the area info was not found in the the areas table, set it and repeat tasks:sync-to-postgres. + ;; TODO: why not just set the area info when it is not found rather than do recursion? + (if (tasks:set-area dbh configdat) (tasks:sync-to-postgres configdat dest) (begin (debug:print 0 *default-log-port* "ERROR: unable to create an area record") - #f))))) + #f) + ) + ) + ) +)