Merge branch 'master' into gb/fix-ts-creation-message

psrenergy · Nov 7, 2024 · c5171db · c5171db
2 parents 05ab745 + 9d95af3
commit c5171db
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 23 deletions.
diff --git a/docs/src/psrdatabasesqlite/rules.md b/docs/src/psrdatabasesqlite/rules.md
@@ -85,9 +85,7 @@ CREATE TABLE Plant(
 
 ### Vector Attributes
 
-- In case of a vector attribute, a table should be created with its name indicating the name of the Collection and the name of a group of the attribute, separated by `_vector_`, as presented below
-
-<p style="text-align: center;"> COLLECTION_vector_GROUP_OF_ATTRIBUTES</p>
+- In case of a vector attribute, a table should be created with its name indicating the name of the Collection and the name of a group of the attribute, separated by `_vector_`, such as `COLLECTION_vector_GROUP_OF_ATTRIBUTES`.
 
 - The table must contain a Column named `id` and another named `vector_index`.
 - There must be a Column named after the attributes names, which will store the value of the attribute for the specified element `id` and index `vector_index`.
@@ -135,9 +133,7 @@ CREATE TABLE HydroPlant_vector_GaugingStation(
 ### Time Series Files
 
 - All Time Series files for the elements from a Collection should be stored in a Table
-- The Table name should be the same as the name of the Collection followed by `_time_series_files`, as presented below
-
-<p style="text-align: center"> COLLECTION_vector_ATTRIBUTE</p>
+- The Table name should be the same as the name of the Collection followed by `_time_series_files`, such as `COLLECTION_time_series_files`.
 
 - Each Column of the table should be named after the name of the attribute.
 - Each Column should store the path to the file containing the time series data.
@@ -152,9 +148,7 @@ CREATE TABLE Plant_time_series_files (
 ```
 
 ### Time Series
-- Time Series stored in the database should be stored in a table with the name of the Collection followed by `_time_series_` and the name of the attribute group, as presented below.
-
-<p style="text-align: center"> COLLECTION_time_series_GROUP_OF_ATTRIBUTES</p>
+- Time Series stored in the database should be stored in a table with the name of the Collection followed by `_time_series_` and the name of the attribute group, such as `COLLECTION_time_series_GROUP_OF_ATTRIBUTES`.
 
 Notice that it is quite similar to the vector attributes, but without the `vector_index` column.
 Instead, a mandatory column named `date_time` should be created to store the date of the time series data.

diff --git a/src/PSRDatabaseSQLite/read.jl b/src/PSRDatabaseSQLite/read.jl
@@ -15,10 +15,6 @@ function number_of_elements(db::DatabaseSQLite, collection_id::String)::Int
     end
 end
 
-function _collection_has_any_data(db::DatabaseSQLite, collection_id::String)::Bool
-    return number_of_elements(db, collection_id) > 0
-end
-
 function _get_id(
     db::DatabaseSQLite,
     collection_id::String,
@@ -391,7 +387,7 @@ function read_time_series_row(
 
     T = attribute.type
 
-    if !(_collection_has_any_data(db, collection_id))
+    if !(_time_controller_collection_has_any_data(db, collection_id))
         return Vector{T}(undef, 0)
     end
     if !haskey(db._time_controller.cache, collection_attribute)

diff --git a/src/PSRDatabaseSQLite/time_controller.jl b/src/PSRDatabaseSQLite/time_controller.jl
@@ -7,10 +7,7 @@ const CollectionAttribute = Tuple{String, String}
 
 # Some comments
 # TODO we can further optimize the time controller with a few strategies
-# 1 - We can try to ask for the data in the same query that we ask for the dates. I just don`t know how to write the good query for that
-# 2 - We can use prepared statements for the queries 
-# 3 - Avoid querying the data for every id in the attribute. Currently we fill the cache of dates before making the query and use it to inform which date each id should query. This is quite inneficient
-# The best way of optimizing it would be to solve 1 and 2.
+# 1 - We can use prepared statements for the queries 
 
 mutable struct TimeControllerCache{T}
     data::Vector{T}
@@ -20,36 +17,75 @@ mutable struct TimeControllerCache{T}
     closest_next_date_with_data::Vector{DateTime}
 
     # Private caches with the closest previous and next dates
-    # _closest_previous_date_with_data = maximum(closest_previous_date_with_data)
-    # _closest_next_date_with_data = minimum(closest_next_date_with_data)
     _closest_global_previous_date_with_data::DateTime
     _closest_global_next_date_with_data::DateTime
 
-    # Cache of collection_ids
+    # Cache of collection_ids, these are the ids of elements in a specific collection
     _collection_ids::Vector{Int}
 end
 
 Base.@kwdef mutable struct TimeController
     cache::Dict{CollectionAttribute, TimeControllerCache} = Dict{CollectionAttribute, TimeControllerCache}()
+
+    # Memo of whether each collection holds at least one element, keyed by
+    # collection id. An entry is written the first time
+    # `_time_controller_collection_has_any_data` is asked about a collection,
+    # for both `true` and `false` answers, so the count is not queried again.
+    # Stored answers are reused as-is, which relies on the fact that the Time
+    # Controller only works in read-only databases.
+    collection_has_any_data::Dict{String, Bool} = Dict{String, Bool}()
 end
 
 # Build the `(collection_id, attribute_id)` pair used as a `CollectionAttribute` key.
 _collection_attribute(collection_id::String, attribute_id::String)::CollectionAttribute =
     (collection_id, attribute_id)
 
+# Report whether `collection_id` has at least one element. The answer is
+# memoized in `db._time_controller.collection_has_any_data`: a stored entry is
+# returned directly, otherwise `number_of_elements` is consulted and its
+# result is stored before being returned.
+function _time_controller_collection_has_any_data(db, collection_id::String)::Bool
+    known_answers = db._time_controller.collection_has_any_data
+    return get!(known_answers, collection_id) do
+        number_of_elements(db, collection_id) > 0
+    end
+end
+
 # Refresh `cache` for `date_time` in two ordered steps: first
 # `_update_time_controller_cache_dates!` (presumably recomputes the per-element
 # dates - confirm in its definition), then the data request, which reads
 # `cache.closest_previous_date_with_data` to decide which rows to load.
 # Always returns `nothing`; the result is the mutation of `cache`.
 function _update_time_controller_cache!(
     cache::TimeControllerCache,
     db,
     attribute::Attribute,
     date_time::DateTime,
 )
     _update_time_controller_cache_dates!(cache, db, attribute, date_time)
+    _request_time_series_data_for_time_controller_cache(cache, db, attribute)
 
+    return nothing
+end
+
+# Fill `cache.data` with the value of `attribute` for every id in
+# `cache._collection_ids`, using a single query.
+#
+# For the i-th id the row requested is the one whose `date_time` equals
+# `cache.closest_previous_date_with_data[i]`. Ids for which the query returns
+# no row receive `_psrdatabasesqlite_null_value(attribute.type)`.
+#
+# Mutates `cache.data` in place and returns `nothing`.
+function _request_time_series_data_for_time_controller_cache(
+    cache::TimeControllerCache,
+    db,
+    attribute::Attribute,
+)
+    # With no ids the WHERE clause would be empty and the statement would be
+    # malformed SQL; there is also nothing to fill, so stop here.
+    isempty(cache._collection_ids) && return nothing
+
+    query = "SELECT id, $(attribute.id) FROM $(attribute.table_where_is_located) WHERE "
     for (i, id) in enumerate(cache._collection_ids)
-        cache.data[i] =
-            _request_time_series_data_for_time_controller_cache(db, attribute, id, cache.closest_previous_date_with_data[i])
+        query *= "(id = $id AND DATETIME(date_time) = DATETIME('$(cache.closest_previous_date_with_data[i])'))"
+        if i < length(cache._collection_ids)
+            query *= " OR "
+        end
     end
+    query *= " ORDER BY id;"
+
+    df = DBInterface.execute(db.sqlite_db, query) |> DataFrame
 
+    # Computed once up front and reused for every id without a matching row.
+    null_value = _psrdatabasesqlite_null_value(attribute.type)
+    for (i, id) in enumerate(cache._collection_ids)
+        # The query orders rows by id, so a binary search locates the id's rows.
+        rows = searchsorted(df.id, id)
+        cache.data[i] = isempty(rows) ? null_value : df[first(rows), 2]
+    end
     return nothing
 end