Skip to content

Commit

Permalink
Merge branch 'master' into gb/fix-ts-creation-message
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhermebodin authored Nov 7, 2024
2 parents 05ab745 + 9d95af3 commit c5171db
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 23 deletions.
12 changes: 3 additions & 9 deletions docs/src/psrdatabasesqlite/rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ CREATE TABLE Plant(

### Vector Attributes

- In case of a vector attribute, a table should be created with its name indicating the name of the Collection and the name of a group of the attribute, separated by `_vector_`, as presented below

<p style="text-align: center;"> COLLECTION_vector_GROUP_OF_ATTRIBUTES</p>
- In case of a vector attribute, a table should be created with its name indicating the name of the Collection and the name of a group of the attribute, separated by `_vector_`, such as `COLLECTION_vector_GROUP_OF_ATTRIBUTES`.

- The table must contain a Column named `id` and another named `vector_index`.
- There must be a Column named after the attributes names, which will store the value of the attribute for the specified element `id` and index `vector_index`.
Expand Down Expand Up @@ -135,9 +133,7 @@ CREATE TABLE HydroPlant_vector_GaugingStation(
### Time Series Files

- All Time Series files for the elements from a Collection should be stored in a Table
- The Table name should be the same as the name of the Collection followed by `_time_series_files`, as presented below

<p style="text-align: center"> COLLECTION_vector_ATTRIBUTE</p>
- The Table name should be the same as the name of the Collection followed by `_time_series_files`, such as `COLLECTION_time_series_files`.

- Each Column of the table should be named after the name of the attribute.
- Each Column should store the path to the file containing the time series data.
Expand All @@ -152,9 +148,7 @@ CREATE TABLE Plant_time_series_files (
```

### Time Series
- Time Series stored in the database should be stored in a table with the name of the Collection followed by `_time_series_` and the name of the attribute group, as presented below.

<p style="text-align: center"> COLLECTION_time_series_GROUP_OF_ATTRIBUTES</p>
- Time Series stored in the database should be stored in a table with the name of the Collection followed by `_time_series_` and the name of the attribute group, such as `COLLECTION_time_series_GROUP_OF_ATTRIBUTES`.

Notice that it is quite similar to the vector attributes, but without the `vector_index` column.
Instead, a mandatory column named `date_time` should be created to store the date of the time series data.
Expand Down
6 changes: 1 addition & 5 deletions src/PSRDatabaseSQLite/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ function number_of_elements(db::DatabaseSQLite, collection_id::String)::Int
end
end

"""
    _collection_has_any_data(db, collection_id) -> Bool

Return `true` when `number_of_elements` reports at least one element for
`collection_id` in `db`, and `false` otherwise.
"""
function _collection_has_any_data(db::DatabaseSQLite, collection_id::String)::Bool
    element_count = number_of_elements(db, collection_id)
    return element_count > 0
end

function _get_id(
db::DatabaseSQLite,
collection_id::String,
Expand Down Expand Up @@ -391,7 +387,7 @@ function read_time_series_row(

T = attribute.type

if !(_collection_has_any_data(db, collection_id))
if !(_time_controller_collection_has_any_data(db, collection_id))
return Vector{T}(undef, 0)
end
if !haskey(db._time_controller.cache, collection_attribute)
Expand Down
54 changes: 45 additions & 9 deletions src/PSRDatabaseSQLite/time_controller.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ const CollectionAttribute = Tuple{String, String}

# Some comments
# TODO we can further optimize the time controller with a few strategies
# 1 - We can try to ask for the data in the same query that we ask for the dates. I just don't know how to write a good query for that
# 2 - We can use prepared statements for the queries
# 3 - Avoid querying the data for every id in the attribute. Currently we fill the cache of dates before making the query and use it to inform which date each id should query. This is quite inefficient.
# The best way of optimizing it would be to solve 1 and 2.
# 1 - We can use prepared statements for the queries

mutable struct TimeControllerCache{T}
data::Vector{T}
Expand All @@ -20,36 +17,75 @@ mutable struct TimeControllerCache{T}
closest_next_date_with_data::Vector{DateTime}

# Private caches with the closest previous and next dates
# _closest_previous_date_with_data = maximum(closest_previous_date_with_data)
# _closest_next_date_with_data = minimum(closest_next_date_with_data)
_closest_global_previous_date_with_data::DateTime
_closest_global_next_date_with_data::DateTime

# Cache of collection_ids
# Cache of collection_ids, these are the ids of elements in a specific collection
_collection_ids::Vector{Int}
end

"""
    TimeController

Mutable state used when reading time series rows.

# Fields
- `cache`: one `TimeControllerCache` per `CollectionAttribute` key.
- `collection_has_any_data`: memo, keyed by collection id, of whether that
  collection has at least one element.
"""
Base.@kwdef mutable struct TimeController
    cache::Dict{CollectionAttribute, TimeControllerCache} =
        Dict{CollectionAttribute, TimeControllerCache}()

    # Memo of "does this collection have any elements?". An entry is added the
    # first time a collection is asked about (see
    # `_time_controller_collection_has_any_data`) and reused afterwards, so the
    # database is not queried repeatedly for the same collection.
    # This relies on the fact that the Time Controller only works in
    # read only databases.
    collection_has_any_data::Dict{String, Bool} = Dict{String, Bool}()
end

"""
    _collection_attribute(collection_id, attribute_id) -> CollectionAttribute

Pair `collection_id` and `attribute_id` into the `(collection_id, attribute_id)`
tuple, returned as a `CollectionAttribute`.
"""
function _collection_attribute(
    collection_id::String,
    attribute_id::String,
)::CollectionAttribute
    key = (collection_id, attribute_id)
    return key
end

"""
    _time_controller_collection_has_any_data(db, collection_id) -> Bool

Return whether the collection `collection_id` has at least one element,
memoizing the answer in `db._time_controller.collection_has_any_data`.

On a cache miss the answer is computed as `number_of_elements(db, collection_id) > 0`
and stored; on a hit the stored value is returned without touching the database.
"""
function _time_controller_collection_has_any_data(db, collection_id::String)::Bool
    # `get!` does the lookup and, on a miss, the insertion in a single step; the
    # do-block (and therefore `number_of_elements`) only runs on a miss.
    return get!(db._time_controller.collection_has_any_data, collection_id) do
        number_of_elements(db, collection_id) > 0
    end
end

"""
    _update_time_controller_cache!(cache, db, attribute, date_time)

Bring `cache` up to date for `date_time` in two steps: call
`_update_time_controller_cache_dates!`, then reload the attribute values with
`_request_time_series_data_for_time_controller_cache`. Returns `nothing`.
"""
function _update_time_controller_cache!(
    cache::TimeControllerCache,
    db,
    attribute::Attribute,
    date_time::DateTime,
)
    # Dates first: the data request reads `cache.closest_previous_date_with_data`,
    # which the dates update is presumably responsible for refreshing.
    _update_time_controller_cache_dates!(cache, db, attribute, date_time)

    _request_time_series_data_for_time_controller_cache(cache, db, attribute)
    return nothing
end

"""
    _request_time_series_data_for_time_controller_cache(cache, db, attribute)

Fill `cache.data` with the value of `attribute` for every id in
`cache._collection_ids`, using a single query.

For the element at position `i`, the row selected is the one whose `id` matches
and whose `date_time` equals `cache.closest_previous_date_with_data[i]`. Ids for
which the query returns no row receive
`_psrdatabasesqlite_null_value(attribute.type)`. Returns `nothing`.
"""
function _request_time_series_data_for_time_controller_cache(
    cache::TimeControllerCache,
    db,
    attribute::Attribute,
)
    ids = cache._collection_ids
    # With no ids the WHERE clause would be empty, which is not valid SQL, and
    # there is nothing to fill anyway.
    isempty(ids) && return nothing

    # One "(id = ... AND date_time = ...)" condition per element, so the values
    # for all ids come back from one query.
    conditions = map(enumerate(ids)) do (i, id)
        date = cache.closest_previous_date_with_data[i]
        return "(id = $(id) AND DATETIME(date_time) = DATETIME('$(date)'))"
    end
    query = string(
        "SELECT id, $(attribute.id) FROM $(attribute.table_where_is_located) WHERE ",
        join(conditions, " OR "),
        " ORDER BY id;",
    )

    df = DBInterface.execute(db.sqlite_db, query) |> DataFrame

    null_value = _psrdatabasesqlite_null_value(attribute.type)
    for (i, id) in enumerate(ids)
        # `ORDER BY id` keeps `df.id` sorted, which `searchsorted` requires.
        rows = searchsorted(df.id, id)
        cache.data[i] = isempty(rows) ? null_value : df[first(rows), 2]
    end
    return nothing
end

Expand Down

0 comments on commit c5171db

Please sign in to comment.