
Commit 99e0757

add: metadata loader improved
1 parent 610afb6 commit 99e0757

6 files changed: +165 -87 lines changed

README.md

Lines changed: 1 addition & 11 deletions
@@ -92,14 +92,4 @@ Please ensure to update tests as appropriate.
 
 ## License
 
-Distributed under the MIT License. See LICENSE for more information.
-
-
-## Development
-
-
-- Generate new pdocs: `.\generate_docs.sh`
-- Install package locally: `pip install -e .`
-- Run tests locally with pytest: `pytest ./tests`
-
-- Build package for upload: `python setup.py sdist bdist_wheel`
-- Upload build package to pypi: `twine upload dist/* --verbose --skip-existing`
+Distributed under the MIT License. See LICENSE for more information.

dev_readme.md

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+## Development
+
+- Generate new pdocs: `.\generate_docs.sh`
+- Install package locally: `pip install -e .`
+- Run tests locally with pytest: `pytest ./tests`
+
+- Build package for upload: `python setup.py sdist bdist_wheel`
+- Upload build package to pypi: `twine upload dist/* --verbose --skip-existing`

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -1,2 +1,3 @@
 pandas==2.1.0
-pdoc==14.6.1 # for docs
+pdoc==14.6.1 # for docs
+scipy==1.14.1

setup.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name = "timeseries-shaper",
-    version = "0.0.0.15",
+    version = "0.0.0.17",
     author = "Jakob Gabriel",
     author_email = "[email protected]",
     description = "timeseries-shaper filters, transforms and engineer your timeseries dataframe",

src/timeseries_shaper/loader/metadata/metadata_api_loader.py

Lines changed: 81 additions & 43 deletions
@@ -6,66 +6,104 @@
 
 class DatapointAPI:
     """
-    Class for accessing datapoints via an API.
+    Class for accessing datapoints for multiple devices via an API.
     """
 
-    def __init__(self, device_name: str, base_url: str, api_token: str, output_path: str = "data_points.json"):
-        self.device_name = device_name
+    def __init__(self, device_names: List[str], base_url: str, api_token: str, output_path: str = "data", required_uuid_list: List[str] = None, filter_enabled: bool = True):
+        """
+        Initialize the DatapointAPI class.
+
+        :param device_names: List of device names to retrieve metadata for.
+        :param base_url: Base URL of the API.
+        :param api_token: API token for authentication.
+        :param output_path: Directory to save the data points JSON files.
+        :param required_uuid_list: Mixed list of UUIDs to filter the metadata across devices (optional).
+        :param filter_enabled: Whether to filter metadata by "enabled == True" (default is True).
+        """
+        self.device_names = device_names
         self.base_url = base_url
         self.api_token = api_token
         self.output_path = output_path
-        self.uuids: List[str] = []
-        self.metadata: pd.DataFrame = pd.DataFrame([])
+        self.required_uuid_list = required_uuid_list or []  # Defaults to an empty list if None
+        self.filter_enabled = filter_enabled
+        self.device_metadata: Dict[str, pd.DataFrame] = {}  # Store metadata for each device
+        self.device_uuids: Dict[str, List[str]] = {}  # Store UUIDs for each device
         self._api_access()
 
     def _api_access(self) -> None:
-        """Connect to the API and retrieve metadata for the specified device."""
+        """Connect to the API and retrieve metadata for the specified devices."""
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {self.api_token}",
         }
 
-        metadata = []
-        devices_found = []
-
-        for datatron in requests.get(f"{self.base_url}", headers=headers).json():
-            for device in requests.get(f"{self.base_url}/{datatron['id']}/devices", headers=headers).json():
-                if device["name"] == self.device_name:
-                    datapoints = requests.get(
-                        f"{self.base_url}/{datatron['id']}/devices/{device['id']}/data_points",
-                        headers=headers,
-                    ).json()
-                    metadata += datapoints
-                    devices_found.append(device["name"])
+        for device_name in self.device_names:
+            metadata = []
+            devices_found = []
+
+            for datatron in requests.get(f"{self.base_url}", headers=headers).json():
+                for device in requests.get(f"{self.base_url}/{datatron['id']}/devices", headers=headers).json():
+                    if device["name"] == device_name:
+                        datapoints = requests.get(
+                            f"{self.base_url}/{datatron['id']}/devices/{device['id']}/data_points",
+                            headers=headers,
+                        ).json()
+                        metadata += datapoints
+                        devices_found.append(device["name"])
+                    if devices_found:
+                        break
                 if devices_found:
                     break
-            if devices_found:
-                break
-
-        self.metadata = pd.DataFrame(metadata)
-        if not self.metadata.empty:
-            self.metadata = self.metadata[self.metadata["enabled"] == True][["uuid", "label", "config"]]
-            data_points = self.metadata.to_dict(orient="records")
-            self._export_json(data_points)
-            self.uuids = [data["uuid"] for data in data_points]
-
-    def _export_json(self, data_points: List[Dict[str, str]]) -> None:
-        """Export data points to a JSON file."""
-        with open(self.output_path, 'w') as f:
+
+            # Process metadata for the current device
+            metadata_df = pd.DataFrame(metadata)
+            if not metadata_df.empty:
+                if self.filter_enabled:
+                    metadata_df = metadata_df[metadata_df["enabled"] == True]
+
+                metadata_df = metadata_df[["uuid", "label", "config"]]
+
+                # Filter metadata by required UUIDs, if any
+                if self.required_uuid_list:
+                    metadata_df = metadata_df[metadata_df["uuid"].isin(self.required_uuid_list)]
+
+                # Store processed metadata and UUIDs
+                self.device_metadata[device_name] = metadata_df
+                self.device_uuids[device_name] = metadata_df["uuid"].tolist()
+
+                # Export JSON file for this device
+                self._export_json(metadata_df.to_dict(orient="records"), device_name)
+
+    def _export_json(self, data_points: List[Dict[str, str]], device_name: str) -> None:
+        """Export data points to a JSON file for the specified device."""
+        file_name = f"{self.output_path}/{device_name.replace(' ', '_')}_data_points.json"
+        with open(file_name, 'w') as f:
             json.dump(data_points, f, indent=2)
 
-    def get_uuids(self) -> List[str]:
-        """Return the list of UUIDs."""
-        return self.uuids
+    def get_all_uuids(self) -> Dict[str, List[str]]:
+        """Return a dictionary of UUIDs for each device."""
+        return self.device_uuids
 
-    def get_full_config(self) -> List[Dict[str, str]]:
-        """Return the full configuration (uuid, label, config) as a list of dictionaries."""
-        return self.metadata.to_dict(orient="records")
+    def get_all_metadata(self) -> Dict[str, List[Dict[str, str]]]:
+        """Return a dictionary of metadata for each device."""
+        return {device: metadata.to_dict(orient="records") for device, metadata in self.device_metadata.items()}
 
-    def get_uuid_label_pairs(self) -> List[Dict[str, str]]:
-        """Return a list of uuid and label pairs."""
-        return self.metadata[['uuid', 'label']].to_dict(orient='records')
+    def display_dataframe(self, device_name: str = None) -> None:
+        """
+        Print the metadata DataFrame for a specific device or all devices.
 
-    def display_dataframe(self) -> None:
-        """Print the metadata DataFrame to visually inspect data points."""
-        print(self.metadata)
+        :param device_name: Name of the device to display metadata for (optional).
+                            If None, displays metadata for all devices.
+        """
+        if device_name:
+            # Display metadata for a specific device
+            if device_name in self.device_metadata:
+                print(f"Metadata for device: {device_name}")
+                print(self.device_metadata[device_name])
+            else:
+                print(f"No metadata found for device: {device_name}")
+        else:
+            # Display metadata for all devices
+            for device, metadata in self.device_metadata.items():
+                print(f"\nMetadata for device: {device}")
+                print(metadata)
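
The reworked `DatapointAPI` takes a list of device names, keeps per-device metadata, and writes one `<device>_data_points.json` file per device inside `output_path`. A minimal usage sketch follows; the base URL, token, device names, and import path are illustrative assumptions, not values from this commit:

```python
import os

# Assumed import path, mirroring src/timeseries_shaper/loader/metadata/metadata_api_loader.py
from timeseries_shaper.loader.metadata.metadata_api_loader import DatapointAPI

# output_path is treated as a directory; create it before the loader writes its JSON files.
os.makedirs("data", exist_ok=True)

api = DatapointAPI(
    device_names=["press_01", "oven_02"],          # hypothetical device names
    base_url="https://example.com/api/datatrons",  # hypothetical API endpoint
    api_token="YOUR_API_TOKEN",                    # placeholder token
    output_path="data",
    required_uuid_list=[],                         # empty list -> keep all UUIDs
    filter_enabled=True,                           # keep only data points with enabled == True
)

print(api.get_all_uuids())        # {"press_01": [...], "oven_02": [...]}
print(api.get_all_metadata())     # per-device records with uuid, label, config
api.display_dataframe("press_01")
```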

src/timeseries_shaper/loader/metadata/metadata_db_loader.py

Lines changed: 72 additions & 31 deletions
@@ -9,18 +9,31 @@ class DatapointDB:
     Class for accessing datapoints via a database.
     """
 
-    def __init__(self, device_name: str, db_user: str, db_pass: str, db_host: str, output_path: str = "data_points.json"):
-        self.device_name = device_name
+    def __init__(self, device_names: List[str], db_user: str, db_pass: str, db_host: str, output_path: str = "data", required_uuid_list: List[str] = None, filter_enabled: bool = True):
+        """
+        Initialize the DatapointDB class.
+
+        :param device_names: List of device names to retrieve metadata for.
+        :param db_user: Database user.
+        :param db_pass: Database password.
+        :param db_host: Database host.
+        :param output_path: Directory to save JSON files.
+        :param required_uuid_list: List of UUIDs to filter the metadata (optional).
+        :param filter_enabled: Whether to filter metadata by "enabled == True" and "archived == False" (default is True).
+        """
+        self.device_names = device_names
         self.db_user = db_user
         self.db_pass = db_pass
         self.db_host = db_host
         self.output_path = output_path
-        self.uuids: List[str] = []
-        self.metadata: pd.DataFrame = pd.DataFrame([])
+        self.required_uuid_list = required_uuid_list or []
+        self.filter_enabled = filter_enabled
+        self.device_metadata: Dict[str, pd.DataFrame] = {}  # Store metadata for each device
+        self.device_uuids: Dict[str, List[str]] = {}  # Store UUIDs for each device
         self._db_access()
 
     def _db_access(self) -> None:
-        """Connect to the database and retrieve metadata for the specified device."""
+        """Connect to the database and retrieve metadata for each device."""
         conn = psycopg2.connect(
             dbname="config_repository",
             user=self.db_user,
@@ -30,37 +43,65 @@ def _db_access(self) -> None:
         )
         cursor = conn.cursor()
 
-        cursor.execute(f"""
-            SELECT dp.uuid, dp.label, dp.config
-            FROM data_points dp
-            INNER JOIN devices dev ON dev.id = dp.device_id
-            WHERE dp.enabled = true AND dp.archived = false AND dev.name = %s
-        """, (self.device_name,))
+        for device_name in self.device_names:
+            query = """
+                SELECT dp.uuid, dp.label, dp.config
+                FROM data_points dp
+                INNER JOIN devices dev ON dev.id = dp.device_id
+                WHERE dev.name = %s
+            """
+            if self.filter_enabled:
+                query += " AND dp.enabled = true AND dp.archived = false"
 
-        data_points = [{"uuid": r[0], "label": r[1], "config": r[2]} for r in cursor.fetchall()]
-        conn.close()
+            cursor.execute(query, (device_name,))
+            data_points = [{"uuid": r[0], "label": r[1], "config": r[2]} for r in cursor.fetchall()]
+
+            # Convert to DataFrame and filter by required UUIDs if necessary
+            metadata_df = pd.DataFrame(data_points)
+            if not metadata_df.empty and self.required_uuid_list:
+                metadata_df = metadata_df[metadata_df["uuid"].isin(self.required_uuid_list)]
 
-        self.metadata = pd.DataFrame(data_points)
-        self._export_json(data_points)
-        self.uuids = [data["uuid"] for data in data_points]
+            # Store metadata and UUIDs for the device
+            self.device_metadata[device_name] = metadata_df
+            self.device_uuids[device_name] = metadata_df["uuid"].tolist()
+
+            # Export to JSON file
+            self._export_json(metadata_df.to_dict(orient="records"), device_name)
+
+        conn.close()
 
-    def _export_json(self, data_points: List[Dict[str, str]]) -> None:
-        """Export data points to a JSON file."""
-        with open(self.output_path, 'w') as f:
+    def _export_json(self, data_points: List[Dict[str, str]], device_name: str) -> None:
+        """Export data points to a JSON file for the specified device."""
+        file_name = f"{self.output_path}/{device_name.replace(' ', '_')}_data_points.json"
+        with open(file_name, 'w') as f:
            json.dump(data_points, f, indent=2)
 
-    def get_uuids(self) -> List[str]:
-        """Return the list of UUIDs."""
-        return self.uuids
+    def get_all_uuids(self) -> Dict[str, List[str]]:
+        """Return a dictionary of UUIDs for each device."""
+        return self.device_uuids
 
-    def get_full_config(self) -> List[Dict[str, str]]:
-        """Return the full configuration (uuid, label, config) as a list of dictionaries."""
-        return self.metadata.to_dict(orient="records")
+    def get_all_metadata(self) -> Dict[str, List[Dict[str, str]]]:
+        """Return a dictionary of metadata for each device."""
+        return {device: metadata.to_dict(orient="records") for device, metadata in self.device_metadata.items()}
 
-    def get_uuid_label_pairs(self) -> List[Dict[str, str]]:
-        """Return a list of uuid and label pairs."""
-        return self.metadata[['uuid', 'label']].to_dict(orient='records')
+    def display_dataframe(self, device_name: str = None, aggregate: bool = False) -> None:
+        """
+        Display metadata as a DataFrame for a specific device or all devices.
 
-    def display_dataframe(self) -> None:
-        """Print the metadata DataFrame to visually inspect data points."""
-        print(self.metadata)
+        :param device_name: Name of the device to display metadata for (optional).
+        :param aggregate: If True, combine metadata from all devices into a single DataFrame.
+        """
+        if aggregate:
+            combined_df = pd.concat(self.device_metadata.values(), keys=self.device_metadata.keys())
+            print("Aggregated metadata for all devices:")
+            print(combined_df)
+        elif device_name:
+            if device_name in self.device_metadata:
+                print(f"Metadata for device: {device_name}")
+                print(self.device_metadata[device_name])
+            else:
+                print(f"No metadata found for device: {device_name}")
+        else:
+            for device, metadata in self.device_metadata.items():
+                print(f"\nMetadata for device: {device}")
+                print(metadata)
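
`DatapointDB` follows the same multi-device pattern against the config_repository database and adds an aggregate view. A minimal sketch, assuming hypothetical credentials, device names, and the import path implied by the file location:

```python
import os

# Assumed import path, mirroring src/timeseries_shaper/loader/metadata/metadata_db_loader.py
from timeseries_shaper.loader.metadata.metadata_db_loader import DatapointDB

os.makedirs("data", exist_ok=True)  # output_path directory must exist before the JSON export

db = DatapointDB(
    device_names=["press_01", "oven_02"],  # hypothetical device names
    db_user="config_reader",               # hypothetical credentials
    db_pass="secret",
    db_host="db.example.com",
    output_path="data",
    required_uuid_list=None,               # None -> no UUID filtering
    filter_enabled=True,                   # adds "enabled = true AND archived = false" to the query
)

db.display_dataframe(aggregate=True)       # one DataFrame keyed by device name
uuids_by_device = db.get_all_uuids()       # {"press_01": [...], "oven_02": [...]}
```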
