From d1a9f52188c13f3682c0ab1ae4d68408305ec14d Mon Sep 17 00:00:00 2001 From: Espen Enes Date: Sun, 24 Mar 2024 11:15:10 +0100 Subject: [PATCH 01/10] Refactor channel search functionality in `tdm_loader` The previous implementation of the channel search function in `tdm_loader.py` was refactored. Search now matches channels directly without set generation and updates retrieval with more efficient lookups. The modification updates the testing to reflect the changes in the returned search results as well. Before the refactor, searching through 22000 channels took 6.5 min; after it, the same search took 0.08 s. The test has also been changed accordingly: when searching for "", all channels should be returned. --- tdm_loader/tdm_loader.py | 46 +++++++++++++--------------- tdm_loader/tests/test_non_zip_tdm.py | 4 ++- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 4c39e19..b21bbc1 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -228,31 +228,27 @@ def channel_search(self, search_term): """ search_term = str(search_term).upper().replace(" ", "") - ind_chg_ch = [] - for j in range(len(self._xml_chgs)): - chs = self._channels_xml(j) - - if search_term == "": - found_terms = [ - ch.findtext("name") for ch in chs if ch.findtext("name") is None - ] - else: - found_terms = [ - ch.findtext("name") - for ch in chs - if ch.findtext("name") is not None - and ch.findtext("name") - .upper() - .replace(" ", "") - .find(str(search_term)) - >= 0 - ] - - for name in found_terms: - i = [ch.findtext("name") for ch in chs].index(name) - ind_chg_ch.append((name, j, i)) - - return ind_chg_ch + matched_channels = [] + channel_groups_cache = {} + channel_group_ids = {v: i for i, v in enumerate(x.get("id") for x in self._xml_chgs)} + + for channel in self._root.findall(".//tdm_channel"): + if channel_name := channel.find("name").text: + channel_id = channel.get("id") + group_uri = re.findall(r'id\("(.+?)"\)', 
channel.find("group").text) + group_id = channel_group_ids.get(group_uri[0]) + channels = channel_groups_cache.get(group_id) + + if not channels: + group = self._xml_chgs[group_id] + channels = {v: i for i, v in enumerate(re.findall(r'id\("(.+?)"\)', group.find("channels").text))} + channel_groups_cache[group_id] = channels + channel_id = channels.get(channel_id) + + if channel_name.upper().replace(" ", "").find(search_term) >= 0: + matched_channels.append((channel_name, group_id, channel_id)) + + return matched_channels def channel(self, channel_group, channel, occurrence=0, ch_occurrence=0): """Returns a data channel by its channel group and channel index. diff --git a/tdm_loader/tests/test_non_zip_tdm.py b/tdm_loader/tests/test_non_zip_tdm.py index b60e2f3..2d29488 100644 --- a/tdm_loader/tests/test_non_zip_tdm.py +++ b/tdm_loader/tests/test_non_zip_tdm.py @@ -204,7 +204,9 @@ def test_channel_search(tdm_file): ("Float as Float", 0, 1), ] - assert tdm_file.channel_search("") == [] + assert tdm_file.channel_search("") == [('Float_4_Integers', 0, 0), + ('Float as Float', 0, 1), + ('Integer32_with_max_min', 0, 2)] # pylint: disable=redefined-outer-name From aab3b66d9acfeecf860d726f2a086844a67929d6 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:15:00 +0100 Subject: [PATCH 02/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- tdm_loader/tdm_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index b21bbc1..75c44e6 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -233,7 +233,8 @@ def channel_search(self, search_term): channel_group_ids = {v: i for i, v in enumerate(x.get("id") for x in self._xml_chgs)} for channel in self._root.findall(".//tdm_channel"): - if channel_name := channel.find("name").text: + channel_name = channel.find("name").text + if channel_name: channel_id = channel.get("id") group_uri = 
re.findall(r'id\("(.+?)"\)', channel.find("group").text) group_id = channel_group_ids.get(group_uri[0]) From c8ca0d174e36144ae6eb06d5bec5cd385b9e07b4 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:40:44 +0100 Subject: [PATCH 03/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- tdm_loader/tdm_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 75c44e6..36474c5 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -238,7 +238,7 @@ def channel_search(self, search_term): channel_id = channel.get("id") group_uri = re.findall(r'id\("(.+?)"\)', channel.find("group").text) group_id = channel_group_ids.get(group_uri[0]) - channels = channel_groups_cache.get(group_id) + channels = get_channels(group_id) if not channels: group = self._xml_chgs[group_id] From a554ea83e201ffea84fe48927e89efc3384219e6 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:40:50 +0100 Subject: [PATCH 04/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- tdm_loader/tdm_loader.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 36474c5..16284d5 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -240,10 +240,6 @@ def channel_search(self, search_term): group_id = channel_group_ids.get(group_uri[0]) channels = get_channels(group_id) - if not channels: - group = self._xml_chgs[group_id] - channels = {v: i for i, v in enumerate(re.findall(r'id\("(.+?)"\)', group.find("channels").text))} - channel_groups_cache[group_id] = channels channel_id = channels.get(channel_id) if channel_name.upper().replace(" ", "").find(search_term) >= 0: From e2f5a733e754f60ce63eb9ae721276ea60e08cf7 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:40:58 +0100 Subject: [PATCH 05/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- 
tdm_loader/tdm_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 16284d5..7b9e866 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -240,7 +240,7 @@ def channel_search(self, search_term): group_id = channel_group_ids.get(group_uri[0]) channels = get_channels(group_id) - channel_id = channels.get(channel_id) + channel_id = channels.get(channel.get("id")) if channel_name.upper().replace(" ", "").find(search_term) >= 0: matched_channels.append((channel_name, group_id, channel_id)) From e5809e2b9fa0c5c3af3ef67c8ca6caf8dd7d5fe7 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:41:04 +0100 Subject: [PATCH 06/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- tdm_loader/tdm_loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 7b9e866..d6596a8 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -235,7 +235,6 @@ def channel_search(self, search_term): for channel in self._root.findall(".//tdm_channel"): channel_name = channel.find("name").text if channel_name: - channel_id = channel.get("id") group_uri = re.findall(r'id\("(.+?)"\)', channel.find("group").text) group_id = channel_group_ids.get(group_uri[0]) channels = get_channels(group_id) From 159ab797d1f0d775ad7f989492868d106b3fbcd6 Mon Sep 17 00:00:00 2001 From: EspenEnes Date: Mon, 25 Mar 2024 12:41:10 +0100 Subject: [PATCH 07/10] Update tdm_loader/tdm_loader.py Co-authored-by: Florian Dobener --- tdm_loader/tdm_loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index d6596a8..277ddce 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -229,7 +229,6 @@ def channel_search(self, search_term): search_term = str(search_term).upper().replace(" ", "") matched_channels = [] - channel_groups_cache = {} channel_group_ids = {v: 
i for i, v in enumerate(x.get("id") for x in self._xml_chgs)} for channel in self._root.findall(".//tdm_channel"): From fce26819614b5aac13070abae2c4f1713804fb2d Mon Sep 17 00:00:00 2001 From: Espen Enes Date: Mon, 25 Mar 2024 12:51:42 +0100 Subject: [PATCH 08/10] Implement caching and optimize channel search in `tdm_loader` A cache decorator is introduced for the `get_channels` function to speed up repeated lookups, replacing the previous hand-rolled dictionary cache (`channel_groups_cache`). --- tdm_loader/tdm_loader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 277ddce..3523003 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -26,6 +26,7 @@ import os import zipfile import re +from functools import cache from xml.etree import ElementTree import warnings @@ -168,6 +169,11 @@ def _get_usi_from_txt(txt): return [] return re.findall(r'id\("(.+?)"\)', txt) + @cache + def get_channels(self, group_id): + group = self._xml_chgs[group_id] + return {v: i for i, v in enumerate(re.findall(r'id\("(.+?)"\)', group.find("channels").text))} + def channel_group_search(self, search_term): """Returns a list of channel group names that contain ``search term``. Results are independent of case and spaces in the channel name. @@ -236,7 +242,7 @@ def channel_search(self, search_term): if channel_name: group_uri = re.findall(r'id\("(.+?)"\)', channel.find("group").text) group_id = channel_group_ids.get(group_uri[0]) - channels = get_channels(group_id) + channels = self.get_channels(group_id) channel_id = channels.get(channel.get("id")) From 5b006cb8cd28c1ef25bd1fd731f8438d508ed494 Mon Sep 17 00:00:00 2001 From: Espen Enes Date: Mon, 25 Mar 2024 15:28:00 +0100 Subject: [PATCH 09/10] Rename method get_channels to _get_channels in tdm_loader The public `get_channels` function is renamed to the private method `_get_channels` in 'tdm_loader.py'. 
The method is for internal use only; making it private makes this clearer and prevents unintended external access. --- tdm_loader/tdm_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 3523003..78fc1bc 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -170,7 +170,7 @@ def _get_usi_from_txt(txt): return re.findall(r'id\("(.+?)"\)', txt) @cache - def get_channels(self, group_id): + def _get_channels(self, group_id): group = self._xml_chgs[group_id] return {v: i for i, v in enumerate(re.findall(r'id\("(.+?)"\)', group.find("channels").text))} From 6e4e557d8adaf603df0a36c33861a126ac3697b0 Mon Sep 17 00:00:00 2001 From: Espen Enes Date: Mon, 25 Mar 2024 15:30:17 +0100 Subject: [PATCH 10/10] Correct typo in method name in tdm_loader The call to "get_channels" in `channel_search` has been corrected to "_get_channels" in the 'tdm_loader.py' file. The change was necessitated by the requirement for the method to be private (hence the underscore), as it is only meant for internal use and should not be accessed externally. --- tdm_loader/tdm_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdm_loader/tdm_loader.py b/tdm_loader/tdm_loader.py index 78fc1bc..72283d1 100644 --- a/tdm_loader/tdm_loader.py +++ b/tdm_loader/tdm_loader.py @@ -242,7 +242,7 @@ def channel_search(self, search_term): if channel_name: group_uri = re.findall(r'id\("(.+?)"\)', channel.find("group").text) group_id = channel_group_ids.get(group_uri[0]) - channels = self.get_channels(group_id) + channels = self._get_channels(group_id) channel_id = channels.get(channel.get("id"))