From 6ab8f488adea40a2b59b0ad7f7889da720d4d554 Mon Sep 17 00:00:00 2001 From: Lohit Vankineni Date: Fri, 10 Jan 2025 10:19:37 -0800 Subject: [PATCH] Lazy load imports inside init files --- haystack/components/converters/__init__.py | 80 +++++++++++-------- haystack/components/embedders/__init__.py | 56 ++++++++----- haystack/components/generators/__init__.py | 46 +++++++---- haystack/components/preprocessors/__init__.py | 34 ++++++-- haystack/components/rankers/__init__.py | 42 ++++++---- haystack/components/retrievers/__init__.py | 30 +++++-- haystack/components/routers/__init__.py | 46 +++++++---- haystack/lazy_imports.py | 20 +++++ haystack/utils/__init__.py | 72 ++++++++++------- ...ed-init-lazy-imports-042eebe124f98e5f.yaml | 4 + 10 files changed, 296 insertions(+), 134 deletions(-) create mode 100644 releasenotes/notes/added-init-lazy-imports-042eebe124f98e5f.yaml diff --git a/haystack/components/converters/__init__.py b/haystack/components/converters/__init__.py index 2c7ed33505..f5c09a0a1e 100644 --- a/haystack/components/converters/__init__.py +++ b/haystack/components/converters/__init__.py @@ -2,35 +2,51 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.components.converters.azure import AzureOCRDocumentConverter -from haystack.components.converters.csv import CSVToDocument -from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument -from haystack.components.converters.html import HTMLToDocument -from haystack.components.converters.json import JSONConverter -from haystack.components.converters.markdown import MarkdownToDocument -from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions -from haystack.components.converters.output_adapter import OutputAdapter -from haystack.components.converters.pdfminer import PDFMinerToDocument -from haystack.components.converters.pptx import PPTXToDocument -from haystack.components.converters.pypdf import PyPDFToDocument -from haystack.components.converters.tika 
import TikaDocumentConverter -from haystack.components.converters.txt import TextFileToDocument -from haystack.components.converters.xlsx import XLSXToDocument - -__all__ = [ - "TextFileToDocument", - "TikaDocumentConverter", - "AzureOCRDocumentConverter", - "PyPDFToDocument", - "PDFMinerToDocument", - "HTMLToDocument", - "MarkdownToDocument", - "OpenAPIServiceToFunctions", - "OutputAdapter", - "DOCXToDocument", - "DOCXMetadata", - "PPTXToDocument", - "CSVToDocument", - "JSONConverter", - "XLSXToDocument", -] +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.converters.azure import AzureOCRDocumentConverter + from haystack.components.converters.csv import CSVToDocument + from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument + from haystack.components.converters.html import HTMLToDocument + from haystack.components.converters.json import JSONConverter + from haystack.components.converters.markdown import MarkdownToDocument + from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions + from haystack.components.converters.output_adapter import OutputAdapter + from haystack.components.converters.pdfminer import PDFMinerToDocument + from haystack.components.converters.pptx import PPTXToDocument + from haystack.components.converters.pypdf import PyPDFToDocument + from haystack.components.converters.tika import TikaDocumentConverter + from haystack.components.converters.txt import TextFileToDocument + from haystack.components.converters.xlsx import XLSXToDocument + + +_lazy_imports = { + "TextFileToDocument": "haystack.components.converters.txt", + "TikaDocumentConverter": "haystack.components.converters.tika", + "AzureOCRDocumentConverter": "haystack.components.converters.azure", + "PyPDFToDocument": "haystack.components.converters.pypdf", + "PDFMinerToDocument": "haystack.components.converters.pdfminer", + "HTMLToDocument": 
"haystack.components.converters.html", + "MarkdownToDocument": 
"haystack.components.converters.markdown", + "OpenAPIServiceToFunctions": "haystack.components.converters.openapi_functions", + "OutputAdapter": "haystack.components.converters.output_adapter", + "DOCXToDocument": "haystack.components.converters.docx", + "DOCXMetadata": "haystack.components.converters.docx", + "PPTXToDocument": "haystack.components.converters.pptx", + "CSVToDocument": "haystack.components.converters.csv", + "JSONConverter": "haystack.components.converters.json", + "XLSXToDocument": "haystack.components.converters.xlsx", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/components/embedders/__init__.py b/haystack/components/embedders/__init__.py index 2b6cf4301e..efaab0f3f4 100644 --- a/haystack/components/embedders/__init__.py +++ b/haystack/components/embedders/__init__.py @@ -2,22 +2,40 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder -from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder -from haystack.components.embedders.hugging_face_api_document_embedder import HuggingFaceAPIDocumentEmbedder -from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder -from haystack.components.embedders.openai_document_embedder import OpenAIDocumentEmbedder -from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder -from haystack.components.embedders.sentence_transformers_document_embedder import SentenceTransformersDocumentEmbedder -from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder - -__all__ = [ - "HuggingFaceAPITextEmbedder", - "HuggingFaceAPIDocumentEmbedder", - "SentenceTransformersTextEmbedder", - "SentenceTransformersDocumentEmbedder", - "OpenAITextEmbedder", - 
"OpenAIDocumentEmbedder", - "AzureOpenAITextEmbedder", - 
"AzureOpenAIDocumentEmbedder", -] +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder + from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder + from haystack.components.embedders.hugging_face_api_document_embedder import HuggingFaceAPIDocumentEmbedder + from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder + from haystack.components.embedders.openai_document_embedder import OpenAIDocumentEmbedder + from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder + from haystack.components.embedders.sentence_transformers_document_embedder import ( + SentenceTransformersDocumentEmbedder, + ) + from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder + + +_lazy_imports = { + "AzureOpenAIDocumentEmbedder": "haystack.components.embedders.azure_document_embedder", + "AzureOpenAITextEmbedder": "haystack.components.embedders.azure_text_embedder", + "HuggingFaceAPIDocumentEmbedder": "haystack.components.embedders.hugging_face_api_document_embedder", + "HuggingFaceAPITextEmbedder": "haystack.components.embedders.hugging_face_api_text_embedder", + "OpenAIDocumentEmbedder": "haystack.components.embedders.openai_document_embedder", + "OpenAITextEmbedder": "haystack.components.embedders.openai_text_embedder", + "SentenceTransformersDocumentEmbedder": "haystack.components.embedders.sentence_transformers_document_embedder", + "SentenceTransformersTextEmbedder": "haystack.components.embedders.sentence_transformers_text_embedder", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/components/generators/__init__.py 
b/haystack/components/generators/__init__.py index 952c2dadd2..5d12da8728 100644 --- a/haystack/components/generators/__init__.py +++ b/haystack/components/generators/__init__.py @@ -2,18 +2,34 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.components.generators.openai import ( # noqa: I001 (otherwise we end up with partial imports) - OpenAIGenerator, -) -from haystack.components.generators.azure import AzureOpenAIGenerator -from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator -from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator -from haystack.components.generators.openai_dalle import DALLEImageGenerator - -__all__ = [ - "HuggingFaceLocalGenerator", - "HuggingFaceAPIGenerator", - "OpenAIGenerator", - "AzureOpenAIGenerator", - "DALLEImageGenerator", -] +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.generators.openai import ( # noqa: I001 (otherwise we end up with partial imports) + OpenAIGenerator, + ) + from haystack.components.generators.azure import AzureOpenAIGenerator + from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator + from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator + from haystack.components.generators.openai_dalle import DALLEImageGenerator + + +_lazy_imports = { + "OpenAIGenerator": "haystack.components.generators.openai", + "AzureOpenAIGenerator": "haystack.components.generators.azure", + "HuggingFaceLocalGenerator": "haystack.components.generators.hugging_face_local", + "HuggingFaceAPIGenerator": "haystack.components.generators.hugging_face_api", + "DALLEImageGenerator": "haystack.components.generators.openai_dalle", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git 
a/haystack/components/preprocessors/__init__.py b/haystack/components/preprocessors/__init__.py index 467f16ceeb..46685f0efa 100644 --- a/haystack/components/preprocessors/__init__.py +++ b/haystack/components/preprocessors/__init__.py @@ -2,10 +2,32 @@ # # SPDX-License-Identifier: Apache-2.0 -from .document_cleaner import DocumentCleaner -from .document_splitter import DocumentSplitter -from .nltk_document_splitter import NLTKDocumentSplitter -from .sentence_tokenizer import SentenceSplitter -from .text_cleaner import TextCleaner +from typing import TYPE_CHECKING -__all__ = ["DocumentSplitter", "DocumentCleaner", "NLTKDocumentSplitter", "SentenceSplitter", "TextCleaner"] +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.preprocessors.document_cleaner import DocumentCleaner + from haystack.components.preprocessors.document_splitter import DocumentSplitter + from haystack.components.preprocessors.nltk_document_splitter import NLTKDocumentSplitter + from haystack.components.preprocessors.sentence_tokenizer import SentenceSplitter + from haystack.components.preprocessors.text_cleaner import TextCleaner + + +_lazy_imports = { + "DocumentCleaner": "haystack.components.preprocessors.document_cleaner", + "DocumentSplitter": "haystack.components.preprocessors.document_splitter", + "NLTKDocumentSplitter": "haystack.components.preprocessors.nltk_document_splitter", + "SentenceSplitter": "haystack.components.preprocessors.sentence_tokenizer", + "TextCleaner": "haystack.components.preprocessors.text_cleaner", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/components/rankers/__init__.py b/haystack/components/rankers/__init__.py index e76fa68c69..46287244c0 100644 --- a/haystack/components/rankers/__init__.py +++ b/haystack/components/rankers/__init__.py @@ -2,16 +2,32 @@ # 
# SPDX-License-Identifier: Apache-2.0 -from haystack.components.rankers.lost_in_the_middle import LostInTheMiddleRanker -from haystack.components.rankers.meta_field import MetaFieldRanker -from haystack.components.rankers.meta_field_grouping_ranker import MetaFieldGroupingRanker -from haystack.components.rankers.sentence_transformers_diversity import SentenceTransformersDiversityRanker -from haystack.components.rankers.transformers_similarity import TransformersSimilarityRanker - -__all__ = [ - "LostInTheMiddleRanker", - "MetaFieldRanker", - "MetaFieldGroupingRanker", - "SentenceTransformersDiversityRanker", - "TransformersSimilarityRanker", -] +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.rankers.lost_in_the_middle import LostInTheMiddleRanker + from haystack.components.rankers.meta_field import MetaFieldRanker + from haystack.components.rankers.meta_field_grouping_ranker import MetaFieldGroupingRanker + from haystack.components.rankers.sentence_transformers_diversity import SentenceTransformersDiversityRanker + from haystack.components.rankers.transformers_similarity import TransformersSimilarityRanker + + +_lazy_imports = { + "LostInTheMiddleRanker": "haystack.components.rankers.lost_in_the_middle", + "MetaFieldRanker": "haystack.components.rankers.meta_field", + "MetaFieldGroupingRanker": "haystack.components.rankers.meta_field_grouping_ranker", + "SentenceTransformersDiversityRanker": "haystack.components.rankers.sentence_transformers_diversity", + "TransformersSimilarityRanker": "haystack.components.rankers.transformers_similarity", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/components/retrievers/__init__.py b/haystack/components/retrievers/__init__.py index 91d1288a19..c1f1773101 100644 --- 
a/haystack/components/retrievers/__init__.py +++ b/haystack/components/retrievers/__init__.py @@ -2,9 +2,29 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.components.retrievers.filter_retriever import FilterRetriever -from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever -from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever -from haystack.components.retrievers.sentence_window_retriever import SentenceWindowRetriever +from typing import TYPE_CHECKING -__all__ = ["FilterRetriever", "InMemoryEmbeddingRetriever", "InMemoryBM25Retriever", "SentenceWindowRetriever"] +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.retrievers.filter_retriever import FilterRetriever + from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever + from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever + from haystack.components.retrievers.sentence_window_retriever import SentenceWindowRetriever + +_lazy_imports = { + "FilterRetriever": "haystack.components.retrievers.filter_retriever", + "InMemoryBM25Retriever": "haystack.components.retrievers.in_memory.bm25_retriever", + "InMemoryEmbeddingRetriever": "haystack.components.retrievers.in_memory.embedding_retriever", + "SentenceWindowRetriever": "haystack.components.retrievers.sentence_window_retriever", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/components/routers/__init__.py b/haystack/components/routers/__init__.py index f22d69917d..f6c28f2b0a 100644 --- a/haystack/components/routers/__init__.py +++ b/haystack/components/routers/__init__.py @@ -2,18 +2,34 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.components.routers.conditional_router import 
ConditionalRouter -from haystack.components.routers.file_type_router import FileTypeRouter -from haystack.components.routers.metadata_router import MetadataRouter -from haystack.components.routers.text_language_router import TextLanguageRouter -from haystack.components.routers.transformers_text_router import TransformersTextRouter -from haystack.components.routers.zero_shot_text_router import TransformersZeroShotTextRouter - -__all__ = [ - "FileTypeRouter", - "MetadataRouter", - "TextLanguageRouter", - "ConditionalRouter", - "TransformersZeroShotTextRouter", - "TransformersTextRouter", -] +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr + +if TYPE_CHECKING: + from haystack.components.routers.conditional_router import ConditionalRouter + from haystack.components.routers.file_type_router import FileTypeRouter + from haystack.components.routers.metadata_router import MetadataRouter + from haystack.components.routers.text_language_router import TextLanguageRouter + from haystack.components.routers.transformers_text_router import TransformersTextRouter + from haystack.components.routers.zero_shot_text_router import TransformersZeroShotTextRouter + + +_lazy_imports = { + "ConditionalRouter": "haystack.components.routers.conditional_router", + "FileTypeRouter": "haystack.components.routers.file_type_router", + "MetadataRouter": "haystack.components.routers.metadata_router", + "TextLanguageRouter": "haystack.components.routers.text_language_router", + "TransformersTextRouter": "haystack.components.routers.transformers_text_router", + "TransformersZeroShotTextRouter": "haystack.components.routers.zero_shot_text_router", +} + +__all__ = list(_lazy_imports.keys()) + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/haystack/lazy_imports.py b/haystack/lazy_imports.py index a2688fb624..b2c23e2ede 100644 --- a/haystack/lazy_imports.py +++ 
b/haystack/lazy_imports.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import importlib from types import TracebackType from typing import Optional, Type @@ -48,3 +49,22 @@ def __exit__( self._deferred = (exc_value, message) return True return None + + +def lazy_getattr(attr_name, lazy_imports, parent_module): + """ + Lazy loads a module and fetches the requested attribute. + """ + + if attr_name in lazy_imports: + module = importlib.import_module(lazy_imports[attr_name]) + return getattr(module, attr_name) + + raise AttributeError(f"module '{parent_module}' has no attribute '{attr_name}'") + + +def lazy_dir(lazy_imports): + """ + Returns a dynamically generated list of all available attributes. + """ + return sorted(set(lazy_imports.keys()) | set(globals())) diff --git a/haystack/utils/__init__.py b/haystack/utils/__init__.py index cc46f07f43..a4cf97528a 100644 --- a/haystack/utils/__init__.py +++ b/haystack/utils/__init__.py @@ -1,34 +1,48 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from typing import TYPE_CHECKING + +from haystack.lazy_imports import lazy_dir, lazy_getattr -from .auth import Secret, deserialize_secrets_inplace -from .callable_serialization import deserialize_callable, serialize_callable -from .device import ComponentDevice, Device, DeviceMap, DeviceType -from .docstore_deserialization import deserialize_document_store_in_init_params_inplace from .expit import expit -from .filters import document_matches_filter, raise_on_invalid_filter_syntax -from .jinja2_extensions import Jinja2TimeExtension -from .jupyter import is_in_jupyter -from .requests_utils import request_with_retry -from .type_serialization import deserialize_type, serialize_type - -__all__ = [ - "Secret", - "deserialize_secrets_inplace", - "ComponentDevice", - "Device", - "DeviceMap", - "DeviceType", - "expit", - "document_matches_filter", - "raise_on_invalid_filter_syntax", - "is_in_jupyter", - "request_with_retry", - 
"serialize_callable", - "deserialize_callable", - "serialize_type", - "deserialize_type", - "deserialize_document_store_in_init_params_inplace", - "Jinja2TimeExtension", -] + +if TYPE_CHECKING: + from .auth import Secret, deserialize_secrets_inplace + from .callable_serialization import deserialize_callable, serialize_callable + from .device import ComponentDevice, Device, DeviceMap, DeviceType + from .docstore_deserialization import deserialize_document_store_in_init_params_inplace + from .filters import document_matches_filter, raise_on_invalid_filter_syntax + from .jinja2_extensions import Jinja2TimeExtension + from .jupyter import is_in_jupyter + from .requests_utils import request_with_retry + from .type_serialization import deserialize_type, serialize_type + +_lazy_imports = { + "Secret": "haystack.utils.auth", + "deserialize_secrets_inplace": "haystack.utils.auth", + "deserialize_callable": "haystack.utils.callable_serialization", + "serialize_callable": "haystack.utils.callable_serialization", + "ComponentDevice": "haystack.utils.device", + "Device": "haystack.utils.device", + "DeviceMap": "haystack.utils.device", + "DeviceType": "haystack.utils.device", + "deserialize_document_store_in_init_params_inplace": "haystack.utils.docstore_deserialization", + "document_matches_filter": "haystack.utils.filters", + "raise_on_invalid_filter_syntax": "haystack.utils.filters", + "Jinja2TimeExtension": "haystack.utils.jinja2_extensions", + "is_in_jupyter": "haystack.utils.jupyter", + "request_with_retry": "haystack.utils.requests_utils", + "deserialize_type": "haystack.utils.type_serialization", + "serialize_type": "haystack.utils.type_serialization", +} + +__all__ = ["expit", *_lazy_imports.keys()] + + +def __getattr__(name): + return lazy_getattr(name, _lazy_imports, __name__) + + +def __dir__(): + return lazy_dir(_lazy_imports) diff --git a/releasenotes/notes/added-init-lazy-imports-042eebe124f98e5f.yaml b/releasenotes/notes/added-init-lazy-imports-042eebe124f98e5f.yaml 
new file mode 100644 index 0000000000..f1e52c8f87 --- /dev/null +++ b/releasenotes/notes/added-init-lazy-imports-042eebe124f98e5f.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Updated the __init__.py files of the components and utils packages to import their public names lazily, reducing package load time.