Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Address performance by lazy loading imports inside init files #8706

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 46 additions & 32 deletions haystack/components/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,49 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.converters.azure import AzureOCRDocumentConverter
from haystack.components.converters.csv import CSVToDocument
from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument
from haystack.components.converters.html import HTMLToDocument
from haystack.components.converters.json import JSONConverter
from haystack.components.converters.markdown import MarkdownToDocument
from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions
from haystack.components.converters.output_adapter import OutputAdapter
from haystack.components.converters.pdfminer import PDFMinerToDocument
from haystack.components.converters.pptx import PPTXToDocument
from haystack.components.converters.pypdf import PyPDFToDocument
from haystack.components.converters.tika import TikaDocumentConverter
from haystack.components.converters.txt import TextFileToDocument
from haystack.components.converters.xlsx import XLSXToDocument

__all__ = [
"TextFileToDocument",
"TikaDocumentConverter",
"AzureOCRDocumentConverter",
"PyPDFToDocument",
"PDFMinerToDocument",
"HTMLToDocument",
"MarkdownToDocument",
"OpenAPIServiceToFunctions",
"OutputAdapter",
"DOCXToDocument",
"DOCXMetadata",
"PPTXToDocument",
"CSVToDocument",
"JSONConverter",
"XLSXToDocument",
]
from typing import TYPE_CHECKING

from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
    # Only evaluated by static type checkers; at runtime the names are resolved lazily.
    from haystack.components.converters.azure import AzureOCRDocumentConverter
    from haystack.components.converters.csv import CSVToDocument
    from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument
    from haystack.components.converters.html import HTMLToDocument
    from haystack.components.converters.json import JSONConverter
    from haystack.components.converters.markdown import MarkdownToDocument
    from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions
    from haystack.components.converters.output_adapter import OutputAdapter
    from haystack.components.converters.pdfminer import PDFMinerToDocument
    from haystack.components.converters.pptx import PPTXToDocument
    from haystack.components.converters.pypdf import PyPDFToDocument
    from haystack.components.converters.tika import TikaDocumentConverter
    from haystack.components.converters.txt import TextFileToDocument
    from haystack.components.converters.xlsx import XLSXToDocument


# Maps every public name of this package to the submodule that defines it.
# Each value must be the module that really contains the name, because the
# attribute is fetched from that module when it is first accessed.
_lazy_imports = {
    "TextFileToDocument": "haystack.components.converters.txt",
    "TikaDocumentConverter": "haystack.components.converters.tika",
    "AzureOCRDocumentConverter": "haystack.components.converters.azure",
    "PyPDFToDocument": "haystack.components.converters.pypdf",
    "PDFMinerToDocument": "haystack.components.converters.pdfminer",
    "HTMLToDocument": "haystack.components.converters.html",
    "MarkdownToDocument": "haystack.components.converters.markdown",
    "OpenAPIServiceToFunctions": "haystack.components.converters.openapi_functions",
    "OutputAdapter": "haystack.components.converters.output_adapter",
    "DOCXToDocument": "haystack.components.converters.docx",
    "DOCXMetadata": "haystack.components.converters.docx",
    "PPTXToDocument": "haystack.components.converters.pptx",
    "CSVToDocument": "haystack.components.converters.csv",
    "JSONConverter": "haystack.components.converters.json",
    "XLSXToDocument": "haystack.components.converters.xlsx",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
56 changes: 37 additions & 19 deletions haystack/components/embedders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,40 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder
from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder
from haystack.components.embedders.hugging_face_api_document_embedder import HuggingFaceAPIDocumentEmbedder
from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder
from haystack.components.embedders.openai_document_embedder import OpenAIDocumentEmbedder
from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder
from haystack.components.embedders.sentence_transformers_document_embedder import SentenceTransformersDocumentEmbedder
from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder

__all__ = [
"HuggingFaceAPITextEmbedder",
"HuggingFaceAPIDocumentEmbedder",
"SentenceTransformersTextEmbedder",
"SentenceTransformersDocumentEmbedder",
"OpenAITextEmbedder",
"OpenAIDocumentEmbedder",
"AzureOpenAITextEmbedder",
"AzureOpenAIDocumentEmbedder",
]
from typing import TYPE_CHECKING

from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.embedders.azure_document_embedder import AzureOpenAIDocumentEmbedder
from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder
from haystack.components.embedders.hugging_face_api_document_embedder import HuggingFaceAPIDocumentEmbedder
from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder
from haystack.components.embedders.openai_document_embedder import OpenAIDocumentEmbedder
from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder
from haystack.components.embedders.sentence_transformers_document_embedder import (
SentenceTransformersDocumentEmbedder,
)
from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder


# Public embedder name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "AzureOpenAIDocumentEmbedder": "haystack.components.embedders.azure_document_embedder",
    "AzureOpenAITextEmbedder": "haystack.components.embedders.azure_text_embedder",
    "HuggingFaceAPIDocumentEmbedder": "haystack.components.embedders.hugging_face_api_document_embedder",
    "HuggingFaceAPITextEmbedder": "haystack.components.embedders.hugging_face_api_text_embedder",
    "OpenAIDocumentEmbedder": "haystack.components.embedders.openai_document_embedder",
    "OpenAITextEmbedder": "haystack.components.embedders.openai_text_embedder",
    "SentenceTransformersDocumentEmbedder": "haystack.components.embedders.sentence_transformers_document_embedder",
    "SentenceTransformersTextEmbedder": "haystack.components.embedders.sentence_transformers_text_embedder",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
46 changes: 31 additions & 15 deletions haystack/components/generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,34 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.generators.openai import ( # noqa: I001 (otherwise we end up with partial imports)
OpenAIGenerator,
)
from haystack.components.generators.azure import AzureOpenAIGenerator
from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator
from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator
from haystack.components.generators.openai_dalle import DALLEImageGenerator

__all__ = [
"HuggingFaceLocalGenerator",
"HuggingFaceAPIGenerator",
"OpenAIGenerator",
"AzureOpenAIGenerator",
"DALLEImageGenerator",
]
from typing import TYPE_CHECKING

from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.generators.openai import ( # noqa: I001 (otherwise we end up with partial imports)
OpenAIGenerator,
)
from haystack.components.generators.azure import AzureOpenAIGenerator
from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator
from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator
from haystack.components.generators.openai_dalle import DALLEImageGenerator


# Public generator name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "OpenAIGenerator": "haystack.components.generators.openai",
    "AzureOpenAIGenerator": "haystack.components.generators.azure",
    "HuggingFaceLocalGenerator": "haystack.components.generators.hugging_face_local",
    "HuggingFaceAPIGenerator": "haystack.components.generators.hugging_face_api",
    "DALLEImageGenerator": "haystack.components.generators.openai_dalle",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
34 changes: 28 additions & 6 deletions haystack/components/preprocessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,32 @@
#
# SPDX-License-Identifier: Apache-2.0

from .document_cleaner import DocumentCleaner
from .document_splitter import DocumentSplitter
from .nltk_document_splitter import NLTKDocumentSplitter
from .sentence_tokenizer import SentenceSplitter
from .text_cleaner import TextCleaner
from typing import TYPE_CHECKING

__all__ = ["DocumentSplitter", "DocumentCleaner", "NLTKDocumentSplitter", "SentenceSplitter", "TextCleaner"]
from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.preprocessors.document_cleaner import DocumentCleaner
from haystack.components.preprocessors.document_splitter import DocumentSplitter
from haystack.components.preprocessors.nltk_document_splitter import NLTKDocumentSplitter
from haystack.components.preprocessors.sentence_tokenizer import SentenceSplitter
from haystack.components.preprocessors.text_cleaner import TextCleaner


# Public preprocessor name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "DocumentCleaner": "haystack.components.preprocessors.document_cleaner",
    "DocumentSplitter": "haystack.components.preprocessors.document_splitter",
    "NLTKDocumentSplitter": "haystack.components.preprocessors.nltk_document_splitter",
    "SentenceSplitter": "haystack.components.preprocessors.sentence_tokenizer",
    "TextCleaner": "haystack.components.preprocessors.text_cleaner",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
42 changes: 29 additions & 13 deletions haystack/components/rankers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,32 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.rankers.lost_in_the_middle import LostInTheMiddleRanker
from haystack.components.rankers.meta_field import MetaFieldRanker
from haystack.components.rankers.meta_field_grouping_ranker import MetaFieldGroupingRanker
from haystack.components.rankers.sentence_transformers_diversity import SentenceTransformersDiversityRanker
from haystack.components.rankers.transformers_similarity import TransformersSimilarityRanker

__all__ = [
"LostInTheMiddleRanker",
"MetaFieldRanker",
"MetaFieldGroupingRanker",
"SentenceTransformersDiversityRanker",
"TransformersSimilarityRanker",
]
from typing import TYPE_CHECKING

from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.rankers.lost_in_the_middle import LostInTheMiddleRanker
from haystack.components.rankers.meta_field import MetaFieldRanker
from haystack.components.rankers.meta_field_grouping_ranker import MetaFieldGroupingRanker
from haystack.components.rankers.sentence_transformers_diversity import SentenceTransformersDiversityRanker
from haystack.components.rankers.transformers_similarity import TransformersSimilarityRanker


# Public ranker name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "LostInTheMiddleRanker": "haystack.components.rankers.lost_in_the_middle",
    "MetaFieldRanker": "haystack.components.rankers.meta_field",
    "MetaFieldGroupingRanker": "haystack.components.rankers.meta_field_grouping_ranker",
    "SentenceTransformersDiversityRanker": "haystack.components.rankers.sentence_transformers_diversity",
    "TransformersSimilarityRanker": "haystack.components.rankers.transformers_similarity",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
30 changes: 25 additions & 5 deletions haystack/components/retrievers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,29 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.retrievers.filter_retriever import FilterRetriever
from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever
from haystack.components.retrievers.sentence_window_retriever import SentenceWindowRetriever
from typing import TYPE_CHECKING

__all__ = ["FilterRetriever", "InMemoryEmbeddingRetriever", "InMemoryBM25Retriever", "SentenceWindowRetriever"]
from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.retrievers.filter_retriever import FilterRetriever
from haystack.components.retrievers.in_memory.bm25_retriever import InMemoryBM25Retriever
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever
from haystack.components.retrievers.sentence_window_retriever import SentenceWindowRetriever

# Public retriever name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "FilterRetriever": "haystack.components.retrievers.filter_retriever",
    "InMemoryBM25Retriever": "haystack.components.retrievers.in_memory.bm25_retriever",
    "InMemoryEmbeddingRetriever": "haystack.components.retrievers.in_memory.embedding_retriever",
    "SentenceWindowRetriever": "haystack.components.retrievers.sentence_window_retriever",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
46 changes: 31 additions & 15 deletions haystack/components/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,34 @@
#
# SPDX-License-Identifier: Apache-2.0

from haystack.components.routers.conditional_router import ConditionalRouter
from haystack.components.routers.file_type_router import FileTypeRouter
from haystack.components.routers.metadata_router import MetadataRouter
from haystack.components.routers.text_language_router import TextLanguageRouter
from haystack.components.routers.transformers_text_router import TransformersTextRouter
from haystack.components.routers.zero_shot_text_router import TransformersZeroShotTextRouter

__all__ = [
"FileTypeRouter",
"MetadataRouter",
"TextLanguageRouter",
"ConditionalRouter",
"TransformersZeroShotTextRouter",
"TransformersTextRouter",
]
from typing import TYPE_CHECKING

from haystack.lazy_imports import lazy_dir, lazy_getattr

if TYPE_CHECKING:
from haystack.components.routers.conditional_router import ConditionalRouter
from haystack.components.routers.file_type_router import FileTypeRouter
from haystack.components.routers.metadata_router import MetadataRouter
from haystack.components.routers.text_language_router import TextLanguageRouter
from haystack.components.routers.transformers_text_router import TransformersTextRouter
from haystack.components.routers.zero_shot_text_router import TransformersZeroShotTextRouter


# Public router name -> submodule that defines it (looked up on first access).
_lazy_imports = {
    "ConditionalRouter": "haystack.components.routers.conditional_router",
    "FileTypeRouter": "haystack.components.routers.file_type_router",
    "MetadataRouter": "haystack.components.routers.metadata_router",
    "TextLanguageRouter": "haystack.components.routers.text_language_router",
    "TransformersTextRouter": "haystack.components.routers.transformers_text_router",
    "TransformersZeroShotTextRouter": "haystack.components.routers.zero_shot_text_router",
}

# The public names of this package are exactly the keys of the lazy-import table.
__all__ = [*_lazy_imports]


def __getattr__(name):
    """
    Module-level attribute hook: hands the lookup of `name` over to `lazy_getattr`.
    """
    resolved = lazy_getattr(name, _lazy_imports, __name__)
    return resolved


def __dir__():
    """
    Module-level `dir()` hook: returns the listing produced by `lazy_dir`.
    """
    listing = lazy_dir(_lazy_imports)
    return listing
20 changes: 20 additions & 0 deletions haystack/lazy_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import importlib
from types import TracebackType
from typing import Optional, Type

Expand Down Expand Up @@ -48,3 +49,22 @@ def __exit__(
self._deferred = (exc_value, message)
return True
return None


def lazy_getattr(attr_name, lazy_imports, parent_module):
    """
    Lazy loads a module and fetches the requested attribute.

    :param attr_name: Name of the attribute that was looked up on the parent module.
    :param lazy_imports: Mapping from attribute name to the dotted path of the module that defines it.
    :param parent_module: Name of the module the lookup was made on; only used in the error message.
    :returns: The attribute `attr_name` taken from the imported module.
    :raises AttributeError: If `attr_name` is not a key of `lazy_imports`.
    """
    if attr_name not in lazy_imports:
        # Same wording as Python's own error for a missing module attribute.
        raise AttributeError(f"module '{parent_module}' has no attribute '{attr_name}'")

    module = importlib.import_module(lazy_imports[attr_name])
    return getattr(module, attr_name)


def lazy_dir(lazy_imports, module_globals=None):
    """
    Returns a dynamically generated list of all available attributes.

    :param lazy_imports: Mapping whose keys are the lazily importable attribute names.
    :param module_globals: Namespace of the module being listed. When omitted, the globals of the
        calling function are used, so a package's `__dir__` hook reports its own names.
    :returns: Sorted list of the lazy names together with the names in the module namespace.
    """
    if module_globals is None:
        import sys

        # A bare `globals()` here would be the namespace of this helper module,
        # not of the package whose `__dir__` called us, so look at the caller's frame.
        module_globals = sys._getframe(1).f_globals

    return sorted(set(lazy_imports) | set(module_globals))
Loading
Loading