Commit

Merge branch 'main' into cz/fix-fass

Cristhianzl authored Feb 3, 2025
2 parents 02d0393 + ea5806f commit 770527e
Showing 39 changed files with 1,472 additions and 278 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codeflash.yml
@@ -22,7 +22,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v4
- uses: astral-sh/setup-uv@v5
with:
enable-cache: true
python-version: "3.12"
2 changes: 1 addition & 1 deletion .github/workflows/conventional-labels.yml
@@ -13,7 +13,7 @@ jobs:
steps:
- name: Validate the pull request
id: validate
uses: Namchee/[email protected].5
uses: Namchee/[email protected].6
with:
access_token: ${{ secrets.GITHUB_TOKEN }}
issue: false
2 changes: 1 addition & 1 deletion .github/workflows/js_autofix.yml
@@ -42,4 +42,4 @@ jobs:
cd src/frontend
npm run format
- uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c
- uses: autofix-ci/action@551dded8c6cc8a1054039c8bc0b8b48c51dfc6ef
4 changes: 2 additions & 2 deletions .github/workflows/py_autofix.yml
@@ -16,7 +16,7 @@ jobs:
uses: ./.github/actions/setup-uv
- run: uv run ruff check --fix-only .
- run: uv run ruff format .
- uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c
- uses: autofix-ci/action@551dded8c6cc8a1054039c8bc0b8b48c51dfc6ef
- name: Minimize uv cache
run: uv cache prune --ci

@@ -36,7 +36,7 @@ jobs:
- name: Run starter projects update
run: uv run python scripts/ci/update_starter_projects.py

- uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c
- uses: autofix-ci/action@551dded8c6cc8a1054039c8bc0b8b48c51dfc6ef

- name: Minimize uv cache
run: uv cache prune --ci
4 changes: 2 additions & 2 deletions .github/workflows/python_test.yml
@@ -50,7 +50,7 @@ jobs:
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install uv
uses: astral-sh/setup-uv@v4
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
@@ -87,7 +87,7 @@ jobs:
with:
ref: ${{ inputs.ref || github.ref }}
- name: Install uv
uses: astral-sh/setup-uv@v4
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
2 changes: 1 addition & 1 deletion .github/workflows/store_pytest_durations.yml
@@ -24,7 +24,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v4
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-dependency-glob: "uv.lock"
12 changes: 12 additions & 0 deletions docs/docs/Configuration/configuration-authentication.md
@@ -169,6 +169,18 @@ The generated secret key value is now used to encrypt your global variables.

If no key is provided, Langflow automatically generates a secure key. This is not recommended for production environments, because in a multi-instance deployment such as Kubernetes, each instance auto-generates its own key and cannot decrypt data encrypted by the other instances. Instead, explicitly set the `LANGFLOW_SECRET_KEY` environment variable in the deployment configuration to the same value across all instances.

### Rotate the LANGFLOW_SECRET_KEY

To rotate the key, follow these steps.

1. Create a new `LANGFLOW_SECRET_KEY` with the command in [Create a LANGFLOW_SECRET_KEY](#create-a-langflow_secret_key); a sketch of the equivalent Python is shown after these steps.
2. Stop your Langflow instance.
3. Update the `LANGFLOW_SECRET_KEY` in your `.env` file with the new key.
4. Restart Langflow with the updated environment file:
```bash
langflow run --env-file .env
```
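
For reference, a key can also be generated directly with Python's `secrets` module. This is a minimal sketch; the canonical command lives in the linked [Create a LANGFLOW_SECRET_KEY](#create-a-langflow_secret_key) section, and the assumption here is that any sufficiently long URL-safe string is accepted:

```python
# Print a URL-safe random string suitable for use as LANGFLOW_SECRET_KEY.
# Assumption: the exact command the docs recommend may differ; see the
# "Create a LANGFLOW_SECRET_KEY" section for the canonical version.
from secrets import token_urlsafe

print(token_urlsafe(32))
```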

### LANGFLOW_NEW_USER_IS_ACTIVE

By default, this variable is set to `False`. When enabled, new users are automatically activated and can log in without requiring explicit activation by the superuser.
1 change: 0 additions & 1 deletion src/backend/base/langflow/api/v1/chat.py
@@ -389,7 +389,6 @@ async def event_generator(event_manager: EventManager, client_consumed_queue: as
raise
event_manager.on_vertices_sorted(data={"ids": ids, "to_run": vertices_to_run})
await client_consumed_queue.get()

tasks = []
for vertex_id in ids:
task = asyncio.create_task(build_vertices(vertex_id, graph, client_consumed_queue, event_manager))
62 changes: 7 additions & 55 deletions src/backend/base/langflow/api/v1/schemas.py
@@ -1,5 +1,4 @@
from datetime import datetime, timezone
from decimal import Decimal
from enum import Enum
from pathlib import Path
from typing import Any
@@ -11,13 +10,14 @@
from langflow.schema import dotdict
from langflow.schema.graph import Tweaks
from langflow.schema.schema import InputType, OutputType, OutputValue
from langflow.serialization.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH
from langflow.serialization.serialization import serialize
from langflow.services.database.models.api_key.model import ApiKeyRead
from langflow.services.database.models.base import orjson_dumps
from langflow.services.database.models.flow import FlowCreate, FlowRead
from langflow.services.database.models.user import UserRead
from langflow.services.settings.feature_flags import FeatureFlags
from langflow.services.tracing.schema import Log
from langflow.utils.constants import MAX_ITEMS_LENGTH, MAX_TEXT_LENGTH
from langflow.utils.util_strings import truncate_long_strings


@@ -270,65 +270,17 @@ class ResultDataResponse(BaseModel):
@classmethod
def serialize_results(cls, v):
"""Serialize results with custom handling for special types and truncation."""
if isinstance(v, dict):
return {key: cls._serialize_and_truncate(val, max_length=MAX_TEXT_LENGTH) for key, val in v.items()}
return cls._serialize_and_truncate(v, max_length=MAX_TEXT_LENGTH)

@staticmethod
def _serialize_and_truncate(obj: Any, max_length: int = MAX_TEXT_LENGTH) -> Any:
"""Helper method to serialize and truncate values."""
if isinstance(obj, bytes):
obj = obj.decode("utf-8", errors="ignore")
if len(obj) > max_length:
return f"{obj[:max_length]}... [truncated]"
return obj
if isinstance(obj, str):
if len(obj) > max_length:
return f"{obj[:max_length]}... [truncated]"
return obj
if isinstance(obj, datetime):
return obj.replace(tzinfo=timezone.utc).isoformat()
if isinstance(obj, Decimal):
return float(obj)
if isinstance(obj, UUID):
return str(obj)
if isinstance(obj, OutputValue | Log):
# First serialize the model
serialized = obj.model_dump()
# Then recursively truncate all values in the serialized dict
for key, value in serialized.items():
# Handle string values directly to ensure proper truncation
if isinstance(value, str) and len(value) > max_length:
serialized[key] = f"{value[:max_length]}... [truncated]"
else:
serialized[key] = ResultDataResponse._serialize_and_truncate(value, max_length=max_length)
return serialized
if isinstance(obj, BaseModel):
# For other BaseModel instances, serialize all fields
serialized = obj.model_dump()
return {
k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in serialized.items()
}
if isinstance(obj, dict):
return {k: ResultDataResponse._serialize_and_truncate(v, max_length=max_length) for k, v in obj.items()}
if isinstance(obj, list | tuple):
# If list is too long, truncate it
if len(obj) > MAX_ITEMS_LENGTH:
truncated_list = list(obj)[:MAX_ITEMS_LENGTH]
truncated_list.append(f"... [truncated {len(obj) - MAX_ITEMS_LENGTH} items]")
obj = truncated_list
return [ResultDataResponse._serialize_and_truncate(item, max_length=max_length) for item in obj]
return obj
return serialize(v, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH)

@model_serializer(mode="plain")
def serialize_model(self) -> dict:
"""Custom serializer for the entire model."""
return {
"results": self.serialize_results(self.results),
"outputs": self._serialize_and_truncate(self.outputs, max_length=MAX_TEXT_LENGTH),
"logs": self._serialize_and_truncate(self.logs, max_length=MAX_TEXT_LENGTH),
"message": self._serialize_and_truncate(self.message, max_length=MAX_TEXT_LENGTH),
"artifacts": self._serialize_and_truncate(self.artifacts, max_length=MAX_TEXT_LENGTH),
"outputs": serialize(self.outputs, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH),
"logs": serialize(self.logs, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH),
"message": serialize(self.message, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH),
"artifacts": serialize(self.artifacts, max_length=MAX_TEXT_LENGTH, max_items=MAX_ITEMS_LENGTH),
"timedelta": self.timedelta,
"duration": self.duration,
"used_frozen_result": self.used_frozen_result,
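The 45-line `_serialize_and_truncate` helper above is deleted in favor of the shared `langflow.serialization.serialize` entry point. A minimal standalone sketch of the truncation semantics that helper centralized, based on the removed code (a toy, not the actual Langflow function; the constant values are assumptions, and the real implementation also handles `datetime`, `Decimal`, `UUID`, `bytes`, and `BaseModel`):

```python
from typing import Any

MAX_TEXT_LENGTH = 1000   # assumption: real values live in
MAX_ITEMS_LENGTH = 100   # langflow.serialization.constants

def serialize_sketch(obj: Any, max_length: int = MAX_TEXT_LENGTH,
                     max_items: int = MAX_ITEMS_LENGTH) -> Any:
    # Strings over the limit are cut and flagged, mirroring the removed helper.
    if isinstance(obj, str):
        return f"{obj[:max_length]}... [truncated]" if len(obj) > max_length else obj
    # Dicts and sequences are truncated recursively.
    if isinstance(obj, dict):
        return {k: serialize_sketch(v, max_length, max_items) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        items = list(obj)
        if len(items) > max_items:
            items = items[:max_items] + [f"... [truncated {len(obj) - max_items} items]"]
        return [serialize_sketch(i, max_length, max_items) for i in items]
    return obj
```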
82 changes: 50 additions & 32 deletions src/backend/base/langflow/components/vectorstores/astradb.py
@@ -122,6 +122,7 @@ class NewCollectionInput:
display_name="Environment",
info="The environment for the Astra DB API Endpoint.",
advanced=True,
real_time_refresh=True,
),
DropdownInput(
name="api_endpoint",
@@ -315,11 +316,16 @@ def get_database_list_static(cls, token: str, environment: str | None = None):
# Get the list of databases
db_list = list(admin_client.list_databases())

# Set the environment properly
env_string = ""
if environment and environment != "prod":
env_string = f"-{environment}"

# Generate the api endpoint for each database
db_info_dict = {}
for db in db_list:
try:
api_endpoint = f"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com"
api_endpoint = f"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com"
db_info_dict[db.info.name] = {
"api_endpoint": api_endpoint,
"collections": len(
@@ -473,6 +479,36 @@ def _initialize_collection_options(self, api_endpoint: str | None = None):
for col in collection_list
]

def reset_collection_list(self, build_config: dict):
# Get the list of options we have based on the token provided
collection_options = self._initialize_collection_options()

# If we retrieved options based on the token, show the dropdown
build_config["collection_name"]["options"] = [col["name"] for col in collection_options]
build_config["collection_name"]["options_metadata"] = [
{k: v for k, v in col.items() if k not in ["name"]} for col in collection_options
]

# Reset the selected collection
build_config["collection_name"]["value"] = ""

return build_config

def reset_database_list(self, build_config: dict):
# Get the list of options we have based on the token provided
database_options = self._initialize_database_options()

# If we retrieved options based on the token, show the dropdown
build_config["api_endpoint"]["options"] = [db["name"] for db in database_options]
build_config["api_endpoint"]["options_metadata"] = [
{k: v for k, v in db.items() if k not in ["name"]} for db in database_options
]

# Reset the selected database
build_config["api_endpoint"]["value"] = ""

return build_config

def reset_build_config(self, build_config: dict):
# Reset the list of databases we have based on the token provided
build_config["api_endpoint"]["options"] = []
@@ -489,25 +525,17 @@ def reset_build_config(self, build_config: dict):

def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
# When the component first executes, this is the update refresh call
first_run = field_name == "collection_name" and not field_value
first_run = field_name == "collection_name" and not field_value and not build_config["api_endpoint"]["options"]

# If the token has not been provided, simply return
if not self.token or field_name == "environment":
if not self.token:
return self.reset_build_config(build_config)

# Refresh the database name options
if first_run or field_name == "token":
# If this is the first execution of the component, reset and build database list
if first_run or field_name in ["token", "environment"]:
# Reset the build config to ensure we are starting fresh
build_config = self.reset_build_config(build_config)

# Get the list of options we have based on the token provided
database_options = self._initialize_database_options()

# If we retrieved options based on the token, show the dropdown
build_config["api_endpoint"]["options"] = [db["name"] for db in database_options]
build_config["api_endpoint"]["options_metadata"] = [
{k: v for k, v in db.items() if k not in ["name"]} for db in database_options
]
build_config = self.reset_database_list(build_config)

# Get list of regions for a given cloud provider
"""
@@ -526,8 +554,9 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:

# Refresh the collection name options
if field_name == "api_endpoint":
# Reset the selected collection
build_config["collection_name"]["value"] = ""
# If missing, refresh the database options
if not build_config["api_endpoint"]["options"] or not field_value:
return self.update_build_config(build_config, field_value=self.token, field_name="token")

# Set the underlying api endpoint value of the database
if field_value in build_config["api_endpoint"]["options"]:
Expand All @@ -538,21 +567,14 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
else:
build_config["d_api_endpoint"]["value"] = ""

# Reload the list of collections and metadata associated
collection_options = self._initialize_collection_options(
api_endpoint=build_config["d_api_endpoint"]["value"]
)

# If we have collections, show the dropdown
build_config["collection_name"]["options"] = [col["name"] for col in collection_options]
build_config["collection_name"]["options_metadata"] = [
{k: v for k, v in col.items() if k not in ["name"]} for col in collection_options
]

return build_config
# Reset the list of collections we have based on the token provided
return self.reset_collection_list(build_config)

# Hide embedding model option if options_metadata provider is not null
if field_name == "collection_name" and field_value:
# Assume we will be autodetecting the collection:
build_config["autodetect_collection"]["value"] = True

# Set the options for collection name to be the field value if it's a new collection
if field_value not in build_config["collection_name"]["options"]:
# Add the new collection to the list of options
@@ -563,13 +585,9 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:

# Ensure that autodetect collection is set to False, since it's a new collection
build_config["autodetect_collection"]["value"] = False
else:
build_config["autodetect_collection"]["value"] = True

# Find the position of the selected collection to align with metadata
index_of_name = build_config["collection_name"]["options"].index(field_value)

# Get the provider value of the selected collection
value_of_provider = build_config["collection_name"]["options_metadata"][index_of_name]["provider"]

# If we were able to determine the Vectorize provider, set it accordingly
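Among the changes above, the new `env_string` logic makes the generated Astra DB API endpoint environment-aware. A small sketch of the resulting URLs, derived from the diff (the function name, database ID, and region below are hypothetical):

```python
def astra_api_endpoint(db_id: str, region: str, environment: str | None = None) -> str:
    # Non-prod environments add a "-{environment}" infix, as in the diff above.
    env_string = f"-{environment}" if environment and environment != "prod" else ""
    return f"https://{db_id}-{region}.apps.astra{env_string}.datastax.com"

print(astra_api_endpoint("1234abcd", "us-east-2"))
# https://1234abcd-us-east-2.apps.astra.datastax.com
print(astra_api_endpoint("1234abcd", "us-east-2", "dev"))
# https://1234abcd-us-east-2.apps.astra-dev.datastax.com
```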
12 changes: 9 additions & 3 deletions src/backend/base/langflow/components/vectorstores/pinecone.py
@@ -1,5 +1,5 @@
import numpy as np
from langchain_pinecone import Pinecone
from langchain_core.vectorstores import VectorStore

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers.data import docs_to_data
@@ -42,8 +42,14 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent):
]

@check_cached_vector_store
def build_vector_store(self) -> Pinecone:
def build_vector_store(self) -> VectorStore:
"""Build and return a Pinecone vector store instance."""
try:
from langchain_pinecone import PineconeVectorStore
except ImportError as e:
msg = "langchain-pinecone is not installed. Please install it with `pip install langchain-pinecone`."
raise ValueError(msg) from e

try:
from langchain_pinecone._utilities import DistanceStrategy

Expand All @@ -55,7 +61,7 @@ def build_vector_store(self) -> Pinecone:
distance_strategy = DistanceStrategy[distance_strategy]

# Initialize Pinecone instance with wrapped embeddings
pinecone = Pinecone(
pinecone = PineconeVectorStore(
index_name=self.index_name,
embedding=wrapped_embeddings, # Use wrapped embeddings
text_key=self.text_key,
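The Pinecone change defers the optional `langchain-pinecone` import until the store is actually built, so the component module can load even when the package is not installed. A minimal sketch of that deferred-import pattern (the function name and argument set are illustrative, taken from the diff above):

```python
from langchain_core.vectorstores import VectorStore

def build_pinecone_store(index_name: str, embeddings, text_key: str = "text") -> VectorStore:
    # Import inside the function so a missing optional dependency fails late,
    # with an actionable message, rather than at module import time.
    try:
        from langchain_pinecone import PineconeVectorStore
    except ImportError as e:
        msg = "langchain-pinecone is not installed. Please install it with `pip install langchain-pinecone`."
        raise ValueError(msg) from e
    return PineconeVectorStore(index_name=index_name, embedding=embeddings, text_key=text_key)
```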
6 changes: 3 additions & 3 deletions src/backend/base/langflow/graph/schema.py
@@ -3,8 +3,8 @@

from pydantic import BaseModel, Field, field_serializer, model_validator

from langflow.graph.utils import serialize_field
from langflow.schema.schema import OutputValue, StreamURL
from langflow.serialization import serialize
from langflow.utils.schemas import ChatOutputResponse, ContainsEnumMeta


@@ -23,8 +23,8 @@ class ResultData(BaseModel):
@field_serializer("results")
def serialize_results(self, value):
if isinstance(value, dict):
return {key: serialize_field(val) for key, val in value.items()}
return serialize_field(value)
return {key: serialize(val) for key, val in value.items()}
return serialize(value)

@model_validator(mode="before")
@classmethod
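The same serialization consolidation lands here: `serialize_field` gives way to the shared `serialize` helper, wired in through Pydantic's `field_serializer`. A self-contained toy showing that wiring (the stub `serialize` below is an assumption standing in for `langflow.serialization.serialize`):

```python
from pydantic import BaseModel, field_serializer

def serialize(value, max_length: int = 1000):
    # Stub for langflow.serialization.serialize: truncate long strings.
    if isinstance(value, str) and len(value) > max_length:
        return f"{value[:max_length]}... [truncated]"
    return value

class ToyResultData(BaseModel):
    results: dict | str | None = None

    @field_serializer("results")
    def serialize_results(self, value):
        # Mirror the diff: serialize each dict value, or the value itself.
        if isinstance(value, dict):
            return {key: serialize(val) for key, val in value.items()}
        return serialize(value)

print(ToyResultData(results={"text": "x" * 2000}).model_dump())
```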