diff --git a/contributing/samples/sdc_agents_demo/README.md b/contributing/samples/sdc_agents_demo/README.md new file mode 100644 index 00000000..a8b6374d --- /dev/null +++ b/contributing/samples/sdc_agents_demo/README.md @@ -0,0 +1,58 @@ +# SDC Agents Demo + +A minimal example composing SDC Agents toolsets with an ADK `LlmAgent`. + +## Prerequisites + +- Python 3.11+ +- An SDCStudio API key and a Google API key (set as the `SDC_API_KEY` and `GOOGLE_API_KEY` environment variables) + +## Setup + +```bash +pip install google-adk-community[sdc-agents] + +export SDC_API_KEY="your-sdcstudio-api-key" +export GOOGLE_API_KEY="your-google-api-key" +``` + +## Usage + +```bash +# Run with the ADK CLI +adk run . + +# Or use the ADK web UI +adk web . +``` + +## What This Demo Does + +The agent composes five SDC Agents toolsets (`IntrospectToolset`, `CatalogToolset`, `MappingToolset`, `AssemblyToolset`, and `ValidationToolset`). Two of them are highlighted here: + +- **CatalogToolset**: Search published SDC4 schemas, download artifacts + (XSD, RDF, JSON-LD), and check wallet balance. +- **IntrospectToolset**: Analyze a datasource to infer column types, + constraints, and statistics. + +## Sample Queries + +``` +> Search the catalog for schemas related to lab results +> Introspect the sample datasource +> What published schemas match the columns in my datasource? 
+``` + +## Structure + +``` +sdc_agents_demo/ +├── agent.py # Agent definition with SDC toolsets +└── README.md # This file +``` + +## Resources + +- [SDC Agents Documentation](https://github.com/SemanticDataCharter/SDC_Agents) +- [SDC Agents on PyPI](https://pypi.org/project/sdc-agents/) +- [ADK Integration Guide](https://github.com/SemanticDataCharter/SDC_Agents/blob/main/docs/integrations/ADK_INTEGRATION.md) diff --git a/contributing/samples/sdc_agents_demo/agent.py b/contributing/samples/sdc_agents_demo/agent.py new file mode 100644 index 00000000..3e030c52 --- /dev/null +++ b/contributing/samples/sdc_agents_demo/agent.py @@ -0,0 +1,79 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""SDC Agents demo -- full data governance pipeline. + +Composes five SDC Agents toolsets into a single LlmAgent that can +introspect datasources, discover matching catalog components, map +columns to schemas, and assemble validated data models. + +Prerequisites: + pip install google-adk-community[sdc-agents] + export SDC_API_KEY="your-sdcstudio-api-key" + +Usage: + adk run . 
+""" + +from google.adk.agents import LlmAgent + +from google.adk_community.sdc_agents import ( + CatalogToolset, + IntrospectToolset, + MappingToolset, + AssemblyToolset, + ValidationToolset, + SDCAgentsConfig, +) + +config = SDCAgentsConfig( + sdcstudio={ + "base_url": "https://sdcstudio.com", + "api_key": "${SDC_API_KEY}", + }, + datasources={ + "sample": { + "type": "csv", + "path": "./data/sample.csv", + }, + }, + cache={"root": ".sdc-cache"}, + audit={"path": ".sdc-cache/audit.jsonl"}, +) + +root_agent = LlmAgent( + name="sdc_demo_agent", + model="gemini-2.0-flash", + description=( + "Full data governance pipeline: introspect, discover, map," + " and assemble SDC4 data models." + ), + instruction=( + "You help data engineers govern their data. Follow this workflow:\n" + "1. Introspect the datasource to discover columns and types\n" + "2. Search the SDC4 catalog for matching published schemas\n" + "3. Discover catalog components that match the datasource structure\n" + "4. Map unmatched columns to schema components by similarity\n" + "5. Propose a cluster hierarchy for the data model\n" + "6. Assemble the final data model via the Assembly API\n" + "7. Validate the generated artifacts" + ), + tools=[ + IntrospectToolset(config=config), + CatalogToolset(config=config), + MappingToolset(config=config), + AssemblyToolset(config=config), + ValidationToolset(config=config), + ], +) diff --git a/pyproject.toml b/pyproject.toml index 11afcd82..d8dc3e17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,9 @@ test = [ "pytest>=8.4.2", "pytest-asyncio>=1.2.0", ] +sdc-agents = [ + "sdc-agents>=4.3.3", +] [tool.pyink] diff --git a/src/google/adk_community/__init__.py b/src/google/adk_community/__init__.py index 9a1dc35f..fb0b7d69 100644 --- a/src/google/adk_community/__init__.py +++ b/src/google/adk_community/__init__.py @@ -15,4 +15,9 @@ from . import memory from . import sessions from . import version + +try: + from . 
import sdc_agents +except ImportError: + pass # Optional: pip install google-adk-community[sdc-agents] __version__ = version.__version__ diff --git a/src/google/adk_community/sdc_agents/README.md b/src/google/adk_community/sdc_agents/README.md new file mode 100644 index 00000000..c1f67144 --- /dev/null +++ b/src/google/adk_community/sdc_agents/README.md @@ -0,0 +1,87 @@ +# SDC Agents -- Semantic Data Governance for ADK + +Thin re-export wrapper over the +[`sdc-agents`](https://pypi.org/project/sdc-agents/) PyPI package. The +canonical source lives at +[SemanticDataCharter/SDC_Agents](https://github.com/SemanticDataCharter/SDC_Agents); +this module provides importability through the `google.adk_community` +namespace. + +## Installation + +```bash +pip install google-adk-community[sdc-agents] +``` + +## Usage + +```python +from google.adk.agents import LlmAgent +from google.adk_community.sdc_agents import ( + load_config, + CatalogToolset, + IntrospectToolset, + MappingToolset, +) + +config = load_config("sdc-agents.yaml") + +agent = LlmAgent( + name="data_governance_agent", + model="gemini-2.0-flash", + description="Introspects data sources and maps them to SDC4 schemas.", + instruction=( + "You help data engineers govern their data. When given a datasource:\n" + "1. Introspect the structure to discover columns and types\n" + "2. Search the SDC4 catalog for matching published schemas\n" + "3. Map columns to schema components by type and name similarity\n" + "4. 
Report the mapping with confidence scores" + ), + tools=[ + IntrospectToolset(config=config), + CatalogToolset(config=config), + MappingToolset(config=config), + ], +) +``` + +## Exported Toolsets + +| Toolset | Description | +|---------|-------------| +| **CatalogToolset** | Discover published SDC4 schemas, download artifacts (XSD, RDF, JSON-LD) | +| **IntrospectToolset** | Analyze datasource structure -- infer column types and constraints from SQL, CSV, JSON, MongoDB with sidecar metadata support | +| **MappingToolset** | Match datasource columns to schema components by type compatibility and name similarity, persist mapping configs with schema and datasource context | +| **AssemblyToolset** | Compose data models from catalog components -- reuse existing or mint new, with catalog-first discovery and structured unmatched column reporting | +| **GeneratorToolset** | Generate validated XML instances, batch processing, and preview | +| **ValidationToolset** | Validate XML instances against schemas, digitally sign via VaaS API | +| **DistributionToolset** | Deliver RDF triples to Fuseki, Neo4j, GraphDB, or REST endpoints | +| **KnowledgeToolset** | Index domain documentation (JSON, CSV, TTL, Markdown, PDF, DOCX) for semantic search | + +## Configuration + +SDC Agents uses a YAML config file with environment variable substitution: + +```yaml +sdcstudio: + base_url: "https://sdcstudio.com" + api_key: "${SDC_API_KEY}" + +datasources: + warehouse: + type: csv + path: "./data/sample.csv" + +cache: + root: ".sdc-cache" + +audit: + path: ".sdc-cache/audit.jsonl" +``` + +## Resources + +- [SDC Agents on PyPI](https://pypi.org/project/sdc-agents/) +- [SDC Agents GitHub](https://github.com/SemanticDataCharter/SDC_Agents) +- [ADK Integration Guide](https://github.com/SemanticDataCharter/SDC_Agents/blob/main/docs/integrations/ADK_INTEGRATION.md) +- [SDCStudio](https://sdcstudio.com) diff --git a/src/google/adk_community/sdc_agents/__init__.py 
b/src/google/adk_community/sdc_agents/__init__.py new file mode 100644 index 00000000..a72920b7 --- /dev/null +++ b/src/google/adk_community/sdc_agents/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""SDC Agents -- Purpose-scoped semantic data governance toolsets for ADK. + +Eight BaseToolset implementations (32 tools) that transform SQL, CSV, JSON, +and MongoDB data into validated, self-describing SDC4 artifacts with +structured audit trails and enforced agent isolation boundaries. + +Install: pip install google-adk-community[sdc-agents] +Docs: https://github.com/SemanticDataCharter/SDC_Agents + +Requires sdc-agents >= 4.3.3. 
+""" + +from sdc_agents.common.config import load_config +from sdc_agents.common.config import SDCAgentsConfig +from sdc_agents.toolsets.assembly import AssemblyToolset +from sdc_agents.toolsets.catalog import CatalogToolset +from sdc_agents.toolsets.distribution import DistributionToolset +from sdc_agents.toolsets.generator import GeneratorToolset +from sdc_agents.toolsets.introspect import IntrospectToolset +from sdc_agents.toolsets.knowledge import KnowledgeToolset +from sdc_agents.toolsets.mapping import MappingToolset +from sdc_agents.toolsets.validation import ValidationToolset + +__all__ = [ + "load_config", + "SDCAgentsConfig", + "AssemblyToolset", + "CatalogToolset", + "DistributionToolset", + "GeneratorToolset", + "IntrospectToolset", + "KnowledgeToolset", + "MappingToolset", + "ValidationToolset", +] diff --git a/tests/unittests/test_sdc_agents_imports.py b/tests/unittests/test_sdc_agents_imports.py new file mode 100644 index 00000000..f9a1bb0e --- /dev/null +++ b/tests/unittests/test_sdc_agents_imports.py @@ -0,0 +1,78 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Verify that all SDC Agents re-exports resolve correctly.""" + +import importlib + +import pytest + + +@pytest.fixture(autouse=True) +def _skip_if_not_installed(): + """Skip every test in this module when sdc-agents is not installed.""" + pytest.importorskip("sdc_agents") + + +class TestSDCAgentsImports: + """Validate that the community re-export module exposes all toolsets.""" + + def test_module_importable(self): + mod = importlib.import_module("google.adk_community.sdc_agents") + assert mod is not None + + def test_load_config_exported(self): + from google.adk_community.sdc_agents import load_config + + assert callable(load_config) + + def test_sdc_agents_config_exported(self): + from google.adk_community.sdc_agents import SDCAgentsConfig + + assert SDCAgentsConfig is not None + + @pytest.mark.parametrize( + "name", + [ + "AssemblyToolset", + "CatalogToolset", + "DistributionToolset", + "GeneratorToolset", + "IntrospectToolset", + "KnowledgeToolset", + "MappingToolset", + "ValidationToolset", + ], + ) + def test_toolset_exported(self, name: str): + mod = importlib.import_module("google.adk_community.sdc_agents") + cls = getattr(mod, name, None) + assert cls is not None, f"{name} not found in sdc_agents module" + + def test_all_list_complete(self): + from google.adk_community import sdc_agents + + expected = { + "load_config", + "SDCAgentsConfig", + "AssemblyToolset", + "CatalogToolset", + "DistributionToolset", + "GeneratorToolset", + "IntrospectToolset", + "KnowledgeToolset", + "MappingToolset", + "ValidationToolset", + } + assert set(sdc_agents.__all__) == expected