-
Notifications
You must be signed in to change notification settings - Fork 17
Custom Metadata tool #163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Custom Metadata tool #163
Changes from all commits
22c024b
18c4b04
ff5f808
56d4e6f
99b8d55
8652d58
bf9ccb9
8f0eaf4
7eb389f
d73074a
8bfe222
fcd70a5
150d08b
042babe
0a52351
7512a6b
93cc2f6
017d738
dafac19
e2f2654
a08271c
069a2cc
abe1430
3fa116d
9960d50
b049283
2860c2f
37142c5
582a125
9f58b99
890694f
89211d6
300df26
ade0e97
8d808ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| get_assets_by_dsl, | ||
| traverse_lineage, | ||
| update_assets, | ||
| get_custom_metadata_context, | ||
| create_glossary_category_assets, | ||
| create_glossary_assets, | ||
| create_glossary_term_assets, | ||
|
|
@@ -62,9 +63,12 @@ def search_assets_tool( | |
| """ | ||
| Advanced asset search using FluentSearch with flexible conditions. | ||
|
|
||
| Custom metadata can be referenced directly in conditions using the format "SetName.AttributeName". | ||
|
|
||
| Args: | ||
| conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match. | ||
| Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} | ||
| Custom metadata: {"SetName.AttributeName": value} or {"SetName.AttributeName": {"operator": "eq", "value": value}} | ||
| negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude. | ||
| Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} | ||
| some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match. | ||
|
|
@@ -110,6 +114,53 @@ def search_assets_tool( | |
| include_attributes=["owner_users", "owner_groups"] | ||
| ) | ||
|
|
||
| # Search for assets with custom metadata | ||
| # Use nested "custom_metadata" key for clarity | ||
| assets = search_assets( | ||
| conditions={ | ||
| "certificate_status": CertificateStatus.VERIFIED.value, | ||
| "custom_metadata": { | ||
| "Business Ownership.business_owner": "John" | ||
| } | ||
| } | ||
| ) | ||
|
|
||
| # Search for assets with custom metadata using operators | ||
| assets = search_assets( | ||
| conditions={ | ||
| "custom_metadata": { | ||
| "Data Quality.quality_score": { | ||
| "operator": "gt", | ||
| "value": 80 | ||
| }, | ||
| "Data Classification.sensitivity_level": { | ||
| "operator": "eq", | ||
| "value": "sensitive", | ||
| "case_insensitive": True | ||
| } | ||
| } | ||
| } | ||
| ) | ||
|
|
||
| # Search with multiple custom metadata and standard conditions | ||
| assets = search_assets( | ||
| asset_type="Table", | ||
| conditions={ | ||
| "name": { | ||
| "operator": "startswith", | ||
| "value": "customer_" | ||
| }, | ||
| "custom_metadata": { | ||
| "Data Governance.data_owner": "John Smith", | ||
| "Data Governance.retention_period": { | ||
| "operator": "gte", | ||
| "value": 365 | ||
| } | ||
| } | ||
| } | ||
| ) | ||
|
|
||
|
|
||
| # Search for columns with specific certificate status | ||
| columns = search_assets( | ||
| asset_type="Column", | ||
|
|
@@ -694,6 +745,50 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: | |
| return create_glossary_category_assets(categories) | ||
|
|
||
|
|
||
@mcp.tool()
def get_custom_metadata_context_tool() -> Dict[str, Any]:
    """
    Fetch all available custom metadata (business metadata) definitions from the Atlan instance.

    The result describes every custom metadata set and its attributes: attribute
    names, data types, descriptions, and enum values (if applicable).

    Call this tool first to discover which custom metadata exists, then use that
    knowledge to build custom metadata filters when searching for assets.

    Returns:
        Dict[str, Any]: Dictionary containing:
            - context: Description of the returned data
            - business_metadata_results: List of business metadata definitions, each containing:
                - prompt: Formatted string with metadata name and attributes
                - metadata: Dictionary with:
                    - name: Internal name of the custom metadata set
                    - display_name: Display name of the custom metadata set
                    - description: Description of the custom metadata set
                    - attributes: List of attribute definitions with name, display_name, data_type,
                      description, and optional enumEnrichment (with allowed values)
                - id: GUID of the custom metadata definition

        If the lookup raises, a dictionary with a single "error" key is returned instead.

    Example:
        # Get available custom metadata
        context = get_custom_metadata_context_tool()

        # The response will show custom metadata sets like "Data Classification", "Business Ownership", etc.
        # Then you can use them in search_assets_tool with the format "SetName.AttributeName":

        assets = search_assets_tool(
            conditions={
                "Data Classification.sensitivity_level": "sensitive",
                "Business Ownership.business_owner": "John Smith"
            }
        )
    """
    # NOTE(review): open question from review - does the `id` (GUID) of each
    # definition need to be returned to the caller at all? Confirm before trimming.
    try:
        context = get_custom_metadata_context()
    except Exception as e:
        # Report the failure as data rather than letting the exception escape the tool.
        return {"error": f"Error getting custom metadata context: {str(e)}"}
    return context
|
|
||
|
|
||
def main():
    """Entry point: hand control to the MCP server's run loop."""
    mcp.run()
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| import logging | ||
| from typing import Any, Dict, List | ||
| from client import get_atlan_client | ||
| from pyatlan.cache.custom_metadata_cache import CustomMetadataCache | ||
| from pyatlan.cache.enum_cache import EnumCache | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
def _enum_enrichment(attr_def: Any, enum_cache: "EnumCache") -> Dict[str, Any]:
    """Return enum details for an enum-typed attribute, or an empty dict if none apply."""
    options = attr_def.options
    if not options or not options.is_enum:
        return {}

    enum_type = options.enum_type
    if not enum_type:
        return {}

    # Enrichment is best-effort: any failure while resolving the enum is logged
    # and the attribute is simply reported without enum details.
    try:
        enum_def = enum_cache.get_by_name(enum_type)
        if not enum_def or not enum_def.element_defs:
            return {}

        enum_values = [elem.value for elem in enum_def.element_defs if elem.value]
        if not enum_values:
            return {}

        # NOTE(review): open question from review - is `enumGuid` needed by
        # consumers of this payload? Kept for backward compatibility.
        return {
            "status": "ENRICHED",
            "enumType": enum_type,
            "enumGuid": enum_def.guid,
            "enumDescription": enum_def.description,
            "values": enum_values,
        }
    except Exception as e:
        logger.debug("Could not enrich enum type %s: %s", enum_type, e)
        return {}


def process_business_metadata(
    cm_def: Any,
    enum_cache: "EnumCache",
) -> Dict[str, Any]:
    """
    Build the context entry for one Atlan business (custom) metadata definition.

    Args:
        cm_def: Custom metadata definition; read for ``name``, ``display_name``,
            ``description``, ``guid`` and ``attribute_defs``.
        enum_cache: Cache exposing ``get_by_name(enum_type)``, used to look up
            the allowed values of enum-typed attributes.

    Returns:
        Dictionary with three keys:
            - prompt: ``"<display name>|<description>|<attr>:<desc>, ..."``
              (the attribute part is ``"None"`` when there are no attributes)
            - metadata: name, display_name, description and the list of
              attribute dictionaries (name, display_name, data_type,
              description, and ``enumEnrichment`` when enum values were found)
            - id: the definition's GUID
    """
    # NOTE(review): reviewers asked whether these fallback strings are the
    # right defaults; they are kept unchanged here - confirm with consumers.
    cm_name = cm_def.name or "N/A"
    cm_display_name = cm_def.display_name or "N/A"
    description = cm_def.description or "No description available."

    prompt_parts: List[str] = []
    attributes: List[Dict[str, Any]] = []

    for attr_def in cm_def.attribute_defs or []:
        label = attr_def.display_name or attr_def.name or "Unnamed attribute"
        prompt_description = attr_def.description or "No description"
        prompt_parts.append(f"{label}:{prompt_description}")

        attr_description = attr_def.description or ""

        # Computed fresh for every attribute so that enum details from an
        # earlier attribute can never leak into a later one.
        enum_enrichment = _enum_enrichment(attr_def, enum_cache)
        if enum_enrichment:
            # Spell the allowed values out in the description as well, so the
            # text alone is enough to know what the attribute accepts.
            quoted_values = ", ".join(
                f"'{value}'" for value in enum_enrichment["values"]
            )
            enum_suffix = f" This attribute can have enum values: {quoted_values}."
            attr_description = f"{attr_description}{enum_suffix}".strip()

        attribute_metadata: Dict[str, Any] = {
            "name": attr_def.name,
            "display_name": attr_def.display_name,
            "data_type": attr_def.type_name,
            "description": attr_description,
        }
        if enum_enrichment:
            attribute_metadata["enumEnrichment"] = enum_enrichment

        attributes.append(attribute_metadata)

    attributes_str_for_prompt = ", ".join(prompt_parts) if prompt_parts else "None"

    metadata: Dict[str, Any] = {
        "name": cm_name,
        "display_name": cm_display_name,
        "description": description,
        "attributes": attributes,
    }

    prompt = f"{cm_display_name}|{description}|{attributes_str_for_prompt}"

    return {"prompt": prompt, "metadata": metadata, "id": cm_def.guid}
|
|
||
|
|
||
def get_custom_metadata_context() -> Dict[str, Any]:
    """
    Collect the context entries for every custom metadata set via PyAtlan's cache classes.

    Each set name reported by the custom metadata cache is resolved to its full
    definition and passed through ``process_business_metadata``. A set that
    fails to process is logged as a warning and left out of the result; a
    failure outside the per-set loop yields an error payload instead.

    Returns:
        Dictionary with ``context`` (a description string) and
        ``business_metadata_results`` (list of processed definitions). On a
        top-level failure the list is empty and an ``error`` key carries the
        exception text.
    """
    # NOTE(review): reviewers asked for consistent naming ("custom metadata"
    # vs "business metadata"); the result keys are part of the returned
    # payload, so they are left unchanged here.
    collected: List[Dict[str, Any]] = []

    try:
        atlan = get_atlan_client()

        # Both caches are built from the same client.
        metadata_cache = CustomMetadataCache(atlan)
        enums = EnumCache(atlan)

        # Ask for a forced refresh and leave deleted attributes out.
        attributes_by_set = metadata_cache.get_all_custom_attributes(
            include_deleted=False, force_refresh=True
        )

        for set_name in attributes_by_set:
            try:
                definition = metadata_cache.get_custom_metadata_def(set_name)
                collected.append(process_business_metadata(definition, enums))
            except Exception as e:
                # One bad set must not abort the whole listing.
                logger.warning(
                    f"Error processing custom metadata set '{set_name}': {e}"
                )

        logger.info(
            f"Fetched {len(collected)} business metadata definitions with enum enrichment."
        )

    except Exception as e:
        logger.error(
            f"Error fetching custom metadata context: {e}",
            exc_info=True,
        )
        return {
            "context": "Error fetching business metadata definitions",
            "business_metadata_results": [],
            "error": str(e),
        }

    return {
        "context": "This is the list of business metadata definitions used in the data catalog to add more information to an asset",
        "business_metadata_results": collected,
    }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't the LLM use the attribute names rather than the display names for this?