From 4dce0a1725ca76885bab9ead84a431830e864782 Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Tue, 30 Sep 2025 20:50:59 -0400 Subject: [PATCH 1/7] Add docs version update script and workflow changes Introduces a Python script to dynamically update the Sphinx documentation version from multiple sources. Updates the documentation workflow to run this script and deploy from the 'dev' branch. Enhances README with badges and detailed field definitions usage. --- .github/scripts/update_docs_version.py | 214 +++++++++++++++++++++++++ .github/workflows/documentation.yml | 7 +- README.md | 66 +++++++- 3 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 .github/scripts/update_docs_version.py diff --git a/.github/scripts/update_docs_version.py b/.github/scripts/update_docs_version.py new file mode 100644 index 0000000..cf27dd4 --- /dev/null +++ b/.github/scripts/update_docs_version.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +""" +Script to dynamically update the version number in Sphinx documentation. + +This script updates the hardcoded version in docs/source/index.rst before +the Sphinx build runs. It reads the version from multiple sources in priority order: +1. VERSION file at project root (if exists) +2. setuptools_scm (if available) +3. pyproject.toml as fallback + +Usage: + python .github/scripts/update_docs_version.py +""" + +import os +import re +import sys +from pathlib import Path + + +def get_version_from_file(project_root): + """ + Read version from the VERSION file at project root. 
+ + Args: + project_root: Path to the project root directory + + Returns: + str: Version string if found, None otherwise + """ + version_file = project_root / "VERSION" + if version_file.exists(): + try: + version = version_file.read_text().strip() + print(f"✓ Found version in VERSION file: {version}") + return version + except Exception as e: + print(f"✗ Error reading VERSION file: {e}", file=sys.stderr) + return None + + +def get_version_from_setuptools_scm(project_root): + """ + Get version from setuptools_scm if available. + + Args: + project_root: Path to the project root directory + + Returns: + str: Version string if found, None otherwise + """ + try: + from setuptools_scm import get_version + version = get_version(root=str(project_root)) + print(f"✓ Found version from setuptools_scm: {version}") + return version + except ImportError: + print("✗ setuptools_scm not available", file=sys.stderr) + except Exception as e: + print(f"✗ Error getting version from setuptools_scm: {e}", file=sys.stderr) + return None + + +def get_version_from_pyproject(project_root): + """ + Extract version from pyproject.toml as a fallback. + + Args: + project_root: Path to the project root directory + + Returns: + str: Version string if found, None otherwise + """ + pyproject_file = project_root / "pyproject.toml" + if pyproject_file.exists(): + try: + content = pyproject_file.read_text() + # Look for version = "X.X.X" pattern + match = re.search(r'version\s*=\s*["\']([^"\']+)["\']', content) + if match: + version = match.group(1) + print(f"✓ Found version in pyproject.toml: {version}") + return version + except Exception as e: + print(f"✗ Error reading pyproject.toml: {e}", file=sys.stderr) + return None + + +def get_version(project_root): + """ + Get version from available sources in priority order. + + Priority: + 1. VERSION file + 2. setuptools_scm + 3. 
pyproject.toml + + Args: + project_root: Path to the project root directory + + Returns: + str: Version string + + Raises: + RuntimeError: If no version can be determined + """ + # Try VERSION file first + version = get_version_from_file(project_root) + if version: + return version + + # Try setuptools_scm + version = get_version_from_setuptools_scm(project_root) + if version: + return version + + # Fallback to pyproject.toml + version = get_version_from_pyproject(project_root) + if version: + return version + + raise RuntimeError("Could not determine version from any source") + + +def update_index_rst(index_file, version): + """ + Update the version number in docs/source/index.rst. + + Args: + index_file: Path to the index.rst file + version: Version string to insert + + Returns: + bool: True if file was updated, False otherwise + """ + if not index_file.exists(): + print(f"✗ File not found: {index_file}", file=sys.stderr) + return False + + try: + # Read the file + content = index_file.read_text(encoding='utf-8') + lines = content.splitlines(keepends=True) + + # Pattern to match the version line (line 20, 0-indexed as 19) + pattern = re.compile( + r'^(\s*Prompture is currently in development \(version )' + r'[^)]+' + r'(\)\. 
APIs may change between versions\.\s*)$' + ) + + # Update line 20 (index 19) + if len(lines) >= 20: + line_idx = 19 # Line 20 is at index 19 + original_line = lines[line_idx] + + # Check if the line matches the expected pattern + if pattern.match(original_line): + # Replace with new version + new_line = pattern.sub( + rf'\g<1>{version}\g<2>', + original_line + ) + lines[line_idx] = new_line + + # Write back to file + index_file.write_text(''.join(lines), encoding='utf-8') + print(f"✓ Updated version in {index_file}") + print(f" Old: {original_line.strip()}") + print(f" New: {new_line.strip()}") + return True + else: + print(f"✗ Line 20 does not match expected pattern", file=sys.stderr) + print(f" Found: {original_line.strip()}", file=sys.stderr) + return False + else: + print(f"✗ File has fewer than 20 lines", file=sys.stderr) + return False + + except Exception as e: + print(f"✗ Error updating index.rst: {e}", file=sys.stderr) + return False + + +def main(): + """Main entry point for the script.""" + # Determine project root (two levels up from this script) + script_path = Path(__file__).resolve() + project_root = script_path.parent.parent.parent + + print(f"Project root: {project_root}") + + # Get version + try: + version = get_version(project_root) + print(f"\n→ Using version: {version}\n") + except RuntimeError as e: + print(f"\n✗ Fatal error: {e}", file=sys.stderr) + sys.exit(1) + + # Update index.rst + index_file = project_root / "docs" / "source" / "index.rst" + success = update_index_rst(index_file, version) + + if success: + print("\n✓ Documentation version updated successfully") + sys.exit(0) + else: + print("\n✗ Failed to update documentation version", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 391b7c9..e0baf5c 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ 
-3,7 +3,7 @@ name: documentation on: push: branches: - - main + - dev permissions: @@ -18,12 +18,15 @@ jobs: - name: Install dependencies run: | pip install -r docs/requirements.txt + - name: Update documentation version + run: | + python .github/scripts/update_docs_version.py - name: Sphinx build run: | sphinx-build docs/source _build - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v3 - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/dev' }} with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index b6a2baa..0448f82 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ # Prompture +[![PyPI version](https://badge.fury.io/py/prompture.svg)](https://badge.fury.io/py/prompture) +[![Python Versions](https://img.shields.io/pypi/pyversions/prompture.svg)](https://pypi.org/project/prompture/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Downloads](https://static.pepy.tech/badge/prompture)](https://pepy.tech/project/prompture) +![GitHub Repo stars](https://img.shields.io/github/stars/jhd3197/prompture?style=social) + + **Prompture** is an API-first library for getting **structured JSON** (or any structure) from LLMs, validating it, and benchmarking multiple models with one spec. ## ✨ Features @@ -94,7 +101,64 @@ person = extract_with_model(Person, text, model_name="ollama/gpt-oss:20b") print(person.dict()) ``` -**Why start here?** It’s fast (one call), cost-efficient, and returns a validated Pydantic instance. +**Why start here?** It's fast (one call), cost-efficient, and returns a validated Pydantic instance. + +--- + +## 📋 Field Definitions + +Prompture includes a powerful **field definitions system** that provides a centralized registry of structured data extraction fields. 
This system enables consistent, reusable field configurations across your data extraction workflows with built-in fields for common use cases like personal info, contact details, professional data, and more. + +**Key benefits:** +- 🎯 Pre-configured fields with descriptions and extraction instructions +- 🔄 Template variables like `{{current_year}}`, `{{current_date}}`, `{{current_datetime}}` +- 🔌 Seamless Pydantic integration via `field_from_registry()` +- ⚙️ Easy custom field registration + +### Using Built-in Fields + +```python +from pydantic import BaseModel +from prompture import field_from_registry, stepwise_extract_with_model + +class Person(BaseModel): + name: str = field_from_registry("name") + age: int = field_from_registry("age") + email: str = field_from_registry("email") + occupation: str = field_from_registry("occupation") + company: str = field_from_registry("company") + +# Built-in fields include: name, age, email, phone, address, city, country, +# occupation, company, education_level, salary, and many more! 
+ +result = stepwise_extract_with_model( + Person, + "John Smith is 25 years old, software engineer at TechCorp, john@example.com", + model_name="openai/gpt-4" +) +``` + +### Registering Custom Fields + +```python +from prompture import register_field, field_from_registry + +# Register a custom field with template variables +register_field("document_date", { + "type": "str", + "description": "Document creation or processing date", + "instructions": "Use {{current_date}} if not specified in document", + "default": "{{current_date}}", + "nullable": False +}) + +# Use custom field in your model +class Document(BaseModel): + title: str = field_from_registry("name") + created_date: str = field_from_registry("document_date") +``` + +📚 **[View Full Field Definitions Reference →](https://prompture.readthedocs.io/en/latest/field_definitions_reference.html)** --- From bc600401ef7ba337ef4333d75bd09568a87e656c Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Tue, 30 Sep 2025 21:01:13 -0400 Subject: [PATCH 2/7] Add version diagnosis script and update docs requirements Introduces test_version_diagnosis.py to diagnose version detection methods. Updates docs/requirements.txt to include setuptools_scm for version management. Removes the VERSION file and sets fetch-depth to 0 in documentation workflow for full git history. 
--- .github/workflows/documentation.yml | 2 + VERSION | 1 - docs/requirements.txt | 3 +- test_version_diagnosis.py | 97 +++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) delete mode 100644 VERSION create mode 100644 test_version_diagnosis.py diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index e0baf5c..0e49b7e 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -14,6 +14,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: actions/setup-python@v5 - name: Install dependencies run: | diff --git a/VERSION b/VERSION deleted file mode 100644 index db412fd..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.23.dev1 diff --git a/docs/requirements.txt b/docs/requirements.txt index acee5b7..e9d07f1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,5 @@ sphinx>=5.0.0 sphinx-rtd-theme>=1.3.0 sphinxcontrib-napoleon>=0.7 -myst-parser>=2.0.0 \ No newline at end of file +myst-parser>=2.0.0 +setuptools_scm>=8 \ No newline at end of file diff --git a/test_version_diagnosis.py b/test_version_diagnosis.py new file mode 100644 index 0000000..23954a9 --- /dev/null +++ b/test_version_diagnosis.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +""" +Diagnostic script to test version detection methods. +""" + +import os +import sys +from pathlib import Path + +# Set project root +project_root = Path(__file__).parent + +print("=" * 60) +print("VERSION DETECTION DIAGNOSIS") +print("=" * 60) +print() + +# Test 1: Check VERSION file +print("1. Testing VERSION file:") +version_file = project_root / "VERSION" +if version_file.exists(): + version_content = version_file.read_text().strip() + print(f" ✓ VERSION file exists") + print(f" Content: {version_content}") +else: + print(f" ✗ VERSION file not found") +print() + +# Test 2: Check setuptools_scm +print("2. 
Testing setuptools_scm:") +try: + from setuptools_scm import get_version + print(f" ✓ setuptools_scm is installed") + try: + scm_version = get_version(root=str(project_root)) + print(f" ✓ setuptools_scm version: {scm_version}") + except Exception as e: + print(f" ✗ Error getting version from setuptools_scm:") + print(f" {type(e).__name__}: {e}") +except ImportError: + print(f" ✗ setuptools_scm not installed") +print() + +# Test 3: Check git tags +print("3. Testing git repository:") +try: + import subprocess + result = subprocess.run( + ['git', 'tag', '--list'], + capture_output=True, + text=True, + cwd=project_root + ) + if result.returncode == 0: + tags = result.stdout.strip().split('\n') if result.stdout.strip() else [] + print(f" ✓ Git repository found") + print(f" Number of tags: {len([t for t in tags if t])}") + if tags and tags[0]: + print(f" Tags: {', '.join(tags[:10])}") + else: + print(f" ✗ Error running git command") +except Exception as e: + print(f" ✗ Error checking git: {e}") +print() + +# Test 4: Check what the update script would use +print("4. Simulating update_docs_version.py logic:") +print(" Priority order:") +print(" 1. VERSION file") +print(" 2. setuptools_scm") +print(" 3. 
pyproject.toml") +print() + +# Determine which version would be used +final_version = None +source = None + +# Check VERSION file first +if version_file.exists(): + final_version = version_file.read_text().strip() + source = "VERSION file" +else: + # Try setuptools_scm + try: + from setuptools_scm import get_version + final_version = get_version(root=str(project_root)) + source = "setuptools_scm" + except: + source = "Failed - no version found" + +print(f" → Version that WOULD be used: {final_version}") +print(f" → Source: {source}") +print() + +print("=" * 60) +print("ANALYSIS COMPLETE") +print("=" * 60) \ No newline at end of file From 43f7a515afea686a30cdbcd3c0121a203e7eddb3 Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Tue, 30 Sep 2025 21:11:22 -0400 Subject: [PATCH 3/7] Update docs requirements and add VERSION file Removed setuptools_scm from docs/requirements.txt and added a VERSION file with version 0.0.29.dev1. Also simplified the documentation workflow by removing fetch-depth from the checkout step. 
--- .github/workflows/documentation.yml | 2 -- VERSION | 1 + docs/requirements.txt | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) create mode 100644 VERSION diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 0e49b7e..e0baf5c 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -14,8 +14,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 - uses: actions/setup-python@v5 - name: Install dependencies run: | diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..f7a0f5b --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.0.29.dev1 \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index e9d07f1..acee5b7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,5 +2,4 @@ sphinx>=5.0.0 sphinx-rtd-theme>=1.3.0 sphinxcontrib-napoleon>=0.7 -myst-parser>=2.0.0 -setuptools_scm>=8 \ No newline at end of file +myst-parser>=2.0.0 \ No newline at end of file From 98ebfd9eeae4bdf8315ec06e5d644c40a2eb3c5b Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Tue, 30 Sep 2025 21:44:24 -0400 Subject: [PATCH 4/7] Remove extra whitespace in workflow config Deleted unnecessary whitespace from the documentation.yml GitHub Actions workflow for improved readability. --- .github/workflows/documentation.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index e0baf5c..14a5ee8 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -5,7 +5,6 @@ on: branches: - dev - permissions: contents: write From 50dcbad4c082513e24fb000419d1c0266beed637 Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Thu, 2 Oct 2025 21:16:12 -0400 Subject: [PATCH 5/7] Add enum field support and validation utilities Introduces enum field definitions for sentiment, priority, status, and risk_level. 
Adds `validate_enum_value` and `normalize_enum_value` utilities for enum validation and normalization. Updates `field_from_registry` to handle enum fields and enhance schema metadata. Refactors text analysis example to demonstrate enum usage and adds a test script for enum implementation. --- examples/text_analysis_example.py | 156 +++++++++++++++--------------- prompture/__init__.py | 6 +- prompture/field_definitions.py | 135 +++++++++++++++++++++++++- 3 files changed, 214 insertions(+), 83 deletions(-) diff --git a/examples/text_analysis_example.py b/examples/text_analysis_example.py index 5932990..c065481 100644 --- a/examples/text_analysis_example.py +++ b/examples/text_analysis_example.py @@ -1,92 +1,90 @@ """ -Text Analysis Example +Text Analysis Example with Enum Field Support This example demonstrates how to use Prompture for analyzing general text and extracting -boolean (true/false) values. It shows how to register boolean fields for sentiment analysis, -content type detection, and writing style assessment. +sentiment using enum fields. It shows how enum fields restrict LLM output to specific +predefined values (positive, negative, neutral) for sentiment analysis. """ from pydantic import BaseModel -from typing import Optional -from prompture import register_field, field_from_registry, extract_with_model +from typing import Optional, Literal +from prompture import field_from_registry, extract_with_model, get_field_definition, validate_enum_value -# Register boolean fields for text analysis -register_field("is_positive_sentiment", { - "type": "bool", - "description": "Whether the text expresses positive sentiment", - "instructions": "Analyze the overall tone and determine if the sentiment is predominantly positive. 
Look for positive words, optimistic language, and favorable opinions.", - "default": False, - "nullable": False -}) - -register_field("contains_facts", { - "type": "bool", - "description": "Whether the text contains factual information or data", - "instructions": "Determine if the text includes verifiable facts, statistics, dates, or objective information rather than just opinions.", - "default": False, - "nullable": False -}) - -register_field("is_formal_tone", { - "type": "bool", - "description": "Whether the text uses formal language and professional tone", - "instructions": "Check if the writing style is formal, professional, and uses proper grammar. Informal language, slang, or casual expressions indicate false.", - "default": False, - "nullable": False -}) - -register_field("has_call_to_action", { - "type": "bool", - "description": "Whether the text includes a call to action", - "instructions": "Look for explicit requests or suggestions for the reader to take action, such as 'buy now', 'sign up', 'learn more', or similar directives.", - "default": False, - "nullable": False -}) - -register_field("is_persuasive", { - "type": "bool", - "description": "Whether the text attempts to persuade or convince the reader", - "instructions": "Determine if the text uses persuasive techniques, arguments, or tries to influence the reader's opinion or behavior.", - "default": False, - "nullable": False -}) - -# Define the Pydantic model for text analysis +# Define the Pydantic model for text analysis using the sentiment enum field class TextAnalysis(BaseModel): - is_positive_sentiment: bool = field_from_registry("is_positive_sentiment") - contains_facts: bool = field_from_registry("contains_facts") - is_formal_tone: bool = field_from_registry("is_formal_tone") - has_call_to_action: bool = field_from_registry("has_call_to_action") - is_persuasive: bool = field_from_registry("is_persuasive") + sentiment: str = field_from_registry("sentiment") + topic: Optional[str] = 
field_from_registry("topic") -# Sample text - a product review -sample_text = """ -I recently purchased the TechPro Wireless Headphones and I'm absolutely thrilled with my purchase! -The sound quality is exceptional, delivering crisp highs and deep bass that brings my music to life. +# Sample texts to analyze +sample_texts = [ + """ + I recently purchased the TechPro Wireless Headphones and I'm absolutely thrilled with my purchase! + The sound quality is exceptional, delivering crisp highs and deep bass that brings my music to life. + + The battery lasts for an impressive 30 hours on a single charge, and the quick-charge feature gives + you 5 hours of playback in just 10 minutes. The noise cancellation technology is top-notch, blocking + out up to 95% of ambient noise according to the manufacturer's specifications. + + At $149.99, these headphones offer incredible value for money. If you're in the market for premium + wireless headphones without breaking the bank, I highly recommend giving these a try. You won't be + disappointed! Check them out on the TechPro website today. + """, + """ + I had a terrible experience with customer service today. I waited on hold for over an hour + only to be transferred three times to different departments. Nobody seemed to know how to help me, + and my issue remains unresolved. Very frustrating and disappointing. + """, + """ + The weather forecast for tomorrow shows partly cloudy skies with temperatures ranging from + 68 to 75 degrees Fahrenheit. There is a 20% chance of precipitation in the afternoon. Wind + speeds will be moderate at 10-15 mph from the northwest. + """ +] -The battery lasts for an impressive 30 hours on a single charge, and the quick-charge feature gives -you 5 hours of playback in just 10 minutes. The noise cancellation technology is top-notch, blocking -out up to 95% of ambient noise according to the manufacturer's specifications. 
+# Display enum information +print("=" * 70) +print("ENUM FIELD INFORMATION") +print("=" * 70) +sentiment_def = get_field_definition("sentiment") +if sentiment_def and 'enum' in sentiment_def: + print(f"Field: sentiment") + print(f"Allowed values: {sentiment_def['enum']}") + print(f"Description: {sentiment_def['description']}") + print(f"Instructions: {sentiment_def['instructions']}") +print("=" * 70) +print() -At $149.99, these headphones offer incredible value for money. If you're in the market for premium -wireless headphones without breaking the bank, I highly recommend giving these a try. You won't be -disappointed! Check them out on the TechPro website today. -""" +# Analyze each text +for i, text in enumerate(sample_texts, 1): + print(f"Analyzing Text {i}...") + print("-" * 70) + + # Extract sentiment analysis from the text + analysis = extract_with_model( + TextAnalysis, + text, + "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b" + ) + + # Print the analysis results + print(f"Sentiment: {analysis.model.sentiment}") + print(f"Topic: {analysis.model.topic or 'N/A'}") + + # Validate the enum value + is_valid = validate_enum_value("sentiment", analysis.model.sentiment) + print(f"Valid sentiment value: {is_valid}") + + print("-" * 70) + print() + +# Demonstrate manual enum validation +print("=" * 70) +print("ENUM VALIDATION EXAMPLES") +print("=" * 70) -# Extract boolean analysis from the text -analysis = extract_with_model( - TextAnalysis, - sample_text, - "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b" -) +test_values = ["positive", "negative", "neutral", "happy", "POSITIVE"] +for value in test_values: + is_valid = validate_enum_value("sentiment", value) + print(f"Value '{value}': {'✓ Valid' if is_valid else '✗ Invalid'}") -# Print the analysis results -print("=" * 60) -print("TEXT ANALYSIS RESULTS") -print("=" * 60) -print(f"Positive Sentiment: {analysis.model.is_positive_sentiment}") -print(f"Contains Facts: {analysis.model.contains_facts}") -print(f"Formal Tone: 
{analysis.model.is_formal_tone}") -print(f"Has Call to Action: {analysis.model.has_call_to_action}") -print(f"Is Persuasive: {analysis.model.is_persuasive}") -print("=" * 60) \ No newline at end of file +print("=" * 70) \ No newline at end of file diff --git a/prompture/__init__.py b/prompture/__init__.py index 8d6b81b..916bcef 100644 --- a/prompture/__init__.py +++ b/prompture/__init__.py @@ -5,7 +5,8 @@ from .field_definitions import ( FIELD_DEFINITIONS, get_field_definition, get_required_fields, get_field_names, field_from_registry, register_field, add_field_definition, add_field_definitions, - get_registry_snapshot, clear_registry, reset_registry + get_registry_snapshot, clear_registry, reset_registry, validate_enum_value, + normalize_enum_value ) from .runner import run_suite_from_spec from .validator import validate_against_schema @@ -51,4 +52,7 @@ "get_registry_snapshot", "clear_registry", "reset_registry", + # Enum Field Support + "validate_enum_value", + "normalize_enum_value", ] \ No newline at end of file diff --git a/prompture/field_definitions.py b/prompture/field_definitions.py index 4169d65..e8e896d 100644 --- a/prompture/field_definitions.py +++ b/prompture/field_definitions.py @@ -14,7 +14,7 @@ import threading import warnings from datetime import datetime, date -from typing import Dict, Any, Union, Optional, List +from typing import Dict, Any, Union, Optional, List, Literal, get_args from pydantic import Field # Template variable providers @@ -266,7 +266,8 @@ def _apply_templates(text: str, custom_vars: Optional[Dict[str, Any]] = None) -> "sentiment": { "type": str, "description": "Sentiment classification", - "instructions": "Classify as positive, negative, or neutral", + "instructions": "Classify the sentiment of the content", + "enum": ["positive", "negative", "neutral"], "default": "neutral", "nullable": True, }, @@ -290,6 +291,32 @@ def _apply_templates(text: str, custom_vars: Optional[Dict[str, Any]] = None) -> "instructions": "Identify 
primary topic or theme of content", "default": "", "nullable": True, + }, + + # Enum Fields for Task Management + "priority": { + "type": str, + "description": "Priority level", + "instructions": "Determine the priority level", + "enum": ["low", "medium", "high", "urgent"], + "default": "medium", + "nullable": True, + }, + "status": { + "type": str, + "description": "Status of the item", + "instructions": "Identify the current status", + "enum": ["pending", "in_progress", "completed", "cancelled"], + "default": "pending", + "nullable": True, + }, + "risk_level": { + "type": str, + "description": "Risk assessment level", + "instructions": "Assess the risk level", + "enum": ["minimal", "low", "moderate", "high", "critical"], + "default": "low", + "nullable": True, } } @@ -426,8 +453,31 @@ def field_from_registry(field_name: str, apply_templates: bool = True, # Extract Pydantic Field parameters default_value = field_def.get('default') description = field_def.get('description', f"Extract the {field_name} from the text.") + instructions = field_def.get('instructions', '') - # Handle nullable/required logic + # Handle enum fields + enum_values = field_def.get('enum') + if enum_values: + # Enhance description with enum constraint information + enum_str = "', '".join(str(v) for v in enum_values) + enhanced_instructions = f"{instructions}. Must be one of: '{enum_str}'" + enhanced_description = f"{description}. 
Allowed values: {enum_str}" + + # Create json_schema_extra with enum constraint + json_schema_extra = { + "enum": enum_values, + "instructions": enhanced_instructions + } + + # Handle nullable/required logic with enum + if field_def.get('nullable', True) and default_value is not None: + return Field(default=default_value, description=enhanced_description, json_schema_extra=json_schema_extra) + elif field_def.get('nullable', True): + return Field(default=None, description=enhanced_description, json_schema_extra=json_schema_extra) + else: + return Field(description=enhanced_description, json_schema_extra=json_schema_extra) + + # Handle non-enum fields (original logic) if field_def.get('nullable', True) and default_value is not None: # Optional field with default return Field(default=default_value, description=description) @@ -438,6 +488,85 @@ def field_from_registry(field_name: str, apply_templates: bool = True, # Required field return Field(description=description) +def validate_enum_value(field_name: str, value: Any) -> bool: + """ + Validate that a value is in the allowed enum list for a field. + + Args: + field_name (str): Name of the field in the registry + value (Any): Value to validate + + Returns: + bool: True if value is valid for the enum field, False otherwise + """ + field_def = get_field_definition(field_name, apply_templates=False) + + if field_def is None: + return False + + enum_values = field_def.get('enum') + if not enum_values: + # Not an enum field, so any value is valid + return True + + # Check if value is in the allowed enum list + return value in enum_values + +def normalize_enum_value(field_name: str, value: Any, case_sensitive: bool = True) -> Any: + """ + Normalize and validate an enum value for a field. 
+ + Args: + field_name (str): Name of the field in the registry + value (Any): Value to normalize + case_sensitive (bool): Whether to perform case-sensitive matching + + Returns: + Any: Normalized value if valid, original value otherwise + + Raises: + ValueError: If value is not in the allowed enum list + """ + field_def = get_field_definition(field_name, apply_templates=False) + + if field_def is None: + raise KeyError(f"Field '{field_name}' not found in registry") + + enum_values = field_def.get('enum') + if not enum_values: + # Not an enum field, return as-is + return value + + # Convert value to string for comparison + str_value = str(value) if value is not None else None + + if str_value is None: + # Handle nullable fields + if field_def.get('nullable', True): + return None + else: + raise ValueError(f"Field '{field_name}' does not allow null values") + + # Case-sensitive matching + if case_sensitive: + if str_value in enum_values: + return str_value + raise ValueError( + f"Invalid value '{str_value}' for field '{field_name}'. " + f"Must be one of: {', '.join(repr(v) for v in enum_values)}" + ) + + # Case-insensitive matching + lower_value = str_value.lower() + for enum_val in enum_values: + if str(enum_val).lower() == lower_value: + return enum_val + + raise ValueError( + f"Invalid value '{str_value}' for field '{field_name}'. " + f"Must be one of: {', '.join(repr(v) for v in enum_values)}" + ) + def get_registry_snapshot() -> Dict[str, FieldDefinition]: """ Get a snapshot of the current global registry. From 232e5f7747cf9916bc9876ed649b877d234c6b4a Mon Sep 17 00:00:00 2001 From: Juan Denis Date: Fri, 3 Oct 2025 21:43:21 -0400 Subject: [PATCH 6/7] Add text classification example and tone enum field Introduces a new example script demonstrating text classification using enum fields for tone and topic. Adds a 'tone' field definition to BASE_FIELD_DEFINITIONS with allowed values and metadata for improved LLM output consistency. 
--- examples/text_analysis_example_v2.py | 84 ++++++++++++++++++++++++++++ prompture/field_definitions.py | 8 +++ 2 files changed, 92 insertions(+) create mode 100644 examples/text_analysis_example_v2.py diff --git a/examples/text_analysis_example_v2.py b/examples/text_analysis_example_v2.py new file mode 100644 index 0000000..c830603 --- /dev/null +++ b/examples/text_analysis_example_v2.py @@ -0,0 +1,84 @@ +""" +Text Classification Example with Multiple Enum Fields + +This example shows how to use Prompture for classifying text into both tone +and topic categories using enum fields. Enum fields ensure that LLM outputs +stay within predefined valid options, improving reliability and consistency. +""" + +from pydantic import BaseModel +from typing import Optional +from prompture import field_from_registry, extract_with_model, get_field_definition, validate_enum_value + +# Define the Pydantic model for text classification using enum fields +class TextClassification(BaseModel): + tone: str = field_from_registry("tone") # e.g. ["formal", "informal", "optimistic", "pessimistic"] + topic: Optional[str] = field_from_registry("topic") # General subject/topic of the text + +# Example texts to classify +sample_texts = [ + """ + We are delighted to announce the grand opening of our new office space + in downtown Miami. This expansion reflects our commitment to innovation + and growth in the region. We look forward to welcoming our clients to + this modern and vibrant workspace. + """, + """ + Honestly, I don’t think the new update did much to fix the app. It’s still + laggy, crashes often, and the support team keeps giving canned responses. + I’m getting really tired of this. + """, + """ + The company will host a quarterly town hall meeting next week. Employees + are encouraged to submit questions in advance. The agenda includes a + review of financial performance, upcoming projects, and a Q&A session. 
+ """ +] + +# Display enum info for tone +print("=" * 70) +print("ENUM FIELD INFORMATION") +print("=" * 70) +tone_def = get_field_definition("tone") +if tone_def and 'enum' in tone_def: + print(f"Field: tone") + print(f"Allowed values: {tone_def['enum']}") + print(f"Description: {tone_def['description']}") + print(f"Instructions: {tone_def['instructions']}") +print("=" * 70) +print() + +# Analyze each text +for i, text in enumerate(sample_texts, 1): + print(f"Classifying Text {i}...") + print("-" * 70) + + # Extract classification from text + classification = extract_with_model( + TextClassification, + text, + "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b" + ) + + # Print results + print(f"Tone: {classification.model.tone}") + print(f"Topic: {classification.model.topic or 'N/A'}") + + # Validate enum value for tone + is_valid = validate_enum_value("tone", classification.model.tone) + print(f"Valid tone value: {is_valid}") + + print("-" * 70) + print() + +# Manual validation demo +print("=" * 70) +print("ENUM VALIDATION EXAMPLES") +print("=" * 70) + +test_values = ["formal", "casual", "optimistic", "angry", "PESSIMISTIC"] +for value in test_values: + is_valid = validate_enum_value("tone", value) + print(f"Value '{value}': {'✓ Valid' if is_valid else '✗ Invalid'}") + +print("=" * 70) diff --git a/prompture/field_definitions.py b/prompture/field_definitions.py index e8e896d..27e0cb6 100644 --- a/prompture/field_definitions.py +++ b/prompture/field_definitions.py @@ -317,6 +317,14 @@ def _apply_templates(text: str, custom_vars: Optional[Dict[str, Any]] = None) -> "enum": ["minimal", "low", "moderate", "high", "critical"], "default": "low", "nullable": True, + }, + "tone": { + "type": str, + "description": "Tone of the text", + "instructions": "Classify the tone of the text", + "enum": ["formal", "informal", "optimistic", "pessimistic"], + "default": "formal", + "nullable": True, } } From 0f6cfd752a7002828ccf6d6f6d4f3093d3352e75 Mon Sep 17 00:00:00 2001 From: Juan Denis 
Date: Sun, 12 Oct 2025 00:28:02 -0400 Subject: [PATCH 7/7] Update documentation workflow to use main branch Change the GitHub Actions workflow to trigger on pushes to the 'main' branch instead of 'dev', and update the deployment condition accordingly. This aligns documentation deployment with the primary branch. --- .github/workflows/documentation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 14a5ee8..ba17c1b 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -3,7 +3,7 @@ name: documentation on: push: branches: - - dev + - main permissions: contents: write @@ -25,7 +25,7 @@ jobs: sphinx-build docs/source _build - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v3 - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/dev' }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }}