diff --git a/.github/workflows/beta-release.yml b/.github/workflows/beta-release.yml
index e9972dfa..36adbb60 100644
--- a/.github/workflows/beta-release.yml
+++ b/.github/workflows/beta-release.yml
@@ -72,8 +72,10 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install bump2version build twine
+          pip install bump2version build twine psutil
           pip install -e ".[all,dev]"
+          # Install memory monitoring tools
+          pip install memory_profiler
 
       - name: Configure git
         run: |
@@ -108,10 +110,28 @@ jobs:
           python scripts/generate_changelog.py --beta --output BETA_CHANGELOG.md
 
       - name: Run tests
+        env:
+          # Control memory usage to prevent segmentation faults
+          PYTHONMALLOC: debug
+          # Limit the number of threads used by numpy/OpenMP
+          OMP_NUM_THREADS: 1
+          MKL_NUM_THREADS: 1
+          OPENBLAS_NUM_THREADS: 1
+          # Limit spaCy's memory usage
+          SPACY_MAX_THREADS: 1
         run: |
-          python -m pytest tests/ -v --tb=short
-          python -m pytest -m integration -v
-          python -m pytest tests/benchmark_text_service.py -v
+          # Print system memory info
+          free -h || echo "free command not available"
+          
+          # Split tests into smaller batches to avoid memory issues
+          python -m pytest tests/ -v --tb=short -k "not benchmark and not integration" --no-header
+          
+          # Run integration tests separately
+          python -m pytest -m integration -v --no-header
+          
+          # Run benchmark tests with reduced sample size
+          python -c "print('Running memory-intensive benchmark tests with safeguards')"
+          python -m pytest tests/benchmark_text_service.py -v --no-header
 
       - name: Build package
         run: |
diff --git a/examples/quick_start.ipynb b/examples/quick_start.ipynb
new file mode 100644
index 00000000..ea7aa03c
--- /dev/null
+++ b/examples/quick_start.ipynb
@@ -0,0 +1,627 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"../public/colorlogo.png\" width=\"50%\"/>\n",
+    "\n",
+    "[Homepage](https://www.datafog.ai) | \n",
+    "[Discord](https://discord.gg/bzDth394R4) | \n",
+    "[Github](https://github.com/datafog/datafog-python) | \n",
+    "[Contact](mailto:sid@datafog.ai) |\n",
+    "[Documentation](https://www.datafog.ai/datafog-docs/)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": "# DataFog Quick Start Guide\\n\\n> **📦 Version Requirement**: This guide is for DataFog v4.2.0 and above\\n> \\n> ✅ **New in v4.2.0**: GLiNER integration, smart cascading, and enhanced performance\\n\\nWelcome to DataFog! This notebook demonstrates how to get started with DataFog's fast PII detection and anonymization capabilities.\\n\\n## What makes DataFog special?\\n\\n- **🚀 Ultra-Fast**: 190x faster than spaCy for structured PII, 32x faster with GLiNER\\n- **🪶 Lightweight**: <2MB core package with optional ML extras\\n- **🧠 Smart Engines**: Choose from regex, GLiNER, spaCy, or smart cascading\\n- **📦 Production Ready**: Comprehensive testing and performance validation\"",
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Installation\n",
+    "\n",
+    "Let's start by installing DataFog with the advanced features:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install DataFog with advanced ML features\n",
+    "!pip install datafog[nlp-advanced] --quiet\n",
+    "\n",
+    "print(\"✅ DataFog installed successfully!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Simple API - Get Started in Seconds\n",
+    "\n",
+    "The fastest way to detect PII in your text:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datafog import DataFog\n",
+    "\n",
+    "# Create a DataFog instance\n",
+    "detector = DataFog()\n",
+    "\n",
+    "# Sample text with various PII types\n",
+    "sample_text = \"\"\"\n",
+    "Hi there! I'm Dr. Sarah Johnson, and you can reach me at sarah.johnson@hospital.com \n",
+    "or call my office at (555) 123-4567. My SSN is 123-45-6789 for verification.\n",
+    "I work at General Hospital located at 123 Main St, New York, NY 10001.\n",
+    "My credit card ending in 4111-1111-1111-1111 expires on 12/25.\n",
+    "\"\"\"\n",
+    "\n",
+    "# Detect PII - this uses the fast regex engine by default\n",
+    "results = detector.scan_text(sample_text)\n",
+    "\n",
+    "print(\"🔍 PII Detection Results:\")\n",
+    "print(f\"Found {len(results)} pieces of PII:\")\n",
+    "for entity_type, entities in results.items():\n",
+    "    if entities:  # Only show types that were found\n",
+    "        print(f\"  {entity_type}: {entities}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Engine Comparison - Choose Your Power Level\n",
+    "\n",
+    "DataFog offers multiple engines for different needs. Let's compare them:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datafog.services import TextService\n",
+    "import time\n",
+    "\n",
+    "# Test text with both structured and unstructured PII\n",
+    "test_text = \"Dr. John Smith works at General Hospital. Contact him at john@hospital.com or (555) 123-4567.\"\n",
+    "\n",
+    "# Engine configurations\n",
+    "engines = {\n",
+    "    \"regex\": \"🚀 Fastest - Pattern-based detection\",\n",
+    "    \"gliner\": \"⚡ Fast - Modern ML with high accuracy\", \n",
+    "    \"smart\": \"🧠 Balanced - Combines regex + GLiNER for best results\"\n",
+    "}\n",
+    "\n",
+    "print(\"⚡ Engine Performance Comparison\\n\")\n",
+    "\n",
+    "for engine_name, description in engines.items():\n",
+    "    try:\n",
+    "        print(f\"{description}\")\n",
+    "        \n",
+    "        # Create service with specific engine\n",
+    "        service = TextService(engine=engine_name)\n",
+    "        \n",
+    "        # Time the detection\n",
+    "        start_time = time.time()\n",
+    "        result = service.annotate_text_sync(test_text)\n",
+    "        end_time = time.time()\n",
+    "        \n",
+    "        # Show results\n",
+    "        processing_time = (end_time - start_time) * 1000  # Convert to milliseconds\n",
+    "        print(f\"  ⏱️  Processing time: {processing_time:.2f}ms\")\n",
+    "        print(f\"  🎯 Entities found: {list(result.keys()) if result else 'None'}\")\n",
+    "        print()\n",
+    "        \n",
+    "    except ImportError as e:\n",
+    "        print(f\"  ❌ {engine_name} engine not available (missing dependencies)\")\n",
+    "        print(f\"     Install with: pip install datafog[nlp-advanced]\")\n",
+    "        print()\n",
+    "    except Exception as e:\n",
+    "        print(f\"  ⚠️  Error with {engine_name}: {str(e)}\")\n",
+    "        print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Advanced Detection with GLiNER\n",
+    "\n",
+    "GLiNER is DataFog's modern ML engine that provides excellent accuracy for named entity recognition:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Complex text with various entity types\n",
+    "complex_text = \"\"\"\n",
+    "Medical Report - Patient: Emily Rodriguez, DOB: 03/15/1985\n",
+    "Dr. Michael Chen from Stanford Medical Center treated the patient.\n",
+    "Insurance ID: INS-789-456-123, Policy expires December 2024.\n",
+    "Emergency contact: Maria Rodriguez at (408) 555-9876.\n",
+    "Address: 1234 Oak Street, San Francisco, CA 94102\n",
+    "Lab results show glucose level of 120 mg/dL on 2024-01-15.\n",
+    "\"\"\"\n",
+    "\n",
+    "try:\n",
+    "    # Use GLiNER for advanced entity detection\n",
+    "    gliner_service = TextService(engine=\"gliner\")\n",
+    "    \n",
+    "    print(\"🧠 GLiNER Advanced Detection Results:\")\n",
+    "    print(\"=\" * 50)\n",
+    "    \n",
+    "    results = gliner_service.annotate_text_sync(complex_text)\n",
+    "    \n",
+    "    for entity_type, entities in results.items():\n",
+    "        if entities:  # Only show found entities\n",
+    "            print(f\"\\n{entity_type}:\")\n",
+    "            for entity in entities:\n",
+    "                print(f\"  • {entity}\")\n",
+    "    \n",
+    "    print(f\"\\n✅ Total entity types detected: {len([k for k, v in results.items() if v])}\")\n",
+    "    \n",
+    "except ImportError:\n",
+    "    print(\"❌ GLiNER not available. Install with: pip install datafog[nlp-advanced]\")\n",
+    "except Exception as e:\n",
+    "    print(f\"⚠️  GLiNER error: {e}\")\n",
+    "    print(\"Falling back to regex engine...\")\n",
+    "    \n",
+    "    # Fallback to regex\n",
+    "    regex_service = TextService(engine=\"regex\")\n",
+    "    results = regex_service.annotate_text_sync(complex_text)\n",
+    "    print(\"\\n🚀 Regex Detection Results:\")\n",
+    "    for entity_type, entities in results.items():\n",
+    "        if entities:\n",
+    "            print(f\"  {entity_type}: {entities}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Smart Cascading - Best of All Worlds\n",
+    "\n",
+    "The \"smart\" engine combines regex speed with GLiNER accuracy by using a cascading approach:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Text mixing structured PII (perfect for regex) and entities (better with GLiNER)\n",
+    "mixed_text = \"\"\"\n",
+    "From: john.doe@techcorp.com\n",
+    "To: legal@company.com\n",
+    "Subject: Employee Data Update\n",
+    "\n",
+    "Dear Legal Team,\n",
+    "\n",
+    "Please update the employee record for Sarah Williams (ID: EMP-12345).\n",
+    "Her new phone number is (555) 987-6543 and SSN is 987-65-4321.\n",
+    "She works at our Seattle office and reports to Manager David Chen.\n",
+    "Her emergency contact is her spouse, Michael Williams, at (555) 111-2222.\n",
+    "\n",
+    "Best regards,\n",
+    "HR Department\n",
+    "\"\"\"\n",
+    "\n",
+    "try:\n",
+    "    # Smart engine: Uses regex first (fast), then GLiNER for missed entities\n",
+    "    smart_service = TextService(engine=\"smart\")\n",
+    "    \n",
+    "    print(\"🧠 Smart Cascading Detection:\")\n",
+    "    print(\"=\" * 40)\n",
+    "    print(\"Strategy: Regex (speed) → GLiNER (accuracy)\\n\")\n",
+    "    \n",
+    "    start_time = time.time()\n",
+    "    results = smart_service.annotate_text_sync(mixed_text)\n",
+    "    end_time = time.time()\n",
+    "    \n",
+    "    # Organize results by category\n",
+    "    structured_pii = ['EMAIL', 'PHONE', 'SSN', 'CREDIT_CARD']\n",
+    "    entity_pii = ['PERSON', 'ORG', 'LOC', 'DATE_TIME']\n",
+    "    \n",
+    "    print(\"📧 Structured PII (Regex-optimized):\")\n",
+    "    for entity_type in structured_pii:\n",
+    "        if entity_type in results and results[entity_type]:\n",
+    "            print(f\"  {entity_type}: {results[entity_type]}\")\n",
+    "    \n",
+    "    print(\"\\n👤 Named Entities (GLiNER-optimized):\")\n",
+    "    for entity_type in entity_pii:\n",
+    "        if entity_type in results and results[entity_type]:\n",
+    "            print(f\"  {entity_type}: {results[entity_type]}\")\n",
+    "    \n",
+    "    processing_time = (end_time - start_time) * 1000\n",
+    "    print(f\"\\n⏱️  Total processing time: {processing_time:.2f}ms\")\n",
+    "    print(f\"✅ Combined detection power with optimized speed!\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"⚠️  Smart engine error: {e}\")\n",
+    "    print(\"This usually means GLiNER dependencies are missing.\")\n",
+    "    print(\"Install with: pip install datafog[nlp-advanced]\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Anonymization - Protect Your Data\n",
+    "\n",
+    "DataFog doesn't just detect PII - it can also anonymize it in various ways:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sample sensitive data\n",
+    "sensitive_data = \"\"\"\n",
+    "Patient: John Smith\n",
+    "Email: john.smith@email.com\n",
+    "Phone: (555) 123-4567\n",
+    "SSN: 123-45-6789\n",
+    "Credit Card: 4111-1111-1111-1111\n",
+    "Address: 123 Main St, Anytown, NY 12345\n",
+    "\"\"\"\n",
+    "\n",
+    "print(\"🔒 DataFog Anonymization Methods\\n\")\n",
+    "print(\"Original text:\")\n",
+    "print(sensitive_data)\n",
+    "print(\"=\" * 60)\n",
+    "\n",
+    "# Method 1: Redaction (replace with [REDACTED])\n",
+    "redactor = DataFog(operations=[\"scan\", \"redact\"])\n",
+    "redacted_text = redactor.process_text(sensitive_data)\n",
+    "print(\"\\n🚫 REDACTED:\")\n",
+    "print(redacted_text)\n",
+    "\n",
+    "# Method 2: Replacement (replace with fake but realistic data)\n",
+    "replacer = DataFog(operations=[\"scan\", \"replace\"])\n",
+    "replaced_text = replacer.process_text(sensitive_data)\n",
+    "print(\"\\n🔄 REPLACED:\")\n",
+    "print(replaced_text)\n",
+    "\n",
+    "# Method 3: Hashing (one-way transformation)\n",
+    "from datafog.models.anonymizer import HashType\n",
+    "hasher = DataFog(\n",
+    "    operations=[\"scan\", \"hash\"],\n",
+    "    hash_type=HashType.SHA256\n",
+    ")\n",
+    "hashed_text = hasher.process_text(sensitive_data)\n",
+    "print(\"\\n#️⃣ HASHED (SHA256):\")\n",
+    "print(hashed_text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Selective Processing - Target Specific PII Types\n",
+    "\n",
+    "Sometimes you only want to process certain types of PII:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sample text with mixed PII\n",
+    "business_data = \"\"\"\n",
+    "Company Report:\n",
+    "CEO: Amanda Johnson (amanda@company.com)\n",
+    "CFO: Robert Davis (robert.davis@company.com) \n",
+    "Phone: (555) 100-2000\n",
+    "Headquarters: 456 Business Ave, Corporate City, CA 90210\n",
+    "Tax ID: 12-3456789\n",
+    "Employee SSN for payroll: 987-65-4321\n",
+    "\"\"\"\n",
+    "\n",
+    "print(\"🎯 Selective PII Processing\\n\")\n",
+    "print(\"Original text:\")\n",
+    "print(business_data)\n",
+    "print(\"=\" * 50)\n",
+    "\n",
+    "# Only process emails and SSNs, leave names and addresses\n",
+    "selective_redactor = DataFog(\n",
+    "    operations=[\"scan\", \"redact\"],\n",
+    "    entities=[\"EMAIL\", \"SSN\"]  # Only target these types\n",
+    ")\n",
+    "\n",
+    "selective_result = selective_redactor.process_text(business_data)\n",
+    "print(\"\\n🎯 Selective Redaction (EMAIL + SSN only):\")\n",
+    "print(selective_result)\n",
+    "print(\"\\n💡 Notice: Names and addresses are preserved!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Batch Processing - Handle Multiple Documents\n",
+    "\n",
+    "Process multiple documents efficiently:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sample document collection\n",
+    "documents = [\n",
+    "    \"Patient file 1: John Doe, DOB: 01/15/1980, Phone: (555) 111-1111\",\n",
+    "    \"Customer record: jane@email.com, Account: 4532-1234-5678-9012\", \n",
+    "    \"Employee data: Robert Smith, SSN: 123-45-6789, Manager: Sarah Lee\",\n",
+    "    \"Contact info: michael@company.com, Office: (555) 999-8888\",\n",
+    "    \"Invoice #1234: Bill to John at 123 Oak St, Los Angeles, CA 90001\"\n",
+    "]\n",
+    "\n",
+    "print(\"📚 Batch Processing Demo\\n\")\n",
+    "print(f\"Processing {len(documents)} documents...\\n\")\n",
+    "\n",
+    "# Process all documents at once\n",
+    "batch_detector = DataFog()\n",
+    "start_time = time.time()\n",
+    "batch_results = batch_detector.batch_process(documents)\n",
+    "end_time = time.time()\n",
+    "\n",
+    "# Summary results\n",
+    "total_entities = 0\n",
+    "entity_counts = {}\n",
+    "\n",
+    "for i, result in enumerate(batch_results):\n",
+    "    print(f\"📄 Document {i+1}:\")\n",
+    "    doc_entities = 0\n",
+    "    for entity_type, entities in result.items():\n",
+    "        if entities:\n",
+    "            count = len(entities)\n",
+    "            doc_entities += count\n",
+    "            entity_counts[entity_type] = entity_counts.get(entity_type, 0) + count\n",
+    "            print(f\"  {entity_type}: {entities}\")\n",
+    "    \n",
+    "    if doc_entities == 0:\n",
+    "        print(\"  No PII detected\")\n",
+    "    total_entities += doc_entities\n",
+    "    print()\n",
+    "\n",
+    "# Performance summary\n",
+    "processing_time = (end_time - start_time) * 1000\n",
+    "avg_time_per_doc = processing_time / len(documents)\n",
+    "\n",
+    "print(\"📊 Batch Processing Summary:\")\n",
+    "print(f\"  📚 Documents processed: {len(documents)}\")\n",
+    "print(f\"  🎯 Total entities found: {total_entities}\")\n",
+    "print(f\"  ⏱️  Total processing time: {processing_time:.2f}ms\")\n",
+    "print(f\"  📈 Average per document: {avg_time_per_doc:.2f}ms\")\n",
+    "print(f\"  🏃 Throughput: {len(documents) / (processing_time/1000):.1f} docs/sec\")\n",
+    "\n",
+    "if entity_counts:\n",
+    "    print(f\"\\n🏷️  Entity breakdown:\")\n",
+    "    for entity_type, count in entity_counts.items():\n",
+    "        print(f\"    {entity_type}: {count}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Performance Showcase - See the Speed\n",
+    "\n",
+    "Let's demonstrate DataFog's performance advantage with a realistic document:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Realistic business document (similar to what you'd process in production)\n",
+    "large_document = \"\"\"\n",
+    "CONFIDENTIAL EMPLOYEE REPORT - Q1 2024\n",
+    "\n",
+    "=== EXECUTIVE SUMMARY ===\n",
+    "Report generated by: Sarah Johnson (sarah.johnson@company.com)\n",
+    "Date: March 15, 2024\n",
+    "Department: Human Resources\n",
+    "Contact: (555) 100-HR00 ext. 1234\n",
+    "\n",
+    "=== EMPLOYEE RECORDS ===\n",
+    "\n",
+    "1. John Smith (ID: EMP-001)\n",
+    "   Email: john.smith@company.com\n",
+    "   Phone: (555) 123-4567\n",
+    "   SSN: 123-45-6789\n",
+    "   Address: 123 Oak Street, San Francisco, CA 94102\n",
+    "   Manager: David Chen (david.chen@company.com)\n",
+    "   Salary: $85,000 annually\n",
+    "   Start Date: January 15, 2020\n",
+    "\n",
+    "2. Maria Rodriguez (ID: EMP-002)\n",
+    "   Email: maria.rodriguez@company.com\n",
+    "   Phone: (555) 987-6543\n",
+    "   SSN: 987-65-4321\n",
+    "   Address: 456 Pine Ave, Los Angeles, CA 90210\n",
+    "   Manager: Lisa Wang (lisa.wang@company.com)\n",
+    "   Emergency Contact: Carlos Rodriguez (555) 111-2233\n",
+    "\n",
+    "3. Michael Johnson (ID: EMP-003)\n",
+    "   Email: michael.j@company.com\n",
+    "   Personal Email: mike.personal@gmail.com\n",
+    "   Phone: (555) 456-7890\n",
+    "   SSN: 456-78-9012\n",
+    "   Credit Card on file: 4532-1234-5678-9012 (expires 12/26)\n",
+    "   \n",
+    "=== PAYROLL INFORMATION ===\n",
+    "Bank routing: 123456789\n",
+    "Direct deposit accounts verified on 2024-03-01\n",
+    "Tax ID: 12-3456789\n",
+    "\n",
+    "=== CONTACT INFORMATION ===\n",
+    "HR Helpline: (555) 888-4HR7\n",
+    "Benefits questions: benefits@company.com\n",
+    "IT Support: support@company.com\n",
+    "Office address: 789 Corporate Blvd, Suite 100, Business City, NY 10001\n",
+    "\n",
+    "This document contains sensitive employee information and should be handled according to \n",
+    "company privacy policies and applicable laws including GDPR, CCPA, and HIPAA where applicable.\n",
+    "\n",
+    "Report ID: RPT-2024-Q1-001\n",
+    "Classification: CONFIDENTIAL\n",
+    "Retention: 7 years from creation date\n",
+    "\"\"\"\n",
+    "\n",
+    "print(\"🚀 Performance Benchmark\\n\")\n",
+    "print(f\"📄 Document size: {len(large_document):,} characters\")\n",
+    "print(f\"📝 Lines of text: {len(large_document.splitlines())}\")\n",
+    "print(\"=\" * 60)\n",
+    "\n",
+    "# Test with different engines\n",
+    "engines_to_test = [\n",
+    "    (\"regex\", \"🚀 Regex Engine (Fastest)\"),\n",
+    "    (\"smart\", \"🧠 Smart Engine (Balanced)\"),\n",
+    "]\n",
+    "\n",
+    "results_comparison = {}\n",
+    "\n",
+    "for engine_name, description in engines_to_test:\n",
+    "    try:\n",
+    "        print(f\"\\n{description}\")\n",
+    "        print(\"-\" * 30)\n",
+    "        \n",
+    "        service = TextService(engine=engine_name)\n",
+    "        \n",
+    "        # Run multiple times for accurate timing\n",
+    "        times = []\n",
+    "        for _ in range(3):\n",
+    "            start = time.time()\n",
+    "            result = service.annotate_text_sync(large_document)\n",
+    "            end = time.time()\n",
+    "            times.append((end - start) * 1000)\n",
+    "        \n",
+    "        avg_time = sum(times) / len(times)\n",
+    "        \n",
+    "        # Count entities found\n",
+    "        total_entities = sum(len(entities) for entities in result.values() if entities)\n",
+    "        entity_types = len([k for k, v in result.items() if v])\n",
+    "        \n",
+    "        results_comparison[engine_name] = {\n",
+    "            'time': avg_time,\n",
+    "            'entities': total_entities,\n",
+    "            'types': entity_types\n",
+    "        }\n",
+    "        \n",
+    "        print(f\"⏱️  Average time: {avg_time:.2f}ms\")\n",
+    "        print(f\"🎯 Entities found: {total_entities}\")\n",
+    "        print(f\"🏷️  Entity types: {entity_types}\")\n",
+    "        print(f\"📊 Throughput: {len(large_document) / (avg_time/1000):,.0f} chars/sec\")\n",
+    "        \n",
+    "    except Exception as e:\n",
+    "        print(f\"❌ {engine_name} not available: {e}\")\n",
+    "\n",
+    "# Performance comparison\n",
+    "if len(results_comparison) > 1:\n",
+    "    print(\"\\n🏆 Performance Comparison:\")\n",
+    "    print(\"=\" * 40)\n",
+    "    \n",
+    "    fastest_time = min(r['time'] for r in results_comparison.values())\n",
+    "    \n",
+    "    for engine, stats in results_comparison.items():\n",
+    "        speedup = fastest_time / stats['time'] if stats['time'] > 0 else 1\n",
+    "        if speedup >= 1:\n",
+    "            print(f\"{engine}: {stats['time']:.2f}ms ({speedup:.1f}x faster) - {stats['entities']} entities\")\n",
+    "        else:\n",
+    "            slowdown = stats['time'] / fastest_time\n",
+    "            print(f\"{engine}: {stats['time']:.2f}ms ({slowdown:.1f}x slower) - {stats['entities']} entities\")\n",
+    "\n",
+    "print(\"\\n✅ DataFog delivers production-ready performance for real-world documents!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 🎉 Congratulations!\n",
+    "\n",
+    "You've completed the DataFog quick start guide! Here's what you've learned:\n",
+    "\n",
+    "### ✅ Key Takeaways\n",
+    "\n",
+    "1. **🚀 Speed**: DataFog is 190x faster than traditional NLP for structured PII\n",
+    "2. **🧠 Intelligence**: GLiNER and smart cascading provide excellent accuracy\n",
+    "3. **🔒 Flexibility**: Multiple anonymization options (redact, replace, hash)\n",
+    "4. **🎯 Precision**: Target specific PII types for selective processing\n",
+    "5. **📚 Scale**: Efficient batch processing for production workloads\n",
+    "\n",
+    "### 🛠️ Engine Selection Guide\n",
+    "\n",
+    "| Engine | Best For | Speed | Accuracy |\n",
+    "|--------|----------|-------|----------|\n",
+    "| `regex` | Structured PII (emails, phones, SSN) | 🚀🚀🚀 | ⭐⭐⭐ |\n",
+    "| `gliner` | Named entities (people, orgs, locations) | 🚀🚀 | ⭐⭐⭐⭐ |\n",
+    "| `smart` | **Production use (recommended)** | 🚀🚀 | ⭐⭐⭐⭐⭐ |\n",
+    "\n",
+    "### 🚀 Next Steps\n",
+    "\n",
+    "- **Production**: Use `engine=\"smart\"` for best balance of speed and accuracy\n",
+    "- **High Volume**: Use `engine=\"regex\"` for maximum speed on structured data\n",
+    "- **Custom Entities**: Explore GLiNER models for specialized use cases\n",
+    "- **Integration**: Check out our [documentation](https://docs.datafog.ai) for API details\n",
+    "\n",
+    "### 💬 Get Help\n",
+    "\n",
+    "- 📖 [Documentation](https://docs.datafog.ai)\n",
+    "- 💬 [Discord Community](https://discord.gg/bzDth394R4)\n",
+    "- 🐛 [GitHub Issues](https://github.com/datafog/datafog-python/issues)\n",
+    "- 📧 [Contact Us](mailto:hi@datafog.ai)\n",
+    "\n",
+    "**Happy data processing with DataFog! 🌟**"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/tests/benchmark_text_service.py b/tests/benchmark_text_service.py
index 5ac8ec3f..52fb1783 100644
--- a/tests/benchmark_text_service.py
+++ b/tests/benchmark_text_service.py
@@ -9,7 +9,10 @@
 
 @pytest.fixture
 def sample_text_10kb():
-    """Generate a 10KB sample text with various PII entities."""
+    """Generate a sample text with various PII entities.
+
+    Note: Reduced size for CI environments to prevent memory issues.
+    """
     # Base text with PII entities
     base_text = (
         "Contact John Doe at john.doe@example.com or call (555) 123-4567. "
@@ -20,8 +23,16 @@ def sample_text_10kb():
         "Her phone number is 555-987-6543 and email is jane.smith@company.org. "
     )
 
-    # Repeat the text to reach approximately 10KB
-    repetitions = 10000 // len(base_text) + 1
+    # Check if running in CI environment
+    import os
+
+    if os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS"):
+        # Use smaller sample in CI to prevent memory issues
+        repetitions = 50
+    else:
+        # Use full size for local development
+        repetitions = 10000 // len(base_text) + 1
+
     return base_text * repetitions