diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..7bb60651 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "workbench.colorCustomizations": { + "statusBar.noFolderBackground": "#5f00af" + } +} \ No newline at end of file diff --git a/PARADEDB_USAGE.md b/PARADEDB_USAGE.md new file mode 100644 index 00000000..627359b0 --- /dev/null +++ b/PARADEDB_USAGE.md @@ -0,0 +1,169 @@ +# Using pg_search with ParadeDB + +This document explains how to use the pg_search gem with ParadeDB's pg_search PostgreSQL extension for BM25-based full-text search. + +## Prerequisites + +1. Install the ParadeDB pg_search PostgreSQL extension in your database +2. Use pg_search gem version 2.3.7 or later with ParadeDB support + +## Setup + +### 1. Configure Multisearch to use ParadeDB + +In your Rails initializer (e.g., `config/initializers/pg_search.rb`): + +```ruby +PgSearch.multisearch_options = { + using: :paradedb +} +``` + +### 2. Run the ParadeDB migration + +Generate and run the migration to set up ParadeDB: + +```bash +rails generate pg_search:migration:paradedb +rails db:migrate +``` + +This migration will: +- Install the pg_search PostgreSQL extension +- Create a BM25 index on the pg_search_documents table + +### 3. 
Usage Examples + +#### Basic Search + +```ruby +# Perform a multisearch using ParadeDB's BM25 algorithm +results = PgSearch.multisearch("red shoes") + +# Results are automatically ranked by BM25 score +results.each do |document| + puts "#{document.searchable_type} ##{document.searchable_id}" + puts "Content: #{document.content}" +end +``` + +#### Advanced Query Types + +```ruby +# Configure ParadeDB with query options +PgSearch.multisearch_options = { + using: { + paradedb: { + # Phrase search - finds exact phrases + query_type: :phrase + } + } +} + +# Search for an exact phrase +PgSearch.multisearch("red running shoes") + +# Prefix search - finds words starting with prefix +PgSearch.multisearch_options = { + using: { + paradedb: { + query_type: :prefix + } + } +} +PgSearch.multisearch("sho") # Finds: shoes, shopping, etc. + +# Fuzzy search - finds similar words +PgSearch.multisearch_options = { + using: { + paradedb: { + query_type: :fuzzy, + fuzzy_distance: 2 # Allow up to 2 character differences + } + } +} +PgSearch.multisearch("sheos") # Finds: shoes +``` + +### 4. Model-specific Search + +You can also use ParadeDB for model-specific searches: + +```ruby +class Product < ApplicationRecord + include PgSearch::Model + + pg_search_scope :search_products, + against: [:name, :description], + using: { + paradedb: { + key_field: 'id' # Specify the primary key field + } + } +end + +# Use it +products = Product.search_products("laptop") +``` + +### 5. Combining with Rankings + +ParadeDB results are automatically ordered by BM25 score. 
You can access the rank: + +```ruby +results = PgSearch.multisearch("shoes").with_pg_search_rank + +results.each do |result| + puts "Score: #{result.pg_search_rank}" + puts "Result: #{result.searchable}" +end +``` + +## Migration Details + +The ParadeDB migration creates a BM25 index with this structure: + +```sql +CREATE INDEX pg_search_documents_bm25_idx +ON pg_search_documents +USING bm25 (id, content, searchable_id, searchable_type) +WITH (key_field='id'); +``` + +The `key_field` parameter is crucial as it's used by ParadeDB's `score()` function for ranking. + +## Performance Considerations + +1. **BM25 vs TSearch**: ParadeDB's BM25 algorithm often provides better relevance ranking than PostgreSQL's built-in TSearch, especially for longer documents. + +2. **Index Size**: BM25 indexes can be larger than TSearch indexes but provide faster query performance. + +3. **Query Syntax**: ParadeDB supports a rich query syntax including wildcards, fuzzy matching, and phrase queries. + +## Limitations + +1. ParadeDB features are only available when the pg_search PostgreSQL extension is installed +2. Some advanced TSearch features (like language-specific stemming) may work differently with ParadeDB +3. The multisearch table must have a numeric key field for scoring to work properly + +## Troubleshooting + +If you encounter errors: + +1. Ensure the pg_search PostgreSQL extension is installed: + ```sql + CREATE EXTENSION IF NOT EXISTS pg_search; + ``` + +2. Verify the BM25 index exists: + ```sql + \di pg_search_documents_bm25_idx + ``` + +3. 
Check that your queries are properly escaped (single quotes are automatically handled by the gem) + +## Further Reading + +- [ParadeDB Documentation](https://docs.paradedb.com/) +- [pg_search Extension on Neon](https://neon.tech/docs/extensions/pg_search) +- [BM25 Algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) \ No newline at end of file diff --git a/PG_SEARCH_SCOPE_PARADEDB.md b/PG_SEARCH_SCOPE_PARADEDB.md new file mode 100644 index 00000000..51f0c2b4 --- /dev/null +++ b/PG_SEARCH_SCOPE_PARADEDB.md @@ -0,0 +1,288 @@ +# Using ParadeDB with pg_search_scope + +This document explains how to use ParadeDB's BM25 search algorithm with pg_search_scope for model-specific searches. + +## Basic Usage + +### Simple Search + +```ruby +class Product < ApplicationRecord + include PgSearch::Model + + # Basic ParadeDB search on a single column + pg_search_scope :search_by_name, + against: :name, + using: :paradedb +end + +# Usage +Product.search_by_name("laptop") +``` + +### Multi-Column Search + +```ruby +class Article < ApplicationRecord + include PgSearch::Model + + # Search across multiple columns with BM25 + pg_search_scope :search_content, + against: [:title, :body, :summary], + using: :paradedb +end + +# Usage +Article.search_content("ruby programming") +``` + +## Advanced Configuration + +### ParadeDB-Specific Options + +```ruby +class Document < ApplicationRecord + include PgSearch::Model + + pg_search_scope :advanced_search, + against: [:title, :content], + using: { + paradedb: { + # Query types + query_type: :phrase, # :standard (default), :phrase, :prefix, :fuzzy + fuzzy_distance: 2, # For fuzzy search (edit distance) + + # Index management + auto_create_index: true, # Auto-create BM25 index (default: true) + check_extension: true, # Check pg_search extension (default: true) + index_name: 'custom_idx', # Custom index name + + # Key field for scoring + key_field: 'id' # Primary key field (default: model's primary key) + } + } +end + +# Different query types 
+Document.advanced_search("exact phrase") # Phrase search +Document.advanced_search("pref") # With query_type: :prefix +Document.advanced_search("similr") # With query_type: :fuzzy +``` + +### Combining with Other Search Methods + +```ruby +class Product < ApplicationRecord + include PgSearch::Model + + # Combine ParadeDB with other search methods + pg_search_scope :hybrid_search, + against: [:name, :description], + using: { + paradedb: {}, # BM25 ranking + tsearch: { prefix: true }, # Full-text with prefix + trigram: { threshold: 0.3 } # Fuzzy matching + } +end +``` + +### Weighted Columns + +```ruby +class BlogPost < ApplicationRecord + include PgSearch::Model + + # Weighted search (ParadeDB will consider weights in BM25 scoring) + pg_search_scope :weighted_search, + against: { + title: 'A', # Highest weight + summary: 'B', # Medium weight + content: 'C' # Lower weight + }, + using: :paradedb +end +``` + +## Performance Optimization + +### Disable Checks in Production + +```ruby +class Product < ApplicationRecord + include PgSearch::Model + + # Disable runtime checks for better performance + pg_search_scope :fast_search, + against: :name, + using: { + paradedb: { + check_extension: false, # Skip extension check + auto_create_index: false # Don't auto-create indexes + } + } +end +``` + +### Custom Index Names + +```ruby +class LargeTable < ApplicationRecord + include PgSearch::Model + + # Use shorter index names to avoid PostgreSQL's 63-char limit + pg_search_scope :search_all, + against: [:very_long_column_name_1, :very_long_column_name_2], + using: { + paradedb: { + index_name: 'large_table_search_idx' + } + } +end +``` + +## Index Management + +### Automatic Index Creation + +When you define a pg_search_scope with ParadeDB, it automatically creates a BM25 index on first use: + +```sql +-- Automatically created index +CREATE INDEX CONCURRENTLY products_name_bm25_idx +ON products +USING bm25 (id, name) +WITH (key_field='id'); +``` + +### Manual Index Creation + 
+You can also create indexes manually for better control: + +```ruby +class CreateProductSearchIndex < ActiveRecord::Migration[7.0] + def up + execute <<~SQL + CREATE INDEX products_search_idx + ON products + USING bm25 (id, name, description, category) + WITH (key_field='id') + SQL + end + + def down + execute "DROP INDEX IF EXISTS products_search_idx" + end +end +``` + +Then reference it in your model: + +```ruby +pg_search_scope :search, + against: [:name, :description, :category], + using: { + paradedb: { + index_name: 'products_search_idx', + auto_create_index: false # Don't create another index + } + } +``` + +## Ranking and Ordering + +### Using ParadeDB Ranking + +```ruby +class Product < ApplicationRecord + include PgSearch::Model + + # ParadeDB automatically ranks by BM25 score + pg_search_scope :ranked_search, + against: [:name, :description], + using: :paradedb +end + +# Results are automatically ordered by relevance +products = Product.ranked_search("gaming laptop") + +# Access the rank score +products_with_rank = Product.ranked_search("gaming laptop").with_pg_search_rank +products_with_rank.each do |product| + puts "#{product.name}: #{product.pg_search_rank}" +end +``` + +### Custom Ranking + +```ruby +class Article < ApplicationRecord + include PgSearch::Model + + pg_search_scope :custom_ranked_search, + against: [:title, :content], + using: :paradedb, + ranked_by: ":paradedb * 0.8 + articles.popularity * 0.2" +end +``` + +## Migration Generator + +Generate a migration to set up ParadeDB: + +```bash +rails generate pg_search:migration:paradedb +rails db:migrate +``` + +This creates: +1. The pg_search PostgreSQL extension +2. A BM25 index on pg_search_documents (for multisearch) + +## Troubleshooting + +### Extension Not Found + +If you get an error about the pg_search extension: + +```ruby +PgSearch::Features::ParadeDB::ExtensionNotInstalled: ParadeDB pg_search extension is not installed. 
+``` + +Run this SQL command: +```sql +CREATE EXTENSION IF NOT EXISTS pg_search; +``` + +### Index Creation Failed + +If automatic index creation fails, create it manually: + +```sql +CREATE INDEX your_table_columns_bm25_idx +ON your_table +USING bm25 (id, column1, column2) +WITH (key_field='id'); +``` + +### Performance Issues + +For large tables, disable automatic checks: + +```ruby +pg_search_scope :search, + against: :content, + using: { + paradedb: { + check_extension: Rails.env.development?, + auto_create_index: Rails.env.development? + } + } +``` + +## Differences from Other Search Methods + +- **BM25 Algorithm**: ParadeDB uses BM25 ranking, often providing better relevance than TF-IDF +- **No Language Processing**: Unlike tsearch, ParadeDB doesn't do stemming or stop-word removal +- **Case Sensitive**: ParadeDB searches are case-sensitive by default +- **Exact Matching**: More precise than trigram's fuzzy matching +- **Performance**: Generally faster than trigram for large datasets \ No newline at end of file diff --git a/SETUP_LOCAL_GEM.md b/SETUP_LOCAL_GEM.md new file mode 100644 index 00000000..8c408545 --- /dev/null +++ b/SETUP_LOCAL_GEM.md @@ -0,0 +1,96 @@ +# Setting up Local pg_search Gem in Backend + +Since the backend container isn't recognizing the ParadeDB feature, you need to configure it to use your local pg_search gem. Here are the steps: + +## Option 1: Using Bundle Config (Recommended for Docker) + +1. First, exit the Rails console and the Docker container + +2. In your host machine, navigate to the backend directory: + ```bash + cd /Users/jak/projects/SimplerQMS/backend + ``` + +3. Configure bundler to use the local pg_search gem: + ```bash + bundle config set --local local.pg_search /Users/jak/projects/pg_search + ``` + +4. 
Update the Gemfile to use the local path: + ```ruby + # In /Users/jak/projects/SimplerQMS/backend/Gemfile + # Change this line: + gem "pg_search" + + # To this: + gem "pg_search", path: "/Users/jak/projects/pg_search" + ``` + +5. Rebuild the Docker container with the new gem: + ```bash + docker compose down + docker compose build api + docker compose up -d + ``` + +## Option 2: Using Docker Volume Mount + +Add a volume mount to your docker-compose.yml to map the local pg_search gem: + +```yaml +services: + api: + volumes: + - ./backend:/usr/src/app + - /Users/jak/projects/pg_search:/usr/local/bundle/gems/pg_search-2.3.7 +``` + +## Option 3: Quick Test Without Rebuilding + +For a quick test without rebuilding, you can manually copy the files into the running container: + +1. Copy the new ParadeDB files into the container: + ```bash + docker cp /Users/jak/projects/pg_search/lib/pg_search/features/paradedb.rb simplerqms-api-1:/usr/local/bundle/gems/pg_search-2.3.7/lib/pg_search/features/ + docker cp /Users/jak/projects/pg_search/lib/pg_search/features.rb simplerqms-api-1:/usr/local/bundle/gems/pg_search-2.3.7/lib/pg_search/ + docker cp /Users/jak/projects/pg_search/lib/pg_search/scope_options.rb simplerqms-api-1:/usr/local/bundle/gems/pg_search-2.3.7/lib/pg_search/ + ``` + +2. 
Enter the container and restart Rails: + ```bash + docker compose exec api bash + rails console + ``` + +## Verification + +After applying one of the above methods, verify the setup: + +```ruby +# In Rails console +require 'pg_search/features/paradedb' +PgSearch::Features::ParadeDB # Should not raise an error + +# Check if ParadeDB is in the feature list +PgSearch::ScopeOptions::FEATURE_CLASSES.keys +# Should include :paradedb + +# Then test the search +::PgSearch.multisearch("policy") +``` + +## Alternative: Fallback to Standard Features + +If you need to test immediately without the gem setup, you can temporarily revert the configuration: + +```ruby +# In Rails console +PgSearch.multisearch_options = { + using: [:tsearch, :trigram, :dmetaphone], + ignoring: :accents, + ranked_by: ":dmetaphone + (0.25 * :trigram)" +} + +# This should work with the existing gem +::PgSearch.multisearch("policy") +``` \ No newline at end of file diff --git a/TEST_INSTRUCTIONS.md b/TEST_INSTRUCTIONS.md new file mode 100644 index 00000000..066d38b7 --- /dev/null +++ b/TEST_INSTRUCTIONS.md @@ -0,0 +1,94 @@ +# Testing ParadeDB Integration + +Follow these steps to test the ParadeDB integration: + +## 1. Ensure the gem changes are available in the backend + +Since the pg_search gem is symlinked, the changes should be available. If not, you may need to: +```bash +cd /Users/jak/projects/SimplerQMS/backend +bundle install +``` + +## 2. Connect to the backend container + +```bash +docker compose exec api bash +``` + +## 3. Run the Rails console + +```bash +rails console +``` + +## 4. Test basic multisearch + +```ruby +# Test basic search +results = ::PgSearch.multisearch("policy") +puts "Found #{results.count} results" +results.first(5).each { |r| puts "#{r.searchable_type} ##{r.searchable_id}: #{r.content[0..100]}..." } +``` + +## 5. 
Test with ranking + +```ruby +# Test search with ranking +ranked_results = ::PgSearch.multisearch("policy").with_pg_search_rank +puts "Found #{ranked_results.count} results with ranking" +ranked_results.first(5).each do |r| + puts "Score: #{r.pg_search_rank} - #{r.searchable_type} ##{r.searchable_id}" +end +``` + +## 6. Test different query types + +```ruby +# Test phrase search +PgSearch.multisearch_options = { using: { paradedb: { query_type: :phrase } } } +phrase_results = ::PgSearch.multisearch("quality management") +puts "Phrase search found #{phrase_results.count} results" + +# Test prefix search +PgSearch.multisearch_options = { using: { paradedb: { query_type: :prefix } } } +prefix_results = ::PgSearch.multisearch("pol") +puts "Prefix search found #{prefix_results.count} results" + +# Reset to default +PgSearch.multisearch_options = { using: :paradedb } +``` + +## Expected Results + +If the integration is working correctly: +1. Searches should return results without errors +2. The `with_pg_search_rank` method should provide ranking scores +3. Different query types should produce different results + +## Troubleshooting + +If you get errors: + +1. Check if the ParadeDB extension is installed: + ```sql + SELECT * FROM pg_extension WHERE extname = 'pg_search'; + ``` + +2. Check if the BM25 index exists: + ```sql + \di pg_search_documents* + ``` + +3. 
Enable SQL logging to see the generated queries:
+   ```ruby
+   ActiveRecord::Base.logger = Logger.new(STDOUT)
+   ```
+
+## Restoring Original Configuration
+
+After testing, restore the original configuration:
+```bash
+# Edit /Users/jak/projects/SimplerQMS/backend/config/initializers/global_search.rb
+# Restore the original multisearch options
+```
\ No newline at end of file
diff --git a/create_bm25_index.rb b/create_bm25_index.rb
new file mode 100644
index 00000000..75186325
--- /dev/null
+++ b/create_bm25_index.rb
@@ -0,0 +1,113 @@
+# Create BM25 index for ParadeDB
+# Run this in Rails console to set up the required index
+
+puts "=" * 60
+puts "Creating BM25 Index for ParadeDB"
+puts "=" * 60
+
+# First, check if pg_search extension is installed
+puts "\n1. Checking pg_search extension..."
+begin
+  result = ActiveRecord::Base.connection.execute("SELECT * FROM pg_extension WHERE extname = 'pg_search'")
+  if result.any?
+    puts "✓ pg_search extension is installed"
+  else
+    puts "✗ pg_search extension is NOT installed"
+    puts " Installing pg_search extension..."
+    ActiveRecord::Base.connection.execute("CREATE EXTENSION IF NOT EXISTS pg_search")
+    puts "✓ pg_search extension installed"
+  end
+rescue => e
+  puts "✗ Error with extension: #{e.message}"
+end
+
+# Create the BM25 index
+puts "\n2. Creating BM25 index on pg_search_documents..."
+begin
+  # First, drop any existing BM25 index
+  ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS pg_search_documents_bm25_idx")
+
+  # Create the BM25 index
+  # Note: We need to include all columns we want to search in the index
+  create_index_sql = <<-SQL
+    CREATE INDEX pg_search_documents_bm25_idx
+    ON pg_search_documents
+    USING bm25 (id, content, searchable_id, searchable_type)
+    WITH (key_field='id')
+  SQL
+
+  ActiveRecord::Base.connection.execute(create_index_sql)
+  puts "✓ BM25 index created successfully"
+rescue => e
+  puts "✗ Error creating index: #{e.message}"
+  puts " Trying alternative index configuration..."
+
+  # Try a simpler index
+  begin
+    simpler_index_sql = <<-SQL
+      CREATE INDEX pg_search_documents_bm25_idx
+      ON pg_search_documents
+      USING bm25 (content)
+      WITH (key_field='id')
+    SQL
+
+    ActiveRecord::Base.connection.execute(simpler_index_sql)
+    puts "✓ Simpler BM25 index created"
+  rescue => e2
+    puts "✗ Still failed: #{e2.message}"
+  end
+end
+
+# Verify the index was created
+puts "\n3. Verifying BM25 index..."
+begin
+  result = ActiveRecord::Base.connection.execute("SELECT indexname, indexdef FROM pg_indexes WHERE indexdef LIKE '%bm25%'")
+  if result.any?
+    puts "✓ BM25 indexes found:"
+    result.each do |row|
+      puts " - #{row['indexname']}"
+      puts " #{row['indexdef']}"
+    end
+  else
+    puts "✗ No BM25 indexes found after creation attempt"
+  end
+rescue => e
+  puts "✗ Error checking indexes: #{e.message}"
+end
+
+# Test the search again
+puts "\n4. Testing ParadeDB search with new index..."
+begin
+  sql = "SELECT * FROM pg_search_documents WHERE content @@@ 'policy' LIMIT 5"
+  result = ActiveRecord::Base.connection.execute(sql)
+  puts "✓ ParadeDB search works! Found #{result.count} results"
+
+  if result.any?
+    puts "\n Sample results:"
+    result.each_with_index do |row, i|
+      puts " #{i+1}. #{row['searchable_type']} ##{row['searchable_id']}"
+      puts " Content: #{row['content'][0..80]}..."
+    end
+  end
+rescue => e
+  puts "✗ Search still failing: #{e.message}"
+end
+
+# Test with score
+puts "\n5. Testing ParadeDB with scoring..."
+begin
+  sql = "SELECT *, paradedb.score(id) as rank FROM pg_search_documents WHERE content @@@ 'policy' ORDER BY rank DESC LIMIT 5"
+  result = ActiveRecord::Base.connection.execute(sql)
+  puts "✓ ParadeDB search with scoring works!"
+
+  result.each_with_index do |row, i|
+    puts " #{i+1}. Score: #{row['rank']} - #{row['searchable_type']} ##{row['searchable_id']}"
+  end
+rescue => e
+  puts "✗ Scoring error: #{e.message}"
+end
+
+puts "\n" + "=" * 60
+puts "Setup Complete"
+puts "=" * 60
+puts "\nNow try: ::PgSearch.multisearch('policy')"
\ No newline at end of file
diff --git a/lib/pg_search/features.rb b/lib/pg_search/features.rb
index b95cbd50..d41aace3 100644
--- a/lib/pg_search/features.rb
+++ b/lib/pg_search/features.rb
@@ -5,6 +5,7 @@
 require "pg_search/features/dmetaphone"
 require "pg_search/features/trigram"
 require "pg_search/features/tsearch"
+require "pg_search/features/paradedb"
 
 module PgSearch
   module Features
diff --git a/lib/pg_search/features/paradedb.rb b/lib/pg_search/features/paradedb.rb
new file mode 100644
index 00000000..5f0787e6
--- /dev/null
+++ b/lib/pg_search/features/paradedb.rb
@@ -0,0 +1,223 @@
+# frozen_string_literal: true
+
+require "active_support/core_ext/module/delegation"
+
+module PgSearch
+  module Features
+    # BM25 full-text search backed by ParadeDB's pg_search PostgreSQL
+    # extension: conditions use the `@@@` operator and ranking uses
+    # `paradedb.score`.
+    class ParadeDB < Feature
+      class ExtensionNotInstalled < StandardError; end
+      class IndexNotFound < StandardError; end
+
+      # PostgreSQL truncates identifiers longer than this many bytes.
+      MAX_IDENTIFIER_LENGTH = 63
+
+      EXTENSION_MISSING_MESSAGE = <<~ERROR
+        ParadeDB pg_search extension is not installed.
+
+        To fix this, run the following SQL command:
+          CREATE EXTENSION IF NOT EXISTS pg_search;
+
+        Or generate and run the ParadeDB migration:
+          rails generate pg_search:migration:paradedb
+          rails db:migrate
+      ERROR
+
+      class << self
+        # True once the pg_search extension has been found in pg_extension.
+        # Only a successful lookup is cached, so an extension installed
+        # later is picked up without restarting the process.
+        attr_accessor :extension_verified
+
+        def valid_options
+          super + %i[
+            index_name key_field text_fields numeric_fields boolean_fields
+            json_fields range_fields query_type limit offset
+            fuzzy_distance prefix_search phrase_search
+            auto_create_index check_extension
+          ]
+        end
+
+        # Reset the extension check cache (useful for testing)
+        def reset_extension_check!
+          self.extension_verified = false
+        end
+      end
+
+      # Unless `check_extension: false` is given, verifies the extension and,
+      # when `auto_create_index` is also enabled, makes sure the BM25 index
+      # exists. Both options default to true.
+      def initialize(query, options, all_columns, model, normalizer)
+        super
+        # Handle both simple syntax (using: :paradedb) and complex syntax (using: { paradedb: {...} })
+        @options = options || {}
+        @check_extension = @options.fetch(:check_extension, true)
+        @auto_create_index = @options.fetch(:auto_create_index, true)
+
+        ensure_paradedb_ready! if @check_extension
+      end
+
+      # Arel node matching the query against every searched column with the
+      # `@@@` operator; per-column conditions are combined with OR.
+      def conditions
+        # Direct column references (column.full_name) avoid the coalesce
+        # wrapper. Without columns, fall back to the multisearch content column.
+        targets = columns.any? ? columns.map(&:full_name) : ["#{quoted_table_name}.content"]
+
+        targets
+          .map { |target| Arel::Nodes::InfixOperation.new("@@@", arel_wrap(target), arel_wrap(formatted_query)) }
+          .reduce { |combined, condition| Arel::Nodes::Or.new(combined, condition) }
+      end
+
+      # Arel node calling paradedb.score on the key field, used for ranking.
+      def rank
+        Arel::Nodes::NamedFunction.new(
+          "paradedb.score",
+          [Arel.sql("#{quoted_table_name}.#{connection.quote_column_name(key_field)}")]
+        )
+      end
+
+      private
+
+      def key_field
+        # For pg_search_documents table, always use 'id' as the key field
+        # For other tables, allow customization
+        if model.table_name == 'pg_search_documents'
+          'id'
+        else
+          @options[:key_field] || model.primary_key || 'id'
+        end
+      end
+
+      def ensure_paradedb_ready!
+        check_extension!
+        ensure_bm25_index! if @auto_create_index
+      end
+
+      # Raises ExtensionNotInstalled unless pg_extension lists pg_search.
+      def check_extension!
+        return if self.class.extension_verified
+
+        installed = connection.execute(<<~SQL).any?
+          SELECT 1 FROM pg_extension WHERE extname = 'pg_search' LIMIT 1
+        SQL
+        raise ExtensionNotInstalled, EXTENSION_MISSING_MESSAGE unless installed
+
+        self.class.extension_verified = true
+      rescue ActiveRecord::StatementInvalid => e
+        raise ExtensionNotInstalled, "Could not verify pg_search extension: #{e.message}"
+      end
+
+      # Creates the BM25 index unless pg_indexes already lists it.
+      def ensure_bm25_index!
+        index_name = bm25_index_name
+        exists = connection.execute(<<~SQL).any?
+          SELECT 1
+          FROM pg_indexes
+          WHERE tablename = #{connection.quote(model.table_name)}
+            AND indexname = #{connection.quote(index_name)}
+          LIMIT 1
+        SQL
+
+        create_bm25_index!(index_name) unless exists
+      end
+
+      # Index name from the :index_name option, or derived from the table and
+      # searched columns. It is cut to the identifier limit here so that the
+      # existence check and CREATE INDEX use the same name.
+      def bm25_index_name
+        default_name =
+          if model.table_name == 'pg_search_documents'
+            'pg_search_documents_bm25_idx'
+          else
+            "#{model.table_name}_#{columns.map(&:name).join('_')}_bm25_idx"
+          end
+
+        (@options[:index_name] || default_name).to_s[0, MAX_IDENTIFIER_LENGTH]
+      end
+
+      # Quoted column list for the index: key field first, then the searched
+      # columns, without duplicates.
+      def bm25_index_columns
+        searched =
+          if model.table_name == 'pg_search_documents'
+            %w[content searchable_id searchable_type]
+          else
+            columns.map { |column| column.name.to_s }
+          end
+
+        [key_field.to_s, *searched].uniq.map { |name| connection.quote_column_name(name) }
+      end
+
+      def create_bm25_index!(index_name)
+        index_name = index_name.to_s[0, MAX_IDENTIFIER_LENGTH]
+        definition = "ON #{quoted_table_name} USING bm25 (#{bm25_index_columns.join(', ')}) " \
+                     "WITH (key_field=#{connection.quote(key_field)})"
+        # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, and a
+        # failed statement aborts the open transaction, so pick the form up
+        # front instead of retrying after an error.
+        concurrently = connection.transaction_open? ? "" : "CONCURRENTLY "
+
+        connection.execute(
+          "CREATE INDEX #{concurrently}IF NOT EXISTS #{connection.quote_column_name(index_name)} #{definition}"
+        )
+
+        logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
+        logger&.info("[pg_search] Created BM25 index: #{index_name} on #{quoted_table_name}")
+      rescue ActiveRecord::StatementInvalid => e
+        # Another session may have created the index in the meantime.
+        return if e.message.include?("already exists")
+
+        raise IndexNotFound, <<~ERROR
+          Failed to create BM25 index: #{e.message}
+
+          Please create the index manually:
+            CREATE INDEX #{index_name}
+            #{definition}
+        ERROR
+      end
+
+      # SQL string literal holding the ParadeDB query for the configured
+      # :query_type. Quoting is delegated to the connection adapter.
+      def formatted_query
+        return "''" if query.blank?
+
+        case @options[:query_type]
+        when :phrase then phrase_query
+        when :prefix then prefix_query
+        when :fuzzy then fuzzy_query
+        else standard_query
+        end
+      end
+
+      def standard_query
+        connection.quote(query)
+      end
+
+      def phrase_query
+        # Wrap the query in double quotes for phrase search.
+        # NOTE(review): embedded double quotes are doubled, as before; confirm
+        # this is the escaping ParadeDB's query parser expects.
+        connection.quote(%("#{query.gsub('"', '""')}"))
+      end
+
+      def prefix_query
+        # For prefix search, add wildcard at the end
+        connection.quote("#{query}*")
+      end
+
+      def fuzzy_query
+        # For fuzzy search, use the ~N syntax where N is the distance.
+        # Integer() rejects a non-numeric :fuzzy_distance.
+        distance = Integer(@options[:fuzzy_distance] || 1)
+        connection.quote("#{query}~#{distance}")
+      end
+
+      def arel_wrap(sql_string)
+        Arel::Nodes::Grouping.new(Arel.sql(sql_string))
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/pg_search/migration/paradedb_generator.rb b/lib/pg_search/migration/paradedb_generator.rb
new file mode 100644
index 00000000..5fb68cec
--- /dev/null
+++ b/lib/pg_search/migration/paradedb_generator.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+require "pg_search/migration/generator"
+
+module PgSearch
+  module Migration
+    class ParadedbGenerator < Generator
+      def migration_name
+        "add_pg_search_paradedb_support"
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/pg_search/migration/templates/add_pg_search_paradedb_support.rb.erb b/lib/pg_search/migration/templates/add_pg_search_paradedb_support.rb.erb
new file mode 100644
index 00000000..1d8f1c43
--- /dev/null
+++ b/lib/pg_search/migration/templates/add_pg_search_paradedb_support.rb.erb
@@ -0,0 +1,62 @@
+class AddPgSearchParadedbSupport < ActiveRecord::Migration<%= migration_version %>
+  # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so this
+  # migration must not be wrapped in the default DDL transaction.
+  disable_ddl_transaction!
+
+  def up
+    # Install the ParadeDB pg_search extension
+    say_with_time("Installing pg_search extension by ParadeDB") do
+      execute "CREATE EXTENSION IF NOT EXISTS pg_search"
+    end
+
+    # Only create index if pg_search_documents table exists
+    if table_exists?(:pg_search_documents)
+      say_with_time("Creating BM25 index on pg_search_documents") do
+        # Check if index already exists
+        index_exists = select_value(<<~SQL)
+          SELECT 1 FROM pg_indexes
+          WHERE tablename = 'pg_search_documents'
+          AND indexname = 'pg_search_documents_bm25_idx'
+          LIMIT 1
+        SQL
+
+        if index_exists
+          say "BM25 index already exists, skipping creation"
+        else
+          # Create a BM25 index on the pg_search_documents table
+          # Using id as the key field for ParadeDB's score() function
+          execute <<~SQL
+            CREATE INDEX CONCURRENTLY pg_search_documents_bm25_idx
+            ON pg_search_documents
+            USING bm25 (id, content, searchable_id, searchable_type)
+            WITH (key_field='id')
+          SQL
+        end
+      end
+    else
+      say "pg_search_documents table not found. Run pg_search:multisearch:rebuild after creating the table."
+    end
+  end
+
+  def down
+    say_with_time("Removing BM25 index from pg_search_documents") do
+      execute "DROP INDEX IF EXISTS pg_search_documents_bm25_idx"
+    end
+
+    # Note: We don't drop the pg_search extension as it might be used by other tables
+    # If you want to remove it, uncomment the following:
+    # say_with_time("Removing pg_search extension") do
+    #   execute "DROP EXTENSION IF EXISTS pg_search CASCADE"
+    # end
+  end
+
+  private
+
+  def table_exists?(table_name)
+    connection.table_exists?(table_name)
+  end
+
+  def select_value(sql)
+    connection.select_value(sql)
+  end
+end
\ No newline at end of file
diff --git a/lib/pg_search/scope_options.rb b/lib/pg_search/scope_options.rb
index 6fe515dd..2c2d0bfb 100644
--- a/lib/pg_search/scope_options.rb
+++ b/lib/pg_search/scope_options.rb
@@ -136,7 +136,8 @@ def subquery_join
     FEATURE_CLASSES = {
       dmetaphone: Features::DMetaphone,
       tsearch: Features::TSearch,
-      trigram: Features::Trigram
+      trigram: Features::Trigram,
+      paradedb: Features::ParadeDB
     }.freeze
 
     def feature_for(feature_name)
diff --git 
a/spec/lib/pg_search/features/paradedb_spec.rb b/spec/lib/pg_search/features/paradedb_spec.rb
new file mode 100644
index 00000000..d244ec52
--- /dev/null
+++ b/spec/lib/pg_search/features/paradedb_spec.rb
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+require "spec_helper"
+
+# Specs for the ParadeDB feature: SQL produced by #conditions and #rank,
+# and the option keys reported by .valid_options.
+describe PgSearch::Features::ParadeDB do
+  subject(:feature) { described_class.new(query, options, columns, Model, normalizer) }
+
+  let(:query) { "search query" }
+  let(:options) { {} }
+  let(:columns) { [column_double] }
+  let(:column_double) { column_double_for("content") }
+  let(:config_double) { instance_double(PgSearch::Configuration, ignore: []) }
+  let(:normalizer) { PgSearch::Normalizer.new(config_double) }
+
+  # Model handed to the feature; its table name also qualifies the column doubles.
+  with_model :Model do
+    table do |t|
+      t.string :content
+      t.integer :searchable_id
+      t.timestamps
+    end
+  end
+
+  # Builds an instance_double for a search column with the given name.
+  def column_double_for(column_name)
+    qualified_name = %("#{Model.table_name}"."#{column_name}")
+    instance_double(PgSearch::Configuration::Column, name: column_name, to_sql: qualified_name)
+  end
+
+  describe "#conditions" do
+    # SQL text of the search condition built for the current query/options/columns.
+    let(:conditions_sql) { feature.conditions.to_sql }
+
+    context "with a simple query" do
+      let(:query) { "shoes" }
+
+      it "generates the correct ParadeDB search condition" do
+        expect(conditions_sql).to match(/@@@ 'shoes'/)
+      end
+    end
+
+    context "with special characters" do
+      let(:query) { "men's shoes" }
+
+      it "escapes single quotes properly" do
+        expect(conditions_sql).to match(/@@@ 'men''s shoes'/)
+      end
+    end
+
+    context "with phrase search" do
+      let(:options) { { query_type: :phrase } }
+      let(:query) { "red shoes" }
+
+      it "wraps the query in double quotes" do
+        expect(conditions_sql).to match(/@@@ '"red shoes"'/)
+      end
+    end
+
+    context "with prefix search" do
+      let(:options) { { query_type: :prefix } }
+      let(:query) { "sho" }
+
+      it "adds wildcard to the query" do
+        expect(conditions_sql).to match(/@@@ 'sho\*'/)
+      end
+    end
+
+    context "with fuzzy search" do
+      let(:options) { { query_type: :fuzzy, fuzzy_distance: 2 } }
+      let(:query) { "sheos" }
+
+      it "adds fuzzy distance to the query" do
+        expect(conditions_sql).to match(/@@@ 'sheos~2'/)
+      end
+    end
+
+    context "with multiple columns" do
+      let(:columns) { %w[title content].map { |column_name| column_double_for(column_name) } }
+
+      it "creates OR conditions for each column" do
+        expect(conditions_sql).to include("OR")
+        expect(conditions_sql).to match(/"title" @@@ 'search query'/)
+        expect(conditions_sql).to match(/"content" @@@ 'search query'/)
+      end
+    end
+  end
+
+  describe "#rank" do
+    # SQL text of the ranking expression built for the current options.
+    let(:rank_sql) { feature.rank.to_sql }
+
+    context "for regular models" do
+      let(:options) { { key_field: "id" } }
+
+      it "generates paradedb.score with the specified key field" do
+        expect(rank_sql).to match(/paradedb\.score\(.*"id"\)/)
+      end
+    end
+
+    context "for PgSearch::Document model" do
+      # Stub the model's class name as PgSearch::Document.
+      before { allow(Model).to receive(:name).and_return("PgSearch::Document") }
+
+      it "uses searchable_id as the key field" do
+        expect(rank_sql).to match(/paradedb\.score\(.*"searchable_id"\)/)
+      end
+    end
+
+    context "without specified key field" do
+      it "defaults to id for regular models" do
+        expect(rank_sql).to match(/paradedb\.score\(.*"id"\)/)
+      end
+    end
+  end
+
+  describe ".valid_options" do
+    let(:advertised_options) { described_class.valid_options }
+
+    it "includes ParadeDB-specific options" do
+      expect(advertised_options).to include(
+        *%i[
+          index_name key_field text_fields numeric_fields boolean_fields json_fields range_fields
+          query_type limit offset fuzzy_distance prefix_search phrase_search
+        ]
+      )
+    end
+
+    it "includes base feature options" do
+      expect(advertised_options).to include(:only, :sort_only)
+    end
+  end
+end
\ No
newline at end of file
diff --git a/test_pg_search_scope_paradedb.rb b/test_pg_search_scope_paradedb.rb
new file mode 100644
index 00000000..ec744d17
--- /dev/null
+++ b/test_pg_search_scope_paradedb.rb
@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+
+# Test script for pg_search_scope with ParadeDB
+# Run this in a Rails console to test ParadeDB integration with model-specific searches
+#
+# Each numbered step prints a check mark on success or a cross with the error
+# message on failure and then moves on, so one failing step does not stop the
+# run. The script creates the test_products table in step 1 and drops it in
+# the cleanup step.
+
+puts "=" * 60
+puts "Testing pg_search_scope with ParadeDB"
+puts "=" * 60
+
+# Test 1: Create a test model with ParadeDB search
+puts "\n1. Creating test model with ParadeDB search..."
+begin
+  # Create a scratch table for testing (dropped again in the cleanup step)
+  ActiveRecord::Base.connection.execute(<<-SQL)
+    CREATE TABLE IF NOT EXISTS test_products (
+      id SERIAL PRIMARY KEY,
+      name VARCHAR(255),
+      description TEXT,
+      category VARCHAR(100),
+      created_at TIMESTAMP,
+      updated_at TIMESTAMP
+    )
+  SQL
+
+  # Define the model with ParadeDB search
+  class TestProduct < ActiveRecord::Base
+    self.table_name = 'test_products'
+    include PgSearch::Model
+
+    # Basic ParadeDB search
+    pg_search_scope :search_by_name,
+      against: :name,
+      using: :paradedb
+
+    # Multi-column ParadeDB search
+    pg_search_scope :search_all,
+      against: [:name, :description, :category],
+      using: :paradedb
+
+    # ParadeDB with options
+    # NOTE(review): auto_create_index is not among the option keys the feature
+    # spec checks in .valid_options; confirm the feature accepts it.
+    pg_search_scope :search_with_options,
+      against: [:name, :description],
+      using: {
+        paradedb: {
+          query_type: :prefix,
+          auto_create_index: true
+        }
+      }
+  end
+
+  puts "✓ Test model created"
+rescue => e
+  puts "✗ Error creating model: #{e.message}"
+end
+
+# Test 2: Insert test data
+puts "\n2. Inserting test data..."
+begin
+  TestProduct.delete_all
+
+  TestProduct.create!([
+    { name: "Gaming Laptop", description: "High-performance laptop for gaming", category: "Electronics" },
+    { name: "Office Laptop", description: "Business laptop for office work", category: "Electronics" },
+    { name: "Gaming Mouse", description: "RGB gaming mouse with high DPI", category: "Accessories" },
+    { name: "Laptop Stand", description: "Ergonomic stand for laptops", category: "Accessories" },
+    { name: "Gaming Keyboard", description: "Mechanical keyboard for gaming", category: "Accessories" }
+  ])
+
+  puts "✓ Created #{TestProduct.count} test products"
+rescue => e
+  puts "✗ Error inserting data: #{e.message}"
+end
+
+# Test 3: Test basic search
+puts "\n3. Testing basic ParadeDB search..."
+begin
+  results = TestProduct.search_by_name("gaming")
+  puts "✓ Found #{results.count} products matching 'gaming'"
+  results.each { |p| puts " - #{p.name}" }
+rescue => e
+  puts "✗ Error in basic search: #{e.message}"
+  puts e.backtrace.first(5)
+end
+
+# Test 4: Test multi-column search
+puts "\n4. Testing multi-column search..."
+begin
+  results = TestProduct.search_all("laptop")
+  puts "✓ Found #{results.count} products matching 'laptop' in any column"
+  results.each { |p| puts " - #{p.name}: #{p.description}" }
+rescue => e
+  puts "✗ Error in multi-column search: #{e.message}"
+end
+
+# Test 5: Test with ranking
+puts "\n5. Testing search with BM25 ranking..."
+begin
+  results = TestProduct.search_all("gaming").with_pg_search_rank
+  puts "✓ Search with ranking:"
+  results.each do |p|
+    puts " - #{p.name} (Score: #{p.pg_search_rank})"
+  end
+rescue => e
+  puts "✗ Error with ranking: #{e.message}"
+end
+
+# Test 6: Test prefix search
+puts "\n6. Testing prefix search..."
+begin
+  results = TestProduct.search_with_options("gam")
+  puts "✓ Found #{results.count} products with prefix 'gam'"
+  results.each { |p| puts " - #{p.name}" }
+rescue => e
+  puts "✗ Error in prefix search: #{e.message}"
+end
+
+# Test 7: Check created indexes
+# NOTE(review): no step in this script creates a BM25 index explicitly;
+# presumably auto_create_index in step 1 is expected to - confirm.
+puts "\n7. Checking BM25 indexes..."
+begin
+  indexes = ActiveRecord::Base.connection.execute(<<-SQL)
+    SELECT indexname, indexdef
+    FROM pg_indexes
+    WHERE tablename = 'test_products'
+    AND indexdef LIKE '%bm25%'
+  SQL
+
+  if indexes.any?
+    puts "✓ Found BM25 indexes:"
+    indexes.each do |idx|
+      puts " - #{idx['indexname']}"
+    end
+  else
+    puts "✗ No BM25 indexes found"
+  end
+rescue => e
+  puts "✗ Error checking indexes: #{e.message}"
+end
+
+# Test 8: Test combined search methods
+puts "\n8. Testing combined search methods..."
+begin
+  class TestProduct < ActiveRecord::Base
+    # Reopen the model and add a scope using both the paradedb and tsearch features
+    pg_search_scope :hybrid_search,
+      against: [:name, :description],
+      using: {
+        paradedb: {},
+        tsearch: { prefix: true }
+      }
+  end
+
+  results = TestProduct.hybrid_search("gaming")
+  puts "✓ Hybrid search found #{results.count} results"
+rescue => e
+  puts "✗ Error in hybrid search: #{e.message}"
+end
+
+# Cleanup
+puts "\n9. Cleanup..."
+begin
+  ActiveRecord::Base.connection.execute("DROP TABLE IF EXISTS test_products CASCADE")
+  puts "✓ Test table dropped"
+rescue => e
+  puts "✗ Error during cleanup: #{e.message}"
+end
+
+puts "\n" + "=" * 60
+puts "pg_search_scope ParadeDB Test Complete"
+puts "=" * 60