Skip to content

Commit

Permalink
Added DatabaseAdapter#map_urls and #map_docs
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeltelford committed Mar 25, 2024
1 parent cbcce8c commit fac9218
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 38 deletions.
17 changes: 5 additions & 12 deletions lib/wgit/database/adapters/in_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def size
# DB.
# @return [Array<Wgit::Document>] The search results obtained from the DB.
def search(
query, case_sensitive: false, whole_sentence: true, limit: 10, skip: 0
query, case_sensitive: false, whole_sentence: true,
limit: 10, skip: 0, &block
)
regex = if query.is_a?(Regexp)
query
Expand All @@ -78,11 +79,7 @@ def search(
return [] unless results

results = results[0...limit] if limit.positive?
results.map do |doc|
doc = Wgit::Document.new(doc)
yield(doc) if block_given?
doc
end
map_documents(results, &block)
end

# Deletes everything in the urls and documents collections.
Expand All @@ -101,17 +98,13 @@ def empty
# @param skip [Integer] The number of Urls to skip.
# @yield [url] Given each Url object (Wgit::Url) returned from the DB.
# @return [Array<Wgit::Url>] The uncrawled Urls obtained from the DB.
def uncrawled_urls(limit: 0, skip: 0)
def uncrawled_urls(limit: 0, skip: 0, &block)
uncrawled = @urls.reject { |url| url['crawled'] }
uncrawled = uncrawled[skip..]
return [] unless uncrawled

uncrawled = uncrawled[0...limit] if limit.positive?
uncrawled.map do |url_doc|
url = Wgit::Url.new(url_doc)
yield url if block_given?
url
end
map_urls(uncrawled, &block)
end

# Inserts or updates the object in the in-memory database.
Expand Down
24 changes: 6 additions & 18 deletions lib/wgit/database/adapters/mongo_db.rb
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,10 @@ def bulk_upsert(objs)
# @return [Array<Wgit::Document>] The Documents obtained from the DB.
def docs(limit: 0, skip: 0, &block)
results = retrieve(DOCUMENTS_COLLECTION, {},
sort: { date_added: 1 }, limit:, skip:)
sort: { date_added: 1 }, limit:, skip:)
return [] if results.count < 1 # results#empty? doesn't exist.

# results.respond_to? :map! is false so we use map and overwrite the var.
results = results.map { |doc_hash| Wgit::Document.new(doc_hash) }
results.each(&block) if block_given?

results
map_documents(results, &block)
end

# Returns all Url records from the DB.
Expand All @@ -267,11 +263,7 @@ def urls(crawled: nil, limit: 0, skip: 0, &block)
results = retrieve(URLS_COLLECTION, query, sort:, limit:, skip:)
return [] if results.count < 1 # results#empty? doesn't exist.

# results.respond_to? :map! is false so we use map and overwrite the var.
results = results.map { |url_doc| Wgit::Url.new(url_doc) }
results.each(&block) if block_given?

results
map_urls(results, &block)
end

# Returns Url records that have been crawled.
Expand Down Expand Up @@ -315,7 +307,8 @@ def uncrawled_urls(limit: 0, skip: 0, &block)
# DB.
# @return [Array<Wgit::Document>] The search results obtained from the DB.
def search(
query, case_sensitive: false, whole_sentence: true, limit: 10, skip: 0
query, case_sensitive: false, whole_sentence: true,
limit: 10, skip: 0, &block
)
query = query.to_s.strip
query.replace("\"#{query}\"") if whole_sentence
Expand All @@ -333,12 +326,7 @@ def search(
results = retrieve(DOCUMENTS_COLLECTION, query,
sort: sort_proj, projection: sort_proj,
limit:, skip:)

results.map do |mongo_doc|
doc = Wgit::Document.new(mongo_doc)
yield(doc) if block_given?
doc
end
map_documents(results, &block)
end

# Searches the database's Documents for the given query and then searches
Expand Down
20 changes: 20 additions & 0 deletions lib/wgit/database/database_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,25 @@ def build_model(obj)
Wgit::Database::Model.document(obj)
end
end

# Builds a Wgit::Document from every raw DB record in the given collection.
# When the caller supplies a block, each Document is handed to it as soon
# as it has been built, i.e. before the next record is converted.
#
# @param doc_hashes [#map] The raw DB records, each passed as-is to
#   Wgit::Document.new.
# @yield [doc] Given each freshly built Wgit::Document (optional).
# @return [Array<Wgit::Document>] The built Documents, in the order that
#   doc_hashes yields its records.
def map_documents(doc_hashes)
  doc_hashes.map do |record|
    Wgit::Document.new(record).tap do |document|
      yield(document) if block_given?
    end
  end
end

# Builds a Wgit::Url from every raw DB record in the given collection.
# When the caller supplies a block, each Url is handed to it as soon as it
# has been built, i.e. before the next record is converted.
#
# @param url_hashes [#map] The raw DB records, each passed as-is to
#   Wgit::Url.new.
# @yield [url] Given each freshly built Wgit::Url (optional).
# @return [Array<Wgit::Url>] The built Urls, in the order that url_hashes
#   yields its records.
def map_urls(url_hashes)
  # Whether a block was supplied cannot change mid-call, so check it once.
  notify = block_given?

  url_hashes.map do |record|
    wgit_url = Wgit::Url.new(record)
    yield(wgit_url) if notify
    wgit_url
  end
end
end
end
23 changes: 19 additions & 4 deletions test/test_in_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ def test_search
assert results.first.score > results.last.score
end

def test_search__case_sensitive__whole_sentence
def test_search__case_sensitive
@docs.last.text << 'Foo Bar'

seed { docs @docs }

# Test case_sensitive: false and block.
Expand All @@ -155,6 +154,11 @@ def test_search__case_sensitive__whole_sentence

# Test case_sensitive: true.
assert_empty db.search('foo bar', case_sensitive: true)
end

def test_search__whole_sentence
@docs.last.text << 'Foo Bar'
seed { docs @docs }

# Test whole_sentence: false.
results = db.search('bar foo', whole_sentence: false)
Expand All @@ -170,8 +174,11 @@ def test_search__case_sensitive__whole_sentence
assert results.all? { |doc| doc.instance_of? Wgit::Document }
end

def test_search__limit__skip
# All dev data docs contain the word 'Everest'.
def test_search__limit
# First doc has highest textScore and so on...
@docs.reverse.each_with_index do |doc, i|
i.times { doc.text << 'Everest' }
end
seed { docs @docs }

assert_equal 3, db.search('everest').length
Expand All @@ -185,6 +192,14 @@ def test_search__limit__skip
assert_equal @docs[i], doc
assert_equal @docs[i].url.to_h, doc.url.to_h
end
end

def test_search__skip
# First doc has highest textScore and so on...
@docs.reverse.each_with_index do |doc, i|
i.times { doc.text << 'Everest' }
end
seed { docs @docs }

# Test skip.
results = db.search('everest', skip: 1)
Expand Down
24 changes: 20 additions & 4 deletions test/test_mongo_db.rb
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,8 @@ def test_urls__with_redirects
assert_equal redirects_hash, urls.first.redirects
end

def test_search__case_sensitive__whole_sentence
def test_search__case_sensitive
@docs.last.text << 'Foo Bar'

seed { docs @docs }

# Test no results.
Expand All @@ -299,6 +298,11 @@ def test_search__case_sensitive__whole_sentence

# Test case_sensitive: true.
assert_empty db.search('foo bar', case_sensitive: true)
end

def test_search__whole_sentence
@docs.last.text << 'Foo Bar'
seed { docs @docs }

# Test whole_sentence: false.
results = db.search('bar foo', whole_sentence: false)
Expand All @@ -314,10 +318,14 @@ def test_search__case_sensitive__whole_sentence
assert results.all? { |doc| doc.instance_of? Wgit::Document }
end

def test_search__limit__skip
# All dev data docs contain the word 'Everest'.
def test_search__limit
# First doc has highest textScore and so on...
@docs.reverse.each_with_index do |doc, i|
i.times { doc.text << 'Everest' }
end
seed { docs @docs }

# Test search.
assert_equal 3, db.search('everest').length
assert_equal 3, db.last_result&.count

Expand All @@ -331,6 +339,14 @@ def test_search__limit__skip
assert_equal @docs[i], doc
assert_equal @docs[i].url.to_h, doc.url.to_h
end
end

def test_search__skip
# First doc has highest textScore and so on...
@docs.reverse.each_with_index do |doc, i|
i.times { doc.text << 'Everest' }
end
seed { docs @docs }

# Test skip.
results = db.search('everest', skip: 1)
Expand Down

0 comments on commit fac9218

Please sign in to comment.