Skip to content

Commit c21c801

Browse files
author
Sylvain Pace
authored
Merge pull request #429 from mojavelinux/issue-425-bypass-nb_hits_updater
resolves #425: bypass the nb_hits_updater confirmation prompt if UPDATE_NB_HITS is set or the terminal is not a tty
2 parents ca08e70 + 73ed0bc commit c21c801

File tree

3 files changed

+17
-6
lines changed

3 files changed

+17
-6
lines changed

scraper/src/config/config_loader.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
from collections import OrderedDict
9+
from distutils.util import strtobool
910
import json
1011
import os
1112
import copy
@@ -49,6 +50,7 @@ class ConfigLoader(object):
4950
strategy = 'default'
5051
strict_redirect = True
5152
strip_chars = u".,;:§¶"
53+
update_nb_hits = None
5254
use_anchors = False
5355
user_agent = 'Algolia DocSearch Crawler'
5456
only_content_level = False
@@ -112,6 +114,9 @@ def _parse(self):
112114
# Parse Env
113115
self.app_id = os.environ.get('APPLICATION_ID', None)
114116
self.api_key = os.environ.get('API_KEY', None)
117+
self.update_nb_hits = os.environ.get('UPDATE_NB_HITS', None)
118+
if self.update_nb_hits is not None:
119+
self.update_nb_hits = bool(strtobool(self.update_nb_hits))
115120

116121
# Parse config
117122
self.selectors = SelectorsParser().parse(self.selectors)
@@ -124,15 +129,15 @@ def _parse(self):
124129
self.allowed_domains = UrlsParser.build_allowed_domains(
125130
self.start_urls, self.stop_urls)
126131

127-
def update_nb_hits(self, nb_hits):
132+
def update_nb_hits_value(self, nb_hits):
128133
if self.config_file is not None:
129134
# config loaded from file
130135
previous_nb_hits = None if 'nb_hits' not in self.config_content else \
131136
self.config_content['nb_hits']
132137
nb_hit_updater = NbHitsUpdater(self.config_file,
133138
self.config_content,
134139
previous_nb_hits, nb_hits)
135-
nb_hit_updater.update()
140+
nb_hit_updater.update(self.update_nb_hits)
136141

137142
def get_extra_facets(self):
138143
return UrlsParser.get_extra_facets(self.start_urls)

scraper/src/config/nb_hits_updater.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from ..helpers import confirm
22
import json
33
import copy
4+
import sys
45

56

67
class NbHitsUpdater(object):
@@ -16,12 +17,17 @@ def __init__(self, config_file, config_content, previous_nb_hits,
1617
self.new_nb_hit = new_nb_hit
1718
self.previous_nb_hits = previous_nb_hits
1819

19-
def update(self):
20+
def update(self, perform_update):
2021
if self._update_needed():
2122
print("previous nb_hits: " + str(self.previous_nb_hits) + "\n")
2223

23-
if confirm(
24-
'Do you want to update the nb_hits in ' + self.config_file + ' ?'):
24+
if perform_update is None:
25+
if sys.stdout.isatty():
26+
perform_update = confirm('Do you want to update the nb_hits in ' + self.config_file + ' ?')
27+
else:
28+
perform_update = True
29+
30+
if perform_update:
2531
try:
2632
self._update_config()
2733
print("\n[OK] " + self.config_file + " has been updated")

scraper/src/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def run_config(config):
8484
if DocumentationSpider.NB_INDEXED > 0:
8585
algolia_helper.commit_tmp_index()
8686
print('Nb hits: ' + str(DocumentationSpider.NB_INDEXED))
87-
config.update_nb_hits(DocumentationSpider.NB_INDEXED)
87+
config.update_nb_hits_value(DocumentationSpider.NB_INDEXED)
8888
else:
8989
print('Crawling issue: nbHits 0 for ' + config.index_name)
9090
algolia_helper.report_crawling_issue()

0 commit comments

Comments (0)