From 36f1f11169cfd1c19b3107abf33e85f1de5b0e6a Mon Sep 17 00:00:00 2001 From: joe Date: Thu, 29 Jul 2021 23:38:16 +0530 Subject: [PATCH 01/17] initialized stats.py and added some lines of code on which howdoi/stats.py will be built on --- howdoi/stats.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 howdoi/stats.py diff --git a/howdoi/stats.py b/howdoi/stats.py new file mode 100644 index 000000000..f5b3b934e --- /dev/null +++ b/howdoi/stats.py @@ -0,0 +1,36 @@ +# importing appdirs as howdoi uses it +import appdirs +from cachelib import FileSystemCache +from time import time +from datetime import * + +DEFAULT_DIR = appdirs.user_cache_dir('howdoi-local-stats') + + +# GLOBAL VARIABLES - changes for every object hence made +# store the date for first installation +FIRST_INSTALLED = 'dummy' +# permission for dashboard initially set to true +# if set to true only then send the data to dashboard +DASHBOARD_PERMISSION = True +# a check for checking redundant words +REDUNDANT_WORDS = ['a','an','the','is','for','on','it','in'] + + +# class to collect stats +class CollectStats: + # needed functions + # constructor + def __init__(self): + # to show user his/her most used search engines + def search_engine_stats(): + + # stores the number of queries done with howdoi + def howdoi_usage_stats(): + + # check how many times cache was used and how many times servers were pinged + def cache_vs_requests_hit(): +# class to show the collected stats +class RenderStats: + + From 9879f02a07cc05d69f125985bdc729407d14fe98 Mon Sep 17 00:00:00 2001 From: joe Date: Thu, 29 Jul 2021 23:51:42 +0530 Subject: [PATCH 02/17] fixed some bugs in whitespaces --- howdoi/stats.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/howdoi/stats.py b/howdoi/stats.py index f5b3b934e..da6ca9b74 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -22,15 +22,17 @@ class CollectStats: # needed functions # constructor def __init__(self): + print("in constructor") # to show user his/her most used search engines - def search_engine_stats(): - + def search_engine_stats(self): + print("in search engine fun") # stores the number of queries done with howdoi - def howdoi_usage_stats(): - + def howdoi_usage_stats(self): + print("in howdoi usage") # check how many times cache was used and how many times servers were pinged - def cache_vs_requests_hit(): + def cache_vs_requests_hit(self): + print("in cache vs hits") # class to show the collected stats -class RenderStats: +# class RenderStats: From e15bbdb2224f3f33e510d343c571c67cf3a635a5 Mon Sep 17 00:00:00 2001 From: joe Date: Fri, 30 Jul 2021 03:48:42 +0530 Subject: [PATCH 03/17] added object call from howdoi.py to stats.py, added more data in stats.py --- howdoi/howdoi.py | 21 +++++++++++++++++- howdoi/stats.py | 55 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 24a2ef56a..9dbe353a9 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -44,6 +44,10 @@ from howdoi import __version__ from howdoi.errors import GoogleValidationError, BingValidationError, DDGValidationError +from .stats import CollectStats + +DEFAULT_DIR = appdirs.user_cache_dir('howdoi-local-stats') + logging.basicConfig(format='%(levelname)s: %(message)s') if os.getenv('HOWDOI_DISABLE_SSL'): # Set http instead of https SCHEME = 'http://' @@ -110,6 +114,9 @@ else: cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0) 
+ENABLE_USER_STATS = True +# creating object -> initialiing constructor +CollectStats_obj = CollectStats(cache) howdoi_session = requests.session() @@ -603,6 +610,8 @@ def howdoi(raw_query): if _is_help_query(args['query']): return _get_help_instructions() + '\n' + if(ENABLE_USER_STATS): + CollectStats_obj.run(args) res = cache.get(cache_key) # pylint: disable=assignment-from-none if res: @@ -665,6 +674,8 @@ def get_parser(): action='store_true') parser.add_argument('--sanity-check', help=argparse.SUPPRESS, action='store_true') + parser.add_argument('--stats', help='view your local statistics for howdoi', action='store_true') + parser.add_argument('--disable_stats', help='disable local stats collection for howdoi', action='store_true') return parser @@ -737,7 +748,8 @@ def perform_sanity_check(): return exit_code -def command_line_runner(): # pylint: disable=too-many-return-statements,too-many-branches +def command_line_runner(): + # pylint: disable=too-many-return-statements,too-many-branches parser = get_parser() args = vars(parser.parse_args()) @@ -754,6 +766,13 @@ def command_line_runner(): # pylint: disable=too-many-return-statements,too-man perform_sanity_check() ) + if args['disable_stats']: + ENABLE_USER_STATS = False + + if args['stats']: + ENABLE_USER_STATS = True + # TODO -> render stats on graph + if args['clear_cache']: if _clear_cache(): print(f'{GREEN}Cache cleared successfully{END_FORMAT}') diff --git a/howdoi/stats.py b/howdoi/stats.py index da6ca9b74..145a91928 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -2,37 +2,60 @@ import appdirs from cachelib import FileSystemCache from time import time -from datetime import * - -DEFAULT_DIR = appdirs.user_cache_dir('howdoi-local-stats') - - +from datetime import datetime, timedelta # GLOBAL VARIABLES - changes for every object hence made # store the date for first installation FIRST_INSTALLED = 'dummy' # permission for dashboard initially set to true # if set to true only then send the data to dashboard DASHBOARD_PERMISSION = True -# a check for checking redundant words +# redundant words REDUNDANT_WORDS = ['a','an','the','is','for','on','it','in'] +# user can choose qny, set by default to index 1 +DATESTRING_FORMATS = ["%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y"] +# stores the total number of queries done in howdoi +TOTAL_REQUESTS = 'dummy' +SEARCH_ENGINES = "dummy" - +# class to show the collected stats +class RenderStats: + def __init__(self): + print("inside graph function") + # class to collect stats class CollectStats: - # needed functions # constructor - def __init__(self): - print("in constructor") - # to show user his/her most used search engines + def __init__(self, cache): + self.cache = cache + # if this is the first time howdoi is installed + if not self.cache.has(FIRST_INSTALLED): + self.cache.clear() + # SET FORMAT TO DEFAULT BUT CAN BE CHANGED BY USER + self.cache.set(FIRST_INSTALLED, datetime.today().strftime(DATESTRING_FORMATS[0])) + + # to store user's most used search engines def search_engine_stats(self): - print("in search engine fun") - # stores the number of queries done with howdoi - def howdoi_usage_stats(self): + # print("in search engine fun") + frequency = self[SEARCH_ENGINES] + # if frequency not None : + # maximum_frequency = max(frequency,) + + # stores the top queries done with howdoi + def howdoi_queries_distribution(self): print("in howdoi usage") + # check how many times cache was used and how many times servers were pinged def cache_vs_requests_hit(self): print("in cache vs hits") -# class to 
show the collected stats -# class RenderStats: + + # <-----------------counter functions ------------------------> + def increase_requests(self): + print("increasing requests") + + # main runner calling every function + def run(self, args): + # task 1 -> increase query counter by 1 since used howdoi + self.increase_requests() + # print("i am working") From 2413321193629524daa69625c6dfd7439af91055 Mon Sep 17 00:00:00 2001 From: joe Date: Fri, 30 Jul 2021 04:07:54 +0530 Subject: [PATCH 04/17] removed some flakes --- howdoi/howdoi.py | 10 ++++------ howdoi/stats.py | 9 +++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 9dbe353a9..083f86765 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -610,7 +610,7 @@ def howdoi(raw_query): if _is_help_query(args['query']): return _get_help_instructions() + '\n' - if(ENABLE_USER_STATS): + if ENABLE_USER_STATS: CollectStats_obj.run(args) res = cache.get(cache_key) # pylint: disable=assignment-from-none @@ -748,8 +748,7 @@ def perform_sanity_check(): return exit_code -def command_line_runner(): - # pylint: disable=too-many-return-statements,too-many-branches +def command_line_runner(): # pylint: disable=too-many-return-statements,too-many-branches parser = get_parser() args = vars(parser.parse_args()) @@ -770,9 +769,8 @@ def command_line_runner(): ENABLE_USER_STATS = False if args['stats']: - ENABLE_USER_STATS = True - # TODO -> render stats on graph - + ENABLE_USER_STATS = True # TODO -> render stats on graph + if args['clear_cache']: if _clear_cache(): print(f'{GREEN}Cache cleared successfully{END_FORMAT}') diff --git a/howdoi/stats.py b/howdoi/stats.py index 145a91928..f2d96b5e0 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -1,6 +1,3 @@ -# importing appdirs as howdoi uses it -import appdirs -from cachelib import FileSystemCache from time import time from datetime import datetime, timedelta # GLOBAL VARIABLES - changes for every object hence made @@ -10,7 +7,7 @@ # if set to true only then send the data to dashboard DASHBOARD_PERMISSION = True # redundant words -REDUNDANT_WORDS = ['a','an','the','is','for','on','it','in'] +REDUNDANT_WORDS = ['a', 'an', 'the', 'is', 'for', 'on', 'it', 'in'] # user can choose qny, set by default to index 1 DATESTRING_FORMATS = ["%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y"] # stores the total number of queries done in howdoi @@ -35,8 +32,8 @@ def __init__(self, cache): # to store user's most used search engines def search_engine_stats(self): - # print("in search engine fun") - frequency = self[SEARCH_ENGINES] + print("in search engine fun") + # frequency = 0 # if frequency not None : # maximum_frequency = max(frequency,) From 01f8f828e746a59257a0ca1541ced0ca46deed33 Mon Sep 17 00:00:00 2001 From: joe Date: Sun, 1 Aug 2021 02:59:05 +0530 Subject: [PATCH 05/17] created test for stats.py and added search engine count functionality with increase date functionality --- howdoi/howdoi.py | 8 +----- howdoi/stats.py | 71 ++++++++++++++++++++++++++++++------------------ test_stats.py | 21 ++++++++++++++ 3 files changed, 67 insertions(+), 33 deletions(-) create mode 100644 test_stats.py diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 083f86765..e86e07c29 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -748,7 +748,7 @@ def perform_sanity_check(): return exit_code -def command_line_runner(): # pylint: disable=too-many-return-statements,too-many-branches +def command_line_runner(): # pylint: disable=too-many-return-statements,too-many-branches parser = 
get_parser() args = vars(parser.parse_args()) @@ -765,12 +765,6 @@ def command_line_runner(): # pylint: disable=too-many-return-statements,too-many perform_sanity_check() ) - if args['disable_stats']: - ENABLE_USER_STATS = False - - if args['stats']: - ENABLE_USER_STATS = True # TODO -> render stats on graph - if args['clear_cache']: if _clear_cache(): print(f'{GREEN}Cache cleared successfully{END_FORMAT}') diff --git a/howdoi/stats.py b/howdoi/stats.py index f2d96b5e0..d09cb2af7 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -1,5 +1,6 @@ -from time import time -from datetime import datetime, timedelta +import collections +from datetime import datetime + # GLOBAL VARIABLES - changes for every object hence made # store the date for first installation FIRST_INSTALLED = 'dummy' @@ -11,48 +12,66 @@ # user can choose qny, set by default to index 1 DATESTRING_FORMATS = ["%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y"] # stores the total number of queries done in howdoi -TOTAL_REQUESTS = 'dummy' +TOTAL_REQUESTS = 'TOTAL_REQUESTS' SEARCH_ENGINES = "dummy" + # class to show the collected stats -class RenderStats: - def __init__(self): - print("inside graph function") - -# class to collect stats +# class RenderStats: +# def __init__(self): +# print("inside graph function") + + class CollectStats: - # constructor + def __init__(self, cache): self.cache = cache - # if this is the first time howdoi is installed if not self.cache.has(FIRST_INSTALLED): self.cache.clear() # SET FORMAT TO DEFAULT BUT CAN BE CHANGED BY USER self.cache.set(FIRST_INSTALLED, datetime.today().strftime(DATESTRING_FORMATS[0])) - + # to store user's most used search engines - def search_engine_stats(self): - print("in search engine fun") - # frequency = 0 - # if frequency not None : - # maximum_frequency = max(frequency,) - + def search_engine_stats(self, search_engine): + # print("in search engine fun") + if search_engine: + search_engines_storage = self.cache.get(SEARCH_ENGINES) + if search_engines_storage is None: + search_engines_storage = collections.Counter() + + search_engines_storage[search_engine] += 1 + # print(search_engines_storage) + self.cache.set(SEARCH_ENGINES, search_engines_storage) + print("working") + # stores the top queries done with howdoi - def howdoi_queries_distribution(self): - print("in howdoi usage") - + # def howdoi_queries_distribution(self): + # print("in howdoi usage") + + def increase_key(self, key): + self.cache.inc(key) + + def increase_days_used(self): + # function to inc the number of days howdoi was used + print("working") + current_date = datetime.today().strftime(DATESTRING_FORMATS[0]) + processed_date = str(current_date) + self.increase_key(processed_date) + # check how many times cache was used and how many times servers were pinged - def cache_vs_requests_hit(self): - print("in cache vs hits") + # def cache_vs_requests_hit(self): + # print("in cache vs hits") # <-----------------counter functions ------------------------> def increase_requests(self): - print("increasing requests") - + # print("increasing requests") + print("called") + self.cache.inc(TOTAL_REQUESTS) + # print(TOTAL_REQUESTS) + # main runner calling every function def run(self, args): # task 1 -> increase query counter by 1 since used howdoi self.increase_requests() + self.search_engine_stats(args.get('search_engine')) # print("i am working") - - diff --git a/test_stats.py b/test_stats.py new file mode 100644 index 000000000..926a8ffb6 --- /dev/null +++ b/test_stats.py @@ -0,0 +1,21 @@ +# testing for stats.py +import unittest 
+from tempfile import mkdtemp +from cachelib import FileSystemCache +from howdoi.stats import CollectStats + + +class TestStats(unittest.TestCase): + def getting_started(self): + self.cache_dir = mkdtemp(prefix='howdoi_test') + cache = FileSystemCache(self.cache_dir, default_timeout=0) + self.stats_obj = CollectStats(cache) + self.args = [ + {'query': 'print hello in python'}, + {'query': 'create a linked list in python'} + ] + # print("tests working yeahhh") + + +if __name__ == '__main__': + unittest.main() From 723867134a0f7cfaa8c491d5c0124bc88a648db7 Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 00:40:00 +0530 Subject: [PATCH 06/17] added hour counter functionality --- howdoi/stats.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/howdoi/stats.py b/howdoi/stats.py index d09cb2af7..78c2f10b3 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -58,6 +58,10 @@ def increase_days_used(self): processed_date = str(current_date) self.increase_key(processed_date) + def increase_hours_used(self): + current_hour = datetime.now().hour + processed_hour = str(current_hour) + self.increase_key(processed_hour) # check how many times cache was used and how many times servers were pinged # def cache_vs_requests_hit(self): # print("in cache vs hits") @@ -67,11 +71,12 @@ def increase_requests(self): # print("increasing requests") print("called") self.cache.inc(TOTAL_REQUESTS) - # print(TOTAL_REQUESTS) + # print(self.cache) # main runner calling every function def run(self, args): # task 1 -> increase query counter by 1 since used howdoi self.increase_requests() + self.increase_days_used() self.search_engine_stats(args.get('search_engine')) # print("i am working") From f8a705dbef3f6a071ee3c3d296259941a000f8cc Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 01:13:27 +0530 Subject: [PATCH 07/17] added functionality to process the links and store their counter --- howdoi/howdoi.py | 2 +- howdoi/stats.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index e86e07c29..ddedcf4ed 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -407,7 +407,7 @@ def _get_links_with_cache(query): question_links = _get_questions(links) cache.set(cache_key, question_links or CACHE_EMPTY_VAL) - + CollectStats_obj.process_links(question_links) return question_links diff --git a/howdoi/stats.py b/howdoi/stats.py index 78c2f10b3..265790292 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -14,7 +14,8 @@ # stores the total number of queries done in howdoi TOTAL_REQUESTS = 'TOTAL_REQUESTS' SEARCH_ENGINES = "dummy" - +# aid in checking for the process links and checking its frequency +PROCESSED_LINKS = "processed links" # class to show the collected stats # class RenderStats: @@ -73,6 +74,19 @@ def increase_requests(self): self.cache.inc(TOTAL_REQUESTS) # print(self.cache) + def process_links(self, question_links): + print("processing links ") + if not question_links: #checking for empty links + return + else: + links_storage = self.cache.get(PROCESSED_LINKS) + if links_storage is None: + links_storage = collections.Counter() + # increase freq by 1 of the processed link + for i in question_links: + links_storage[i] += 1 + self.cache.set(PROCESSED_LINKS,links_storage) + # main runner calling every function def run(self, args): # task 1 -> increase query counter by 1 since used howdoi From 597f9f5dacaaa4346db6717e6f959baa690e0286 Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 01:46:00 +0530 Subject: 
[PATCH 08/17] added cache hits and error vs valid response counter

---
 howdoi/howdoi.py |  4 +++-
 howdoi/stats.py  | 19 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index ddedcf4ed..50df553d2 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -615,6 +615,8 @@ def howdoi(raw_query):
     res = cache.get(cache_key)  # pylint: disable=assignment-from-none
 
     if res:
+        CollectStats_obj.increase_cache_hits()
+        CollectStats_obj.process_response(res)
         logging.info('Using cached response (add -C to clear the cache)')
         return _parse_cmd(args, res)
 
@@ -630,7 +632,7 @@ def howdoi(raw_query):
             cache.set(cache_key, res)
     except (RequestsConnectionError, SSLError):
         res = {'error': f'Unable to reach {args["search_engine"]}. Do you need to use a proxy?\n'}
-
+    CollectStats_obj.process_response(res)
     return _parse_cmd(args, res)
 
diff --git a/howdoi/stats.py b/howdoi/stats.py
index 265790292..c9d616aaf 100644
--- a/howdoi/stats.py
+++ b/howdoi/stats.py
@@ -16,7 +16,11 @@
 SEARCH_ENGINES = "dummy"
 # aid in checking for the process links and checking its frequency
 PROCESSED_LINKS = "processed links"
-
+CACHE_HITS = "CACHE_HITS"
+# variable for checking if res is errored
+ERROR_IN_RES = "ERROR_IN_RES"
+# variable for checking if the res is not errored
+VALID_RES = "VALID_RES"
 # class to show the collected stats
 # class RenderStats:
 #     def __init__(self):
@@ -49,6 +53,9 @@ def search_engine_stats(self, search_engine):
     # def howdoi_queries_distribution(self):
     #     print("in howdoi usage")
 
+    def increase_cache_hits(self):
+        self.cache.inc(CACHE_HITS)
+
     def increase_key(self, key):
         self.cache.inc(key)
 
@@ -74,6 +81,16 @@ def increase_requests(self):
         self.cache.inc(TOTAL_REQUESTS)
         # print(self.cache)
 
+    def process_response(self,res):
+        # checking for error in response
+        ans = ""
+        # check for errored reponse
+        if not res or (type(res)==dict and res.get('error')):
+            ans = ERROR_IN_RES
+        else:
+            ans = VALID_RES
+        self.cache.inc(ans)
+
     def process_links(self, question_links):

From 87f0a24e6a747a2bc01006e34d1471cfd800a530 Mon Sep 17 00:00:00 2001
From: joe
Date: Tue, 3 Aug 2021 02:08:43 +0530
Subject: [PATCH 09/17] removed some lines from test to clean flakes

---
 howdoi/stats.py | 31 +++++++++++++++----------------
 test_stats.py   | 14 ++------------
 2 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/howdoi/stats.py b/howdoi/stats.py
index c9d616aaf..82ff4bdf0 100644
--- a/howdoi/stats.py
+++ b/howdoi/stats.py
@@ -55,7 +55,7 @@ def search_engine_stats(self, search_engine):
     def increase_cache_hits(self):
         self.cache.inc(CACHE_HITS)
-
+
     def increase_key(self, key):
         self.cache.inc(key)
 
@@ -81,29 +81,28 @@ def increase_requests(self):
         self.cache.inc(TOTAL_REQUESTS)
         # print(self.cache)
 
-    def process_response(self,res):
+    def process_response(self, res):
         # checking for error in response
-        ans = ""
+        ans = ""
-        # check for errored reponse
+        # check for errored response
-        if not res or (type(res)==dict and res.get('error')):
+        if not res or (isinstance(res) == dict and res.get('error')):
             ans = ERROR_IN_RES
         else:
             ans = VALID_RES
         self.cache.inc(ans)
-
+
     def process_links(self, question_links):
         print("processing links ")
-        if not question_links: #checking for empty links
+        if not question_links:  # checking for empty links
             return
-        else:
-            links_storage = self.cache.get(PROCESSED_LINKS)
-            if links_storage is None:
-                links_storage = collections.Counter()
-            # increase freq by 1 of the
processed link - for i in question_links: - links_storage[i] += 1 - self.cache.set(PROCESSED_LINKS,links_storage) - + links_storage = self.cache.get(PROCESSED_LINKS) + if links_storage is None: + links_storage = collections.Counter() + # increase freq by 1 of the processed link + for i in question_links: + links_storage[i] += 1 + self.cache.set(PROCESSED_LINKS, links_storage) + # main runner calling every function def run(self, args): # task 1 -> increase query counter by 1 since used howdoi diff --git a/test_stats.py b/test_stats.py index 926a8ffb6..4966e9798 100644 --- a/test_stats.py +++ b/test_stats.py @@ -1,20 +1,10 @@ # testing for stats.py import unittest -from tempfile import mkdtemp -from cachelib import FileSystemCache -from howdoi.stats import CollectStats class TestStats(unittest.TestCase): - def getting_started(self): - self.cache_dir = mkdtemp(prefix='howdoi_test') - cache = FileSystemCache(self.cache_dir, default_timeout=0) - self.stats_obj = CollectStats(cache) - self.args = [ - {'query': 'print hello in python'}, - {'query': 'create a linked list in python'} - ] - # print("tests working yeahhh") + def get_started(self): + print("tests working") if __name__ == '__main__': From 62c996ee7abc4279ee98cc735a238c7ba433112e Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 02:15:07 +0530 Subject: [PATCH 10/17] checking for fLAKES --- howdoi/howdoi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index 50df553d2..e8f4369c7 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -632,6 +632,7 @@ def howdoi(raw_query): cache.set(cache_key, res) except (RequestsConnectionError, SSLError): res = {'error': f'Unable to reach {args["search_engine"]}. Do you need to use a proxy?\n'} + CollectStats_obj.process_response(res) return _parse_cmd(args, res) From 6d20564e8e7beea4a1a395c5ede3fe9f6803ba1e Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 02:16:35 +0530 Subject: [PATCH 11/17] checking for fLAKES --- howdoi/howdoi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index e8f4369c7..aae8fab04 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -632,7 +632,7 @@ def howdoi(raw_query): cache.set(cache_key, res) except (RequestsConnectionError, SSLError): res = {'error': f'Unable to reach {args["search_engine"]}. Do you need to use a proxy?\n'} - + CollectStats_obj.process_response(res) return _parse_cmd(args, res) From 4349f1bf473c1b34078b7f1b6afd9d40b5bff17d Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 02:18:13 +0530 Subject: [PATCH 12/17] checking for fLAKES --- howdoi/howdoi.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index a6ea81763..aae8fab04 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -92,7 +92,7 @@ CACHE_ENTRY_MAX = 128 HTML_CACHE_PATH = 'page_cache' -SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi', 'setup howdoi', +SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi', 'do howdoi', 'howdoi howdoi', 'howdoi use howdoi'] # variables for text formatting, prepend to string to begin text formatting. 
@@ -108,8 +108,6 @@ STASH_REMOVE = 'remove' STASH_EMPTY = 'empty' -BLOCKED_ENGINES = [] - if os.getenv('HOWDOI_DISABLE_CACHE'): # works like an always empty cache cache = NullCache() @@ -287,7 +285,7 @@ def _get_links(query): result = None if not result or _is_blocked(result): logging.error('%sUnable to find an answer because the search engine temporarily blocked the request. ' - 'Attempting to use a different search engine.%s', RED, END_FORMAT) + 'Please wait a few minutes or select a different search engine.%s', RED, END_FORMAT) raise BlockError('Temporary block by search engine') html = pq(result) @@ -598,8 +596,8 @@ def howdoi(raw_query): else: args = raw_query - search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google' - os.environ['HOWDOI_SEARCH_ENGINE'] = search_engine + os.environ['HOWDOI_SEARCH_ENGINE'] = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google' + search_engine = os.getenv('HOWDOI_SEARCH_ENGINE') if search_engine not in SUPPORTED_SEARCH_ENGINES: supported_search_engines = ', '.join(SUPPORTED_SEARCH_ENGINES) message = f'Unsupported engine {search_engine}. The supported engines are: {supported_search_engines}' @@ -633,22 +631,9 @@ def howdoi(raw_query): res = {'error': message} cache.set(cache_key, res) except (RequestsConnectionError, SSLError): - res = {'error': f'Unable to reach {args["search_engine"]}. Do you need to use a proxy?\n'} CollectStats_obj.process_response(res) - res = {'error': f'Unable to reach {search_engine}. Do you need to use a proxy?\n'} - except BlockError: - BLOCKED_ENGINES.append(search_engine) - next_engine = next((engine for engine in SUPPORTED_SEARCH_ENGINES if engine not in BLOCKED_ENGINES), None) - if next_engine is None: - res = {'error': 'Unable to get a response from any search engine\n'} - else: - args['search_engine'] = next_engine - args['query'] = args['query'].split() - logging.info('%sRetrying search with %s%s', GREEN, next_engine, END_FORMAT) - return howdoi(args) - return _parse_cmd(args, res) From 32e5219c1a683e6d4d64db36855f0afb72b47a2e Mon Sep 17 00:00:00 2001 From: joe Date: Tue, 3 Aug 2021 02:20:22 +0530 Subject: [PATCH 13/17] deleted testing file --- test_stats.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 test_stats.py diff --git a/test_stats.py b/test_stats.py deleted file mode 100644 index 4966e9798..000000000 --- a/test_stats.py +++ /dev/null @@ -1,11 +0,0 @@ -# testing for stats.py -import unittest - - -class TestStats(unittest.TestCase): - def get_started(self): - print("tests working") - - -if __name__ == '__main__': - unittest.main() From 2de5bdcd41d13c48d488db5a328a1d57f170d9e3 Mon Sep 17 00:00:00 2001 From: joe Date: Sat, 7 Aug 2021 02:35:43 +0530 Subject: [PATCH 14/17] added logic for making graph and rendering search engine stats --- howdoi/howdoi.py | 7 +++ howdoi/stats.py | 108 ++++++++++++++++++++++++++++++++++++++++++++++- setup.py | 1 + 3 files changed, 114 insertions(+), 2 deletions(-) diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py index aae8fab04..b2ef1f9ba 100755 --- a/howdoi/howdoi.py +++ b/howdoi/howdoi.py @@ -768,6 +768,13 @@ def command_line_runner(): # pylint: disable=too-many-return-statements,too-man perform_sanity_check() ) + if args['stats']: + CollectStats_obj.render_stats() + + if args['disable-stats']: + ENABLE_USER_STATS = False + print("STATS DISABLED") + if args['clear_cache']: if _clear_cache(): print(f'{GREEN}Cache cleared successfully{END_FORMAT}') diff --git a/howdoi/stats.py b/howdoi/stats.py index 
82ff4bdf0..58f93d432 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -1,6 +1,7 @@ import collections from datetime import datetime - +from termgraph import termgraph +import sys # GLOBAL VARIABLES - changes for every object hence made # store the date for first installation FIRST_INSTALLED = 'dummy' @@ -25,17 +26,90 @@ # class RenderStats: # def __init__(self): # print("inside graph function") +QUERY_KEY = "query key" +WORD_OF_QUERY = "WORD OF QUERY" + + +# ----------------------------> termgraph logic +TERMGRAPH_DEFAULT_ARGS = {'filename': '-', 'title': None, 'width': 50, 'format': '{:<5.1f}', 'suffix': '', 'no_labels': False, 'no_values': False, 'color': None, 'vertical': False, 'stacked': False, + 'histogram': False, 'bins': 5, 'different_scale': False, 'calendar': False, 'start_dt': None, 'custom_tick': '', 'delim': '', 'verbose': False, 'label_before': False, 'version': False} + +Report = collections.namedtuple('Report', ['group', 'content']) + +def draw_graph(data, labels, custom_args = None): + if sys.version>= '3.6': + # create graph using the folloing logic + assert len(data) == len(labels) + if custom_args is None: + custom_args = {} + args = {} + args.update(TERMGRAPH_DEFAULT_ARGS) + args.update(custom_args) + termgraph.chart([], [[datap] for datap in data], args, [str(label) for label in labels]) +# ----------------------------------------> + +class RenderStats: + + def __init__(self, args, colors=[]): + self.termgraph_args = args + self.COLORS = colors + self._report_group_map = collections.OrderedDict() + + def add(self, report): + assert isinstance(report, Report) + if report.group not in self._report_group_map: + self._report_group_map[report.group] = [] + self._report_group_map[report.group].append(report) + + def render_report(self, report): + if callable(report.content): + report.content() + elif isinstance(report.content, str): + print(report.content) + + def render_report_separator(self, length, separator_char="*"): + separation_string = separator_char*length + print(separation_string) + + def report(self): + for key in self._report_group_map: + self.render_report_separator(70) + for report in self._report_group_map[key]: + self.render_report(report) class CollectStats: def __init__(self, cache): self.cache = cache + self.rs = RenderStats(TERMGRAPH_DEFAULT_ARGS) if not self.cache.has(FIRST_INSTALLED): self.cache.clear() # SET FORMAT TO DEFAULT BUT CAN BE CHANGED BY USER self.cache.set(FIRST_INSTALLED, datetime.today().strftime(DATESTRING_FORMATS[0])) + def render_search_engine_stats(self): + rs = self.rs + search_engine_frequency = self[SEARCH_ENGINES] + if search_engine_frequency is not None: + max_search_engine = max(search_engine_frequency, key= lambda engine : search_engine_frequency[engine]) + rs.add(Report('Search-engine=stats', 'Your most used search engine is {}'.format(max_search_engine.title()))) + + se_keys = [] + se_values = [] + # get values for search engine : get stats + for i in search_engine_frequency: + se_keys.append(i) + se_values.append(search_engine_frequency[i]) + + # now add those values to the termgraph + rs.add(Report('search-engine-stats'), lambda : + draw_graph( + data = se_values, + labels = se_keys, custom_args = {'suffix':'uses', 'format':'{:<1d}'} + )) + + # to store user's most used search engines def search_engine_stats(self, search_engine): # print("in search engine fun") @@ -53,6 +127,10 @@ def search_engine_stats(self, search_engine): # def howdoi_queries_distribution(self): # print("in howdoi usage") + def render_stats(self): 
+ print("RENDERING STATS, to disable : howdoi --disable_stats") + self.render_search_engine_stats() + def increase_cache_hits(self): self.cache.inc(CACHE_HITS) @@ -85,7 +163,7 @@ def process_response(self, res): # checking for error in respomnse ans = "" # check for errored response - if not res or (isinstance(res) == dict and res.get('error')): + if not res or (isinstance(res, dict) and res.get('error')): ans = ERROR_IN_RES else: ans = VALID_RES @@ -102,6 +180,30 @@ def process_links(self, question_links): for i in question_links: links_storage[i] += 1 self.cache.set(PROCESSED_LINKS, links_storage) + + def create_storage(self, key, value): + map_storage = self.cache.get(key) + if map_storage is None: + map_storage = collections.Counter() + + map_storage[value]+=1 + self.cache.set(key, map_storage) + + def process_user_query(self, query): + if not query: + return + query = query.strip() + query_storage = self.cache.get(QUERY_KEY) + if query_storage is None: + query_storage = collections.Counter() + + query_storage[query]+=1 + self.cache.set(QUERY_KEY, query_storage) + tokens = query.split(" ") + for token in tokens: + token = token.lower() + if token not in REDUNDANT_WORDS: + self.create_storage(WORD_OF_QUERY, token) # main runner calling every function def run(self, args): @@ -110,3 +212,5 @@ def run(self, args): self.increase_days_used() self.search_engine_stats(args.get('search_engine')) # print("i am working") + self.increase_hours_used() + self.process_user_query(args.get('query')) diff --git a/setup.py b/setup.py index 6550967b2..884f202cd 100644 --- a/setup.py +++ b/setup.py @@ -100,6 +100,7 @@ def read(*names): 'cachelib', 'appdirs', 'keep', + 'termgraph' ], cmdclass={ 'lint': Lint From 18900fc92fb0c2130dc7f1a9a503d14ccf4c918b Mon Sep 17 00:00:00 2001 From: joe Date: Mon, 6 Sep 2021 02:32:14 +0530 Subject: [PATCH 15/17] added termgraph logic completely --- howdoi/stats.py | 166 ++++++++++++++++++++++++++++++++++++++++++++---- howdoi/utils.py | 17 +++++ 2 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 howdoi/utils.py diff --git a/howdoi/stats.py b/howdoi/stats.py index 58f93d432..4339fbd37 100644 --- a/howdoi/stats.py +++ b/howdoi/stats.py @@ -1,6 +1,7 @@ import collections from datetime import datetime from termgraph import termgraph +from .utils import get_top_n_key_val_pairs_from_dict, safe_divide import sys # GLOBAL VARIABLES - changes for every object hence made # store the date for first installation @@ -10,6 +11,10 @@ DASHBOARD_PERMISSION = True # redundant words REDUNDANT_WORDS = ['a', 'an', 'the', 'is', 'for', 'on', 'it', 'in'] +HOUR_OF_DAY_KEY = 'DUMMY' +QUERY_WORD_KEY = 'dummy' + +SUCCESS_RESULT_KEY = 'dummy' # user can choose qny, set by default to index 1 DATESTRING_FORMATS = ["%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y"] # stores the total number of queries done in howdoi @@ -29,13 +34,15 @@ QUERY_KEY = "query key" WORD_OF_QUERY = "WORD OF QUERY" +ERROR_RESULT_KEY= 'dummy' -# ----------------------------> termgraph logic +# ----------------------------> termgraph DEPENDENCIES TERMGRAPH_DEFAULT_ARGS = {'filename': '-', 'title': None, 'width': 50, 'format': '{:<5.1f}', 'suffix': '', 'no_labels': False, 'no_values': False, 'color': None, 'vertical': False, 'stacked': False, 'histogram': False, 'bins': 5, 'different_scale': False, 'calendar': False, 'start_dt': None, 'custom_tick': '', 'delim': '', 'verbose': False, 'label_before': False, 'version': False} Report = collections.namedtuple('Report', ['group', 'content']) + def draw_graph(data, labels, custom_args 
= None): if sys.version>= '3.6': # create graph using the folloing logic @@ -88,7 +95,42 @@ def __init__(self, cache): # SET FORMAT TO DEFAULT BUT CAN BE CHANGED BY USER self.cache.set(FIRST_INSTALLED, datetime.today().strftime(DATESTRING_FORMATS[0])) + def load_time_stats(self): + # get termgraph object instance + rs = self.rs + + days_since_first_install = self.get_days_since_first_install() or 0 + total_request_count = self[TOTAL_REQUESTS] or 0 + + # add total time howdoi was used + rs.add(Report('Time stats', 'You have been using howdoi for {}'.format(days_since_first_install))) + + # add termgraph information about time stats + rs.add(Report('Time stats', 'The average queires made by your are {}'.format(safe_divide(total_request_count, days_since_first_install)))) + + hour_of_day_map = self[HOUR_OF_DAY_KEY] + + if total_request_count > 0 and hour_of_day_map: + most_active_hour_of_the_day = max(hour_of_day_map, key = lambda hour: hour_of_day_map[hour]) + + rs.add(Report('Time stats', 'You are most active between {}:00 and {}:00'.format(most_active_hour_of_the_day, most_active_hour_of_the_day+1))) + + keys, values = [], [] + for k in hour_of_day_map: + lower_time_bound = str(k) + ":00" + upper_time_bound = str(k+1) + ":00" if k+1<24 else "00:00" + keys.append(lower_time_bound+"-"+upper_time_bound) + values.append(hour_of_day_map[k]) + + rs.add( + Report( + 'time-related-stats', lambda: draw_graph(data=values, labels=keys, custom_args={ + 'suffix': ' uses', 'format': '{:<1d}'}) + ) + ) + def render_search_engine_stats(self): + rs = self.rs search_engine_frequency = self[SEARCH_ENGINES] if search_engine_frequency is not None: @@ -103,33 +145,131 @@ def render_search_engine_stats(self): se_values.append(search_engine_frequency[i]) # now add those values to the termgraph + rs.add(Report('search-engine-stats'), lambda : draw_graph( data = se_values, labels = se_keys, custom_args = {'suffix':'uses', 'format':'{:<1d}'} )) - + def render_query_stats(self): + rs = self.rs + # get your keys + + query_map = self[QUERY_KEY] + + # get the map of queries + query_words_map = self[QUERY_WORD_KEY] + + # get the top 5 since we are concerned with those + top_5_query_key_vals = get_top_n_key_val_pairs_from_dict(query_map, 5) + + top_5_query_words_key_vals = get_top_n_key_val_pairs_from_dict(query_words_map, 5) + + if len(top_5_query_key_vals) > 0: + most_common_query = top_5_query_key_vals[0][0] + rs.add( + Report( + 'query-stats', 'The query you\'ve made the most times is {}'.format( + most_common_query + ) + ) + ) + if len(top_5_query_words_key_vals) > 0: + most_common_query_word = top_5_query_words_key_vals[0][0] + rs.add( + Report( + 'query-stats', 'The most common word in your queries is {}'.format( + most_common_query_word + ) + ) + ) + + data = [val for _, val in top_5_query_words_key_vals] + labels = [key for key, _ in top_5_query_words_key_vals] + + rs.add( + Report('query-stats', lambda: draw_graph(data=data, labels=labels, + custom_args={'suffix': ' uses', 'format': '{:<1d}'}) + )) # to store user's most used search engines - def search_engine_stats(self, search_engine): - # print("in search engine fun") - if search_engine: - search_engines_storage = self.cache.get(SEARCH_ENGINES) - if search_engines_storage is None: - search_engines_storage = collections.Counter() - - search_engines_storage[search_engine] += 1 - # print(search_engines_storage) - self.cache.set(SEARCH_ENGINES, search_engines_storage) - print("working") + # def search_engine_stats(self, search_engine): + # # print("in search 
engine fun")
+    #     if search_engine:
+    #         search_engines_storage = self.cache.get(SEARCH_ENGINES)
+    #         if search_engines_storage is None:
+    #             search_engines_storage = collections.Counter()
+
+    #         search_engines_storage[search_engine] += 1
+    #         # print(search_engines_storage)
+    #         self.cache.set(SEARCH_ENGINES, search_engines_storage)
+    #         print("working")
 
     # stores the top queries done with howdoi
     # def howdoi_queries_distribution(self):
     #     print("in howdoi usage")
 
+    def render_request_stats(self):
+        rs = self.rs
+
+        total_request_count = self[TOTAL_REQUESTS] or 0
+
+        cached_request_count = self[CACHE_HITS] or 0
+
+        total_request_count = self[TOTAL_REQUESTS] or 0
+
+        outbound_request_count = total_request_count - cached_request_count
+
+        successful_requests = self[SUCCESS_RESULT_KEY] or 0
+        failed_requests = self[ERROR_RESULT_KEY] or 0
+
+        rs.add(
+            Report('Network Stats', 'Of the {} requests you have made using howdoi, {} have been saved by howdoi\'s cache'.format(
+                total_request_count, cached_request_count))
+        )
+
+        rs.add(
+            Report('Network Stats', 'Also, {} requests have succeeded, while {} have failed due to connection issues, or some other problem.'.format(
+                successful_requests, failed_requests))
+        )
+
+        if total_request_count > 0:
+            rs.add(
+                Report(
+                    'network-request-stats', lambda: draw_graph(
+                        data=[safe_divide(outbound_request_count*100, total_request_count),
+                              safe_divide(cached_request_count*100, total_request_count)],
+                        labels=['Outbound Requests', 'Cache Saved Requests'],
+                        custom_args={'suffix': '%', }
+                    )
+                )
+            )
+
+        if successful_requests+failed_requests > 0:
+            rs.add(
+                Report('network-request-stats', lambda: draw_graph(
+                    data=[safe_divide(successful_requests*100, successful_requests+failed_requests),
+                          safe_divide(failed_requests*100, successful_requests+failed_requests)],
+                    labels=['Successful Requests', 'Failed Requests'],
+                    custom_args={'suffix': '%', }
+                )
+            )
+        )
+
+
+    # main function for termgraph stats
     def render_stats(self):
         print("RENDERING STATS, to disable : howdoi --disable_stats")
         self.render_search_engine_stats()
+        self.load_time_stats()
+        self.render_query_stats()
+        self.render_request_stats()
+        self.rs.report()
 
     def increase_cache_hits(self):
         self.cache.inc(CACHE_HITS)
diff --git a/howdoi/utils.py b/howdoi/utils.py
new file mode 100644
index 000000000..f0371b332
--- /dev/null
+++ b/howdoi/utils.py
@@ -0,0 +1,17 @@
+import heapq
+
+
+def get_top_n_key_val_pairs_from_dict(dict_, N):
+    top_n_key_value_pairs = []
+    if isinstance(dict_, dict):
+        for key in dict_:
+            heapq.heappush(top_n_key_value_pairs, (dict_[key], key))
+            if len(top_n_key_value_pairs) > N:
+                heapq.heappop(top_n_key_value_pairs)
+
+    top_n_key_value_pairs.sort(reverse=True)
+    return [(k, v) for v, k in top_n_key_value_pairs]
+
+
+def safe_divide(numerator, denominator):
+    return numerator/denominator if denominator != 0 else 0

From b1ccce7a151c26d7f5aeefe202f2751abc336901 Mon Sep 17 00:00:00 2001
From: "Jyoti Bisht (Joe)"
Date: Tue, 12 Oct 2021 22:50:05 +0530
Subject: [PATCH 16/17] --rebase failed hence added manually

---
 howdoi/howdoi.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index b2ef1f9ba..2faf2a8a4 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -192,6 +192,16 @@ def _get_result(url):
                       'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n%s', RED, END_FORMAT)
         raise error
 
+def _get_from_cache(cache_key):
+    # As of cachelib 0.3.0, it internally logs a warning on cache miss
+    current_log_level = logging.getLogger().getEffectiveLevel()
+    # Reduce the log level so the warning is not printed
+    logging.getLogger().setLevel(logging.ERROR)
+    page = cache.get(cache_key)  # pylint: disable=assignment-from-none
+    # Restore the log level
+    logging.getLogger().setLevel(current_log_level)
+    return page
+
 
 def _add_links_to_text(element):
     hyperlinks = element.find('a')
@@ -612,7 +622,7 @@ def howdoi(raw_query):
     if ENABLE_USER_STATS:
         CollectStats_obj.run(args)
-    res = cache.get(cache_key)  # pylint: disable=assignment-from-none
+    res = _get_from_cache(cache_key)  # pylint: disable=assignment-from-none
 
     if res:
         CollectStats_obj.increase_cache_hits()

From aae1c90110e2baf5d68d86e5f804ff5ef09babcc Mon Sep 17 00:00:00 2001
From: "Jyoti Bisht (Joe)"
Date: Tue, 12 Oct 2021 23:11:02 +0530
Subject: [PATCH 17/17] fixed object error

---
 howdoi/howdoi.py | 4 ----
 howdoi/stats.py  | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/howdoi/howdoi.py b/howdoi/howdoi.py
index 2faf2a8a4..610091abc 100755
--- a/howdoi/howdoi.py
+++ b/howdoi/howdoi.py
@@ -781,10 +781,6 @@ def command_line_runner():  # pylint: disable=too-many-return-statements,too-man
     if args['stats']:
         CollectStats_obj.render_stats()
 
-    if args['disable-stats']:
-        ENABLE_USER_STATS = False
-        print("STATS DISABLED")
-
     if args['clear_cache']:
         if _clear_cache():
             print(f'{GREEN}Cache cleared successfully{END_FORMAT}')
diff --git a/howdoi/stats.py b/howdoi/stats.py
index 4339fbd37..ff5868763 100644
--- a/howdoi/stats.py
+++ b/howdoi/stats.py
@@ -132,7 +132,7 @@ def load_time_stats(self):
     def render_search_engine_stats(self):
         rs = self.rs
-        search_engine_frequency = self[SEARCH_ENGINES]
+        search_engine_frequency = self.cache.get(SEARCH_ENGINES)
         if search_engine_frequency is not None:
             max_search_engine = max(search_engine_frequency, key= lambda engine : search_engine_frequency[engine])
             rs.add(Report('Search-engine=stats', 'Your most used search engine is {}'.format(max_search_engine.title())))
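
PATCH 13 deletes test_stats.py without adding a replacement. A minimal sketch of how the counters in howdoi/stats.py could be exercised against a temporary cachelib FileSystemCache is shown below. It is illustrative only, not part of the patches above: it assumes the termgraph dependency added in setup.py is installed (stats.py imports it at module level), and it uses only method and constant names that exist in stats.py as it stands after PATCH 17.

# test_stats.py (illustrative sketch, not part of the series above)
import unittest
from tempfile import mkdtemp

from cachelib import FileSystemCache

from howdoi.stats import CollectStats, TOTAL_REQUESTS, CACHE_HITS, ERROR_IN_RES, PROCESSED_LINKS


class TestCollectStats(unittest.TestCase):
    def setUp(self):
        # a throwaway on-disk cache, mirroring the FileSystemCache backend howdoi itself uses
        self.cache = FileSystemCache(mkdtemp(prefix='howdoi_test'), default_timeout=0)
        self.stats = CollectStats(self.cache)

    def test_request_and_cache_counters(self):
        # each call bumps the corresponding counter key in the cache
        self.stats.increase_requests()
        self.stats.increase_requests()
        self.stats.increase_cache_hits()
        self.assertEqual(self.cache.get(TOTAL_REQUESTS), 2)
        self.assertEqual(self.cache.get(CACHE_HITS), 1)

    def test_errored_response_is_counted(self):
        # a dict with an 'error' key is tallied under ERROR_IN_RES
        self.stats.process_response({'error': 'Unable to reach google.'})
        self.assertEqual(self.cache.get(ERROR_IN_RES), 1)

    def test_processed_links_are_tallied(self):
        links = ['https://stackoverflow.com/q/1',
                 'https://stackoverflow.com/q/1',
                 'https://stackoverflow.com/q/2']
        self.stats.process_links(links)
        counter = self.cache.get(PROCESSED_LINKS)
        self.assertEqual(counter['https://stackoverflow.com/q/1'], 2)
        self.assertEqual(counter['https://stackoverflow.com/q/2'], 1)


if __name__ == '__main__':
    unittest.main()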