diff --git a/pyquotes/__init__.py b/pyquotes/__init__.py index e69de29..c7aba32 100644 --- a/pyquotes/__init__.py +++ b/pyquotes/__init__.py @@ -0,0 +1 @@ +__all__ = ["pyquotes", "brainyquote", "scrapper"] diff --git a/pyquotes/brainyquote/brainyquote.py b/pyquotes/brainyquote/brainyquote.py index 1da25e1..d3bb385 100644 --- a/pyquotes/brainyquote/brainyquote.py +++ b/pyquotes/brainyquote/brainyquote.py @@ -11,17 +11,17 @@ def get_author_link(person): author_name = person.lower() - author_name_split = author_name.split(' ') - author_url_link = '' + author_name_split = author_name.split(" ") + author_url_link = "" count = 0 for i in author_name_split: author_url_link += i count += 1 if count is not len(author_name_split): - author_url_link += '_' + author_url_link += "_" - author_url_link = author_url_link.replace('.', '_') + author_url_link = author_url_link.replace(".", "_") return author_url_link @@ -36,20 +36,20 @@ def get_quotes(person, category): """ URL = "https://www.brainyquote.com/authors/" + get_author_link(person) respone_author = requests.get(URL) - soup_author = BeautifulSoup(respone_author.content, 'html5lib') - categories = soup_author.find_all('div', class_='kw-box') + soup_author = BeautifulSoup(respone_author.content, "html5lib") + categories = soup_author.find_all("div", class_="kw-box") check = False count = 0 for i in categories: a = i.text - replace = a.replace("\n", '') + replace = a.replace("\n", "") r = replace.lower() if category in r: check = True count += 1 # Getting the quote of the related author - get_quote = soup_author.find_all('a', attrs={'title': 'view quote'}) + get_quote = soup_author.find_all("a", attrs={"title": "view quote"}) quote_list = [] big_list = [] for i in range(count): @@ -57,12 +57,12 @@ def get_quotes(person, category): big_list.append(quote_list) if len(quote_list) == 0: - return('''Oops! It seems that there are no quotes of the author of that + return """Oops! 
It seems that there are no quotes of the author of that category. - \nYou may consider changing the category or the author ''') + \nYou may consider changing the category or the author """ quote_list.append(person) - return(quote_list) + return quote_list def get_quote(person, category): @@ -76,16 +76,16 @@ def get_quote(person, category): """ quotes = get_quotes(person, category) length = len(quotes) - if(length == 0): + if length == 0: # In case no quote of the author exist for that category. - return("No quotes found of that category") + return "No quotes found of that category" else: random_number = random.randint(0, length - 1) list = [] list.append(quotes[random_number]) list.append(person) - return(tuple(list)) + return tuple(list) def get_quote_of_the_day(): @@ -99,26 +99,26 @@ def get_quote_of_the_day(): # Sending a HTTP request to the specified URL and saving the response # from server in a response object called response. response = requests.get(URL) - soup = BeautifulSoup(response.content, 'html5lib') - a_tags = soup.findAll('img', alt=True) + soup = BeautifulSoup(response.content, "html5lib") + a_tags = soup.findAll("img", alt=True) # Getting all the a tags of the page. quote_of_the_day_atag = str(a_tags[0]) # Grabbing the first a tag of the page - matches = re.findall(r'\"(.+?)\"', quote_of_the_day_atag) + matches = re.findall(r"\"(.+?)\"", quote_of_the_day_atag) # A regular expression which gives a list of all # text that is in between quotes. 
- quote_author_split_list = str(matches[0]).split('-') + quote_author_split_list = str(matches[0]).split("-") # Get a list of quote_of_the_day and the author - quote_of_the_day = matches[0].replace(quote_author_split_list[-1], '') - quote_of_the_day = quote_of_the_day.replace('-', '') + quote_of_the_day = matches[0].replace(quote_author_split_list[-1], "") + quote_of_the_day = quote_of_the_day.replace("-", "") author_name = quote_author_split_list[-1] # Gives the author_name - author_name = author_name.replace(' ', '') + author_name = author_name.replace(" ", "") # Removes any extra space return (quote_of_the_day, author_name) diff --git a/pyquotes/pyquotes.py b/pyquotes/pyquotes.py index 569fef1..38c909b 100644 --- a/pyquotes/pyquotes.py +++ b/pyquotes/pyquotes.py @@ -4,8 +4,7 @@ # 2. Get the quote of the day -def get_quotes(person: (None, str) = None, - category: (None, str) = None): +def get_quotes(person: (None, str) = None, category: (None, str) = None): """ This function returns all the quotes that matches the input. @@ -16,8 +15,7 @@ def get_quotes(person: (None, str) = None, pass -def get_quote(person: (None, str) = None, - category: (None, str) = None): +def get_quote(person: (None, str) = None, category: (None, str) = None): """ This function take a category and a person as a input and returns a random quote which matches the input. diff --git a/pyquotes/quotery/quotery.py b/pyquotes/quotery/quotery.py index 636ed07..a9ee4dc 100644 --- a/pyquotes/quotery/quotery.py +++ b/pyquotes/quotery/quotery.py @@ -11,8 +11,7 @@ # 2. Get the quote of the day -def get_quotes(person: (None, str) = None, - category: (None, str) = None): +def get_quotes(person: (None, str) = None, category: (None, str) = None): """ This function returns all the quotes that matches the input. :param person: Name of the person e.g. 
Albert Einstein @@ -22,8 +21,7 @@ def get_quotes(person: (None, str) = None, return crawler(person, category) -def get_quote(person: (None, str) = None, - category: (None, str) = None): +def get_quote(person: (None, str) = None, category: (None, str) = None): """ This function take a category and a person as a input and returns a random quote which matches the input. @@ -33,7 +31,7 @@ def get_quote(person: (None, str) = None, """ quotes_and_authors = crawler(person, category) if len(quotes_and_authors) > 1: - index = random.randint(0, len(quotes_and_authors)-1) + index = random.randint(0, len(quotes_and_authors) - 1) else: index = 0 return quotes_and_authors[index] @@ -46,11 +44,14 @@ def get_quote_of_the_day(): """ page_number = random.randint(1, 912) test = 1 - url = "https://api.quotery.com/wp-json/quotery/v1/quotes?orderby=popular&page=" + \ - str(page_number)+"&per_page=120" + url = ( + "https://api.quotery.com/wp-json/quotery/v1/quotes?orderby=popular&page=" + + str(page_number) + + "&per_page=120" + ) quote, authors, test = scraper(url, test) quotes_and_authors = selection_general(quote, authors) - index = random.randint(0, len(quotes_and_authors)-1) + index = random.randint(0, len(quotes_and_authors) - 1) return quotes_and_authors[index] @@ -58,40 +59,50 @@ def scraper(url, test): authors = [] quotes = [] # Used a header to fake a browser - source = requests.get(url, headers={ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}).text - soup = BeautifulSoup(source, 'lxml') + source = requests.get( + url, + headers={ + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36" + }, + ).text + soup = BeautifulSoup(source, "lxml") para = soup.p.text # all these random split is how the data was arranged in the source from which it had to be stripped - para = para.split("\"quotes\"") + para = 
para.split('"quotes"') if len(para) > 1: para = para[1] else: para = para[0] count = 0 - para = para.split("\"status\"") + para = para.split('"status"') if len(para) == 1: para = para[0] - quote_list = para.split("\"body\"") + quote_list = para.split('"body"') for index, element in enumerate(quote_list): - quote_element = element.split("\"images\"") + quote_element = element.split('"images"') if index > 0: for inner_index, quote in enumerate(quote_element): if inner_index == 0: # Cleaning the quote and using the encode decode to remove Unicode escape chracters - cleaned_quote = quote[2:len( - quote)-2].encode('utf-8').decode('unicode-escape') + cleaned_quote = ( + quote[2: len(quote) - 2] + .encode("utf-8") + .decode("unicode-escape") + ) quotes.append(cleaned_quote) - author_list = para.split("\"name\"") + author_list = para.split('"name"') for index, element in enumerate(author_list): - author_element = element.split("\"slug\"") + author_element = element.split('"slug"') if index > 0: for inner_index, author in enumerate(author_element): if inner_index == 0: # Cleaning the name of the author and using the encode decode to remove Unicode escape chracters - cleaned_author = author[2:len( - author)-2].encode('utf-8').decode('unicode-escape') + cleaned_author = ( + author[2: len(author) - 2] + .encode("utf-8") + .decode("unicode-escape") + ) authors.append(cleaned_author) count += 1 else: @@ -123,13 +134,13 @@ def crawler(user_author=None, user_topic=None): authors = [] quote = [] quotes_and_authors = [] - punctuations = (",", "-", "'", ".", '"', '_', '\\', '“', '”', '*') + punctuations = (",", "-", "'", ".", '"', "_", "\\", "“", "”", "*") new_word = "" expected_author = [] expected_topic = [] - file_topic = open('topics.txt', 'r') - file_author = open('authors.txt', 'r') + file_topic = open("topics.txt", "r") + file_author = open("authors.txt", "r") # Using FuzzyWuzzy to match input entry to the entries in the website. 
We are using text files from scrapped from the website for it. @@ -150,18 +161,24 @@ def crawler(user_author=None, user_topic=None): for index, char in enumerate(topic): if char not in punctuations: new_word = new_word + char - new_word = '-'.join(new_word.split(" ")) + new_word = "-".join(new_word.split(" ")) i = 1 test = 1 while test: - url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?topic=" - + new_word + "&page=" + str(i) + "&per_page=120") + url = ( + "https://api.quotery.com/wp-json/quotery/v1/quotes?topic=" + + new_word + + "&page=" + + str(i) + + "&per_page=120" + ) new_word = "" i += 1 for author in expected_author: quote, authors, test = scraper(url, test) quotes_and_authors += selection_author( - quote, authors, author) + quote, authors, author + ) return quotes_and_authors else: # show random quotes from the topic @@ -173,9 +190,14 @@ def crawler(user_author=None, user_topic=None): for index, char in enumerate(topic): if char not in punctuations: new_word = new_word + char - new_word = '-'.join(new_word.split(" ")) - url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?topic=" - + new_word + "&page=" + str(i) + "&per_page=120") + new_word = "-".join(new_word.split(" ")) + url = ( + "https://api.quotery.com/wp-json/quotery/v1/quotes?topic=" + + new_word + + "&page=" + + str(i) + + "&per_page=120" + ) new_word = "" i += 1 quote, authors, test = scraper(url, test) @@ -195,10 +217,15 @@ def crawler(user_author=None, user_topic=None): for index, char in enumerate(author): if char not in punctuations: new_word = new_word + char - new_word = '-'.join(new_word.split(" ")) + new_word = "-".join(new_word.split(" ")) while test: - url = ("https://api.quotery.com/wp-json/quotery/v1/quotes?author=" - + new_word + "&page=" + str(i) + "&per_page=120") + url = ( + "https://api.quotery.com/wp-json/quotery/v1/quotes?author=" + + new_word + + "&page=" + + str(i) + + "&per_page=120" + ) i += 1 quote, authors, test = scraper(url, test) quotes_and_authors += 
selection_general(quote, authors) diff --git a/tests/codeformattest.py b/tests/codeformattest.py new file mode 100644 index 0000000..8191fce --- /dev/null +++ b/tests/codeformattest.py @@ -0,0 +1,21 @@ +import os +import unittest +import pep8 + + +class TestCodeFormat(unittest.TestCase): + def test_pep8_conformance(self): + """Test that we conform to PEP8. checks all project files""" + errors = 0 + style = pep8.StyleGuide(quiet=False) + style.options.max_line_length = 120 + for root, dirs, files in os.walk("."): + python_files = [os.path.join(root, f) + for f in files if f.endswith(".py")] + errors += style.check_files(python_files).total_errors + + self.assertEqual(errors, 0, "PEP8 style errors: %d" % errors) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_brainyquotes.py b/tests/test_brainyquotes.py index e69de29..0551d9f 100644 --- a/tests/test_brainyquotes.py +++ b/tests/test_brainyquotes.py @@ -0,0 +1,13 @@ +import requests + + +class TestBrainyQuotes: + def test_brainyquote_reachable(self): + + r = requests.head("https://www.brainyquote.com/") + print("Website Up") + assert r.status_code == 200 + + +if __name__ == "__main__": + TestBrainyQuotes().test_brainyquote_reachable()