-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Copy pathsearch_engine.py
50 lines (48 loc) · 1.72 KB
/
search_engine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import requests
import sys
# get the API KEY here: https://developers.google.com/custom-search/v1/overview
API_KEY = "<INSERT_YOUR_API_KEY_HERE>"
# get your Search Engine ID on your CSE control panel
SEARCH_ENGINE_ID = "<INSERT_YOUR_SEARCH_ENGINE_ID_HERE>"
# doc: https://developers.google.com/custom-search/v1/using_rest
API_ENDPOINT = "https://www.googleapis.com/customsearch/v1"
# number of results per page assumed by the start-index calculation
RESULTS_PER_PAGE = 10
# seconds to wait for the API before giving up instead of hanging forever
REQUEST_TIMEOUT = 10


def parse_page(argv):
    """Return the 1-based page number given as ``argv[2]``.

    Falls back to 1 (with a printed notice) when the argument is missing,
    is not an integer, or is not positive.
    """
    if len(argv) < 3:
        print("Page number isn't specified, defaulting to 1")
        return 1
    raw_page = argv[2]
    try:
        page = int(raw_page)
    except ValueError:
        page = 0  # force the "invalid" branch below
    # explicit check rather than `assert`, which is stripped under `python -O`
    if page < 1:
        print(f"Invalid page number {raw_page!r}, defaulting to 1")
        return 1
    return page


def page_start(page):
    """Return the index of the first result on *page*.

    (page=1) => 1, (page=2) => 11, (page=3) => 21
    """
    return (page - 1) * RESULTS_PER_PAGE + 1


def build_params(query, start):
    """Return the query-string parameters for one search request.

    The values are returned unencoded; ``requests`` URL-encodes them, so
    queries containing spaces, ``&`` or ``#`` are transmitted intact.
    """
    return {
        "key": API_KEY,
        "cx": SEARCH_ENGINE_ID,
        "q": query,
        "start": start,
    }


def fetch_results(query, start):
    """Call the search endpoint and return the decoded JSON response."""
    response = requests.get(
        API_ENDPOINT,
        params=build_params(query, start),
        timeout=REQUEST_TIMEOUT,
    )
    return response.json()


def long_description_of(search_item):
    """Return the item's ``og:description`` metatag, or ``"N/A"`` if absent."""
    try:
        return search_item["pagemap"]["metatags"][0]["og:description"]
    except (KeyError, IndexError, TypeError):
        # missing key, empty metatags list, or an unexpected non-container value
        return "N/A"


def format_result(number, search_item):
    """Return the printable text block for one search result.

    *number* is the overall result number shown in the banner line.
    """
    banner = "=" * 10
    lines = [
        f"{banner} Result #{number} {banner}",
        f"Title: {search_item.get('title')}",
        f"Description: {search_item.get('snippet')}",
        f"Long description: {long_description_of(search_item)}",
        # trailing " \n" keeps the blank separator line between results
        f"URL: {search_item.get('link')} \n",
    ]
    return "\n".join(lines)


def main(argv=None):
    """Run a search from the command line and print one page of results.

    Usage: ``python search_engine.py 'python' [page]``

    *argv* defaults to ``sys.argv``. Exits with status 1 when no query is
    given or when the API response contains an ``error`` entry.
    """
    if argv is None:
        argv = sys.argv
    # the search query you want, from the command line
    if len(argv) < 2:
        print("Please specify a search query")
        sys.exit(1)
    query = argv[1]
    start = page_start(parse_page(argv))

    # make the API request
    data = fetch_results(query, start)

    # report an error payload explicitly instead of failing on missing items
    error = data.get("error")
    if error:
        message = error.get("message", error) if isinstance(error, dict) else error
        print("API request failed:", message)
        sys.exit(1)

    # "items" is absent when the search has no results
    search_items = data.get("items") or []
    if not search_items:
        print("No results found")
        return

    # number the results continuously across pages
    for number, search_item in enumerate(search_items, start=start):
        print(format_result(number, search_item))


if __name__ == "__main__":
    main()