Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reworked IntelArk Cog, now uses web scraping #94

Open
wants to merge 1 commit into
base: rewrite
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 130 additions & 102 deletions Cogs/IntelArk.py
Original file line number Diff line number Diff line change
@@ -1,105 +1,133 @@
import asyncio
import discord
from discord.ext import commands
from Cogs import Message
from Cogs import DL
from Cogs import PickList
import urllib
from discord.ext import commands
from Cogs import Message
from urllib.request import urlopen
from googlesearch import search
from bs4 import BeautifulSoup


def setup(bot):
    """Extension entry point: attach the IntelArk cog to ``bot``.

    The cog is added exactly once; adding the same cog a second time would
    try to register its commands again.
    """
    # Add the bot
    bot.add_cog(IntelArk(bot))


class IntelArk(commands.Cog):

def __init__(self, bot):
self.bot = bot
self.fields = {
"ProcessorNumber": "Processor Name",
"ProcessorBrandName": "Processor Brand String",
"BornOnDate": "Release Date",
"ClockSpeed": "Base Clock",
"ClockSpeedMax": "Max Clock",
"CoreCount": "Cores",
"ThreadCount": "Threads",
"MaxMem": "Max Memory",
"MaxTDP": "Max TDP",
"GraphicsModel": "Onboard Graphics",
"InstructionSet": "Instruction Set",
"InstructionSetExtensions": "Extensions"
}

@commands.command(pass_context=True, no_pm=True)
async def iark(self, ctx, *, text : str = None):
    """Search Ark for Intel CPU info."""

    # Shared embed settings; the description starts as the usage hint and is
    # overwritten as the command progresses.
    args = {
        "title":"Intel Ark Search",
        "description":'Usage: `{}iark [cpu model]`'.format(ctx.prefix),
        "footer":"Powered by http://ark.intel.com",
        "color":ctx.author
    }

    # Strip single quotes (the query is embedded between %27 quotes in the
    # filter below); a missing or empty query just gets the usage text.
    text = text.replace("'","") if text is not None else ""
    if not text:
        await Message.EmbedText(**args).send(ctx)
        return

    args["description"] = "Gathering info..."
    message = await Message.EmbedText(**args).send(ctx)

    search_url = "https://odata.intel.com/API/v1_0/Products/Processors()?&$filter=substringof(%27{}%27,ProductName)&$format=json&$top=5".format(urllib.parse.quote(text))
    try:
        # Get the response
        response = await DL.async_json(search_url)
        response = response["d"]
    except Exception:
        # Best effort: any download/shape problem is reported as "no results".
        # Exception (not a bare except) so interpreter-exit signals still propagate.
        response = []

    # Version of the query that is safe to place inside backticks.
    shown_text = text.replace("`","").replace("\\","")

    # Check if we got nothing
    if not response:
        args["description"] = "No results returned for `{}`.".format(shown_text)
        await Message.EmbedText(**args).edit(ctx, message)
        return

    if len(response) > 1:
        # More than one result (either not exact, or like 4790 vs 4790k).
        # If the top result's ProcessorNumber equals the search term, use it.
        # "or ''" guards against a present-but-null ProcessorNumber.
        proc_num = response[0].get("ProcessorNumber") or ""
        if proc_num.lower().strip() == text.lower().strip():
            response = response[0]
        else:
            # Not exact - let the user choose
            index, message = await PickList.Picker(
                message=message,
                title="Multiple Matches Returned For `{}`:".format(shown_text),
                list=[x["ProcessorNumber"] for x in response],
                ctx=ctx
            ).pick()
            if index < 0:
                args["description"] = "Search cancelled."
                await Message.EmbedText(**args).edit(ctx, message)
                return
            response = response[index]
    else:
        # Exactly one result
        response = response[0]

    # At this point we have a single response - format and display it.
    fields = [{"name":self.fields[x], "value":response.get(x,None), "inline":True} for x in self.fields]
    await Message.Embed(
        thumbnail=response.get("BrandBadge",None),
        pm_after=12,
        title=response.get("ProductName","Intel Ark Search"),
        fields=fields,
        url=response.get("Link",None),
        footer="Powered by http://ark.intel.com",
        color=ctx.author
    ).edit(ctx, message)
# CSS selectors used to scrape an Ark product page: the heading that holds the
# processor name, the badge thumbnail image, and each row of the specs list.
SELECTOR_NAME = ".h1"
SELECTOR_THUMBNAIL = ".badge-loaded > img"
SELECTOR_TABLE_ROW = ".specs-list li"
# Tag name of the label/value cells inside a specs row.
TAG_TABLE_CELL = "span"
# Spec labels to show in the embed. iark() matches these as substrings of the
# scraped row label, so each entry only needs to appear once.
desired_fields = ['Processor Number',
                  'Product Collection',
                  'Launch Date',
                  'Processor Base Frequency',
                  '# of Cores',
                  '# of Threads',
                  'Max Memory Size',
                  'TDP',
                  'Processor Graphics',
                  'Instruction Set']

def __init__(self, bot):
self.bot = bot
self.fields = dict()

@staticmethod
def get_ark_url(query):
    """Return the first Google hit for ``query`` on ark.intel.com, or None.

    The search is restricted to ark.intel.com by appending a ``site:``
    operator to the query; only the first yielded result is consumed.
    NOTE(review): this call is synchronous and is made with verify_ssl=False
    - confirm that disabling certificate verification is intentional.
    """
    site_query = query + " site:ark.intel.com"
    return next(search(site_query, num=1, verify_ssl=False), None)

@staticmethod
def parse_ark_data(page):
"""Scrape ark URL for relevant info"""

results = page.select(IntelArk.SELECTOR_TABLE_ROW)
data = list()
for x in results:
spans = x.find_all(IntelArk.TAG_TABLE_CELL, recursive=False)
data.append([spans[0].get_text().strip(), spans[1].get_text().strip()])
return data

@staticmethod
def get_ark_name(page):
heading = page.select_one(IntelArk.SELECTOR_NAME)
if heading:
return heading.get_text().strip()
elif page.title:
return page.title.string
else:
return "Intel Ark"

@staticmethod
def get_ark_thumbnail(page):
img = page.select_one(IntelArk.SELECTOR_THUMBNAIL)
if not img:
return None
else:
return img['src']

@commands.command(pass_context=True, no_pm=True)
async def iark(self, ctx, *, text: str = None):
    """Search Ark for Intel CPU info."""

    # Shared embed settings; the description starts as the usage hint and is
    # overwritten as the command progresses.
    args = {
        "title": "Intel Ark Search",
        "description": 'Usage: `{}iark [cpu model]`'.format(ctx.prefix),
        "footer": "Powered by http://ark.intel.com",
        "color": ctx.author
    }

    # Strip single quotes; a missing or empty query just gets the usage text.
    text = text.replace("'", "") if text is not None else ""
    if not text:
        await Message.EmbedText(**args).send(ctx)
        return

    args["description"] = "Gathering info..."
    message = await Message.EmbedText(**args).send(ctx)

    def _load_page():
        """Blocking work: find the Ark URL via Google, download and parse it."""
        url = self.get_ark_url(text)
        if not url:
            return None, None
        # Context manager so the HTTP response is always closed.
        with urlopen(url) as response:
            html = response.read().decode("utf-8", errors="replace")
        return url, BeautifulSoup(html, "lxml")

    # The search and the download are synchronous network calls; run them in
    # the default executor so the bot's event loop is not stalled meanwhile.
    try:
        ark_url, page = await asyncio.get_event_loop().run_in_executor(None, _load_page)
    except OSError:
        # URLError/HTTPError/timeouts are all OSError subclasses. Treat them as
        # "nothing found" so the message does not stay on "Gathering info...".
        ark_url, page = None, None

    data = self.parse_ark_data(page) if page is not None else []
    if not data:
        args["description"] = "No results returned for `{}`.".format(text.replace("`", "").replace("\\", ""))
        await Message.EmbedText(**args).edit(ctx, message)
        return

    # Keep only rows whose label contains one of the desired field names.
    # Collected in a local dict rather than on self, so overlapping
    # invocations of the command cannot mix each other's results.
    found = {}
    for label, value in data:
        if any(field in label for field in self.desired_fields):
            found[label] = value

    # At this point we have a single result - format and display it.
    fields = [{"name": label, "value": value, "inline": True} for label, value in found.items()]
    await Message.Embed(
        thumbnail=self.get_ark_thumbnail(page),
        pm_after=12,
        title=self.get_ark_name(page),
        fields=fields,
        url=ark_url,
        footer="Powered by http://ark.intel.com",
        color=ctx.author
    ).edit(ctx, message)
5 changes: 4 additions & 1 deletion Install.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,10 @@ def run(self, command_list, leave_on_fail = False):
{"name":"numpy"},
{"name":"pymongo"},
{"name":"igdb_api_python"},
{"name":"geopy"}
{"name":"geopy"},
{"name":"lxml"},
{"name":"beautifulsoup4"},
{"name":"google"}
]
item = 0
for module in modules:
Expand Down