corpnewt · SheinH · Aug 26, 2020
diff --git a/Cogs/IntelArk.py b/Cogs/IntelArk.py
@@ -1,105 +1,133 @@
-import asyncio
-import discord
-from   discord.ext import commands
-from   Cogs import Message
-from   Cogs import DL
-from   Cogs import PickList
-import urllib
+from discord.ext import commands
+from Cogs import Message
+from urllib.request import urlopen
+from googlesearch import search
+from bs4 import BeautifulSoup
+
 
 def setup(bot):
-	# Add the bot
-	bot.add_cog(IntelArk(bot))
-
+    # Register the IntelArk cog with the bot
+    bot.add_cog(IntelArk(bot))
+
+
 class IntelArk(commands.Cog):
-
-	def __init__(self, bot):
-		self.bot = bot
-		self.fields = {
-			"ProcessorNumber": "Processor Name",
-			"ProcessorBrandName": "Processor Brand String",
-			"BornOnDate": "Release Date",
-			"ClockSpeed": "Base Clock",
-			"ClockSpeedMax": "Max Clock",
-			"CoreCount": "Cores",
-			"ThreadCount": "Threads",
-			"MaxMem": "Max Memory",
-			"MaxTDP": "Max TDP",
-			"GraphicsModel": "Onboard Graphics",
-			"InstructionSet": "Instruction Set",
-			"InstructionSetExtensions": "Extensions"
-		}
-
-	@commands.command(pass_context=True, no_pm=True)
-	async def iark(self, ctx, *, text : str = None):
-		"""Search Ark for Intel CPU info."""
-
-		args = {
-			"title":"Intel Ark Search",
-			"description":'Usage: `{}iark [cpu model]`'.format(ctx.prefix),
-			"footer":"Powered by http://ark.intel.com",
-			"color":ctx.author
-		}
-
-		if text == None:
-			await Message.EmbedText(**args).send(ctx)
-			return
-
-		# Strip single quotes
-		text = text.replace("'","")
-		if not len(text):
-			await Message.EmbedText(**args).send(ctx)
-			return
-
-		# message = await Message.EmbedText(title="Intel Ark Search",description="Gathering info...",color=ctx.author,footer="Powered by http://ark.intel.com").send(ctx)
-
-		args["description"] = "Gathering info..."
-		message = await Message.EmbedText(**args).send(ctx)
-
-		search_url = "https://odata.intel.com/API/v1_0/Products/Processors()?&$filter=substringof(%27{}%27,ProductName)&$format=json&$top=5".format(urllib.parse.quote(text))
-		try:
-			# Get the response
-			response = await DL.async_json(search_url)
-			response = response["d"]
-		except:
-			response = []
-		# Check if we got nothing
-		if not len(response):
-			args["description"] = "No results returned for `{}`.".format(text.replace("`","").replace("\\",""))
-			await Message.EmbedText(**args).edit(ctx, message)
-			return
-		elif len(response) == 1:
-			# Set it to the first item
-			response = response[0]
-		# Check if we got more than one result (either not exact, or like 4790 vs 4790k)
-		elif len(response) > 1:
-			# Check the top result - and if the ProcessorNumber == our search term - we good
-			proc_num = response[0].get("ProcessorNumber","")
-			if proc_num.lower().strip() == text.lower().strip():
-				# found it
-				response = response[0]
-			else:
-				# Not exact - let's give options
-				index, message = await PickList.Picker(
-					message=message,
-					title="Multiple Matches Returned For `{}`:".format(text.replace("`","").replace("\\","")),
-					list=[x["ProcessorNumber"] for x in response],
-					ctx=ctx
-				).pick()
-				if index < 0:
-					args["description"] = "Search cancelled."
-					await Message.EmbedText(**args).edit(ctx, message)
-					return
-				# Got something
-				response = response[index]
-		# At this point - we should have a single response
-		# Let's format and display.
-		fields = [{"name":self.fields[x], "value":response.get(x,None), "inline":True} for x in self.fields]
-		await Message.Embed(
-			thumbnail=response.get("BrandBadge",None),
-			pm_after=12,
-			title=response.get("ProductName","Intel Ark Search"),
-			fields=fields,
-			url=response.get("Link",None),
-			footer="Powered by http://ark.intel.com",
-			color=ctx.author
-			).edit(ctx, message)
+    # CSS selectors for the parts of an Ark product page that get scraped:
+    # the processor name heading, the badge thumbnail, and the spec rows.
+    SELECTOR_NAME = ".h1"
+    SELECTOR_THUMBNAIL = ".badge-loaded > img"
+    SELECTOR_TABLE_ROW = ".specs-list li"
+    # Tag holding the label / value cells inside each spec row
+    TAG_TABLE_CELL = "span"
+    # Spec labels to display. They are matched as substrings of the scraped
+    # labels, so e.g. 'Instruction Set' would also match a label such as
+    # 'Instruction Set Extensions'.
+    # NOTE(review): 'Max Turbo Frequency' replaces a duplicated '# of Threads'
+    # entry and stands in for the old "Max Clock" field - confirm the label
+    # against a live Ark page.
+    desired_fields = ['Processor Number',
+                      'Product Collection',
+                      'Launch Date',
+                      'Processor Base Frequency',
+                      'Max Turbo Frequency',
+                      '# of Cores',
+                      '# of Threads',
+                      'Max Memory Size',
+                      'TDP',
+                      'Processor Graphics',
+                      'Instruction Set']
+
+    def __init__(self, bot):
+        # Keep a handle on the bot this cog is attached to
+        self.bot = bot
+        # Starts out as an empty dict
+        self.fields = {}
+
+    @staticmethod
+    def get_ark_url(query):
+        """Return the first google hit on ark.intel.com for query, or None."""
+
+        # Restrict the search to Intel Ark; only the top hit is consumed.
+        # NOTE(review): verify_ssl=False is passed through unchanged - confirm
+        # that skipping certificate verification is intended.
+        scoped_query = query + " site:ark.intel.com"
+        for url in search(scoped_query, num=1, verify_ssl=False):
+            return url
+        return None
+
+    @staticmethod
+    def parse_ark_data(page):
+        """Scrape the spec rows of a parsed Ark page.
+
+        Returns a list of [label, value] pairs (both whitespace-stripped), one
+        for each row matched by SELECTOR_TABLE_ROW that has at least two direct
+        TAG_TABLE_CELL children. Rows with fewer cells are skipped.
+        """
+
+        data = []
+        for row in page.select(IntelArk.SELECTOR_TABLE_ROW):
+            cells = row.find_all(IntelArk.TAG_TABLE_CELL, recursive=False)
+            if len(cells) < 2:
+                # No label/value pair here - indexing cells[1] would raise
+                # IndexError and abort the whole scrape
+                continue
+            data.append([cells[0].get_text().strip(), cells[1].get_text().strip()])
+        return data
+
+    @staticmethod
+    def get_ark_name(page):
+        """Return a display name for a parsed Ark page.
+
+        Prefers the text of the SELECTOR_NAME heading, then the <title> text,
+        and finally falls back to "Intel Ark", so the result is always a
+        non-empty string.
+        """
+
+        heading = page.select_one(IntelArk.SELECTOR_NAME)
+        if heading:
+            name = heading.get_text().strip()
+            if name:
+                return name
+        # title.string is None when <title> is empty or contains nested tags,
+        # so check it before using it
+        if page.title and page.title.string:
+            title = page.title.string.strip()
+            if title:
+                return title
+        return "Intel Ark"
+
+    @staticmethod
+    def get_ark_thumbnail(page):
+        """Return the src of the badge image on a parsed Ark page, or None.
+
+        None is returned both when no element matches SELECTOR_THUMBNAIL and
+        when the matched <img> carries no src attribute.
+        """
+
+        img = page.select_one(IntelArk.SELECTOR_THUMBNAIL)
+        if not img:
+            return None
+        # .get() rather than img['src']: a missing attribute should mean
+        # "no thumbnail", not a KeyError
+        return img.get('src')
+
+    @staticmethod
+    def _load_ark_page(url):
+        """Download url and return it parsed with BeautifulSoup (blocking call)."""
+
+        # The with-block closes the HTTP response even if read() raises
+        with urlopen(url) as response:
+            html = response.read().decode("utf-8", errors="replace")
+        return BeautifulSoup(html, "lxml")
+
+    @commands.command(pass_context=True, no_pm=True)
+    async def iark(self, ctx, *, text: str = None):
+        """Search Ark for Intel CPU info."""
+
+        # Imported here because this module has no top-level asyncio import
+        import asyncio
+
+        args = {
+            "title": "Intel Ark Search",
+            "description": 'Usage: `{}iark [cpu model]`'.format(ctx.prefix),
+            "footer": "Powered by http://ark.intel.com",
+            "color": ctx.author
+        }
+
+        # Strip single quotes; with nothing left to search for, show the usage
+        text = text.replace("'", "") if text is not None else ""
+        if not text:
+            await Message.EmbedText(**args).send(ctx)
+            return
+
+        args["description"] = "Gathering info..."
+        message = await Message.EmbedText(**args).send(ctx)
+
+        # The google lookup and the page download are blocking calls, so run
+        # them in the default executor instead of stalling the event loop.
+        loop = asyncio.get_event_loop()
+        ark_url, page = None, None
+        try:
+            ark_url = await loop.run_in_executor(None, self.get_ark_url, text)
+            if ark_url:
+                page = await loop.run_in_executor(None, self._load_ark_page, ark_url)
+        except OSError:
+            # urllib's URLError / HTTPError are OSError subclasses; a failed
+            # lookup or download is reported as "no results" below rather than
+            # leaving the message stuck on "Gathering info..."
+            page = None
+
+        data = self.parse_ark_data(page) if page is not None else []
+        if not data:
+            args["description"] = "No results returned for `{}`.".format(text.replace("`", "").replace("\\", ""))
+            await Message.EmbedText(**args).edit(ctx, message)
+            return
+
+        # Keep only the wanted specs. A per-call dict (rather than shared
+        # state on self) keeps overlapping invocations from mixing results.
+        wanted = {}
+        for label, value in data:
+            if any(field in label for field in self.desired_fields):
+                wanted[label] = value
+
+        # At this point - we should have a single response
+        # Let's format and display.
+        fields = [{"name": label, "value": value, "inline": True} for label, value in wanted.items()]
+        thumbnail = self.get_ark_thumbnail(page)
+        title = self.get_ark_name(page)
+        await Message.Embed(
+            thumbnail=thumbnail,
+            pm_after=12,
+            title=title,
+            fields=fields,
+            url=ark_url,
+            footer="Powered by http://ark.intel.com",
+            color=ctx.author
+        ).edit(ctx, message)
diff --git a/Install.py b/Install.py
@@ -170,7 +170,10 @@ def run(self, command_list, leave_on_fail = False):
         {"name":"numpy"},
         {"name":"pymongo"},
         {"name":"igdb_api_python"},
-        {"name":"geopy"}
+        {"name":"geopy"},
+        {"name":"lxml"},
+        {"name":"beautifulsoup4"},
+        {"name":"google"}
     ]
     item = 0
     for module in modules: