diff --git a/InSpy.py b/InSpy.py
old mode 100644
new mode 100755
index a29b215..1187a33
--- a/InSpy.py
+++ b/InSpy.py
@@ -1,194 +1,124 @@
#!/usr/bin/env python2
-# InSpy - A LinkedIn employee enumerator
-# This script enumerates employees from any organization
-# using LinkedIn. Please note that this will not harvest all
-# employees within a given organization.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#
-# Author: Jonathan Broche
-# Contact: @g0jhonny
-# Version: 1.0.1
-# Date: 2015-11-22
-#
-# usage: ./inspy.py -c [-d dept/title] [-e email output format] [-i input file with dept/titles] [-o output file]
-# example: ./inspy.py -c abc -e flast@abc.com -o abc_employees.txt
-
-
-import requests, BeautifulSoup, argparse, signal, time, datetime, os
-
-start_time = time.time()
-
-class colors:
- lightblue = "\033[1;36m"
- blue = "\033[1;34m"
- normal = "\033[0;00m"
- red = "\033[1;31m"
- yellow = "\033[1;33m"
- white = "\033[1;37m"
- green = "\033[1;32m"
-
-#----------------------------------------#
-# HARVEST USERS #
-#----------------------------------------#
+from lib.logger import *
+from lib.soupify import *
+from lib.workbench import *
+from lib.crawler import *
+import os, argparse, sys, time
+
+parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0")
+parser.add_argument('company', help="Company name to use for tasks.")
+techgroup = parser.add_argument_group(title="Technology Search")
+techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]")
+techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]")
+empgroup = parser.add_argument_group(title="Employee Harvesting")
+empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
+empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+outgroup = parser.add_argument_group(title="Output Options")
+outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
+outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
+outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")
+
+if len(sys.argv) == 1:
+ parser.print_help()
+ sys.exit(1)
+
+args = parser.parse_args()
+start_logger(args.company)
+
+print "\nInSpy {}\n".format(parser.version)
+
+if not args.techspy and not args.empspy:
+ print "You didn't provide any work for me to do."
+ sys.exit(1)
+
+stime = time.time()
+tech_html, employee_html, tech_csv, employee_csv, employee_json = [], [], [], [], []
+
+if args.techspy:
+ if os.path.exists(os.path.abspath(args.techspy)):
+ initial_crawl = crawl_jobs(args.company)
+ if initial_crawl:
+ soup = soupify(initial_crawl)
+ job_links = []
+ for link in get_job_links(soup, args.company):
+ if len(job_links) < args.limit:
+ job_links.append(link)
+ if len(job_links) != args.limit:
+ page_links = get_page_links(soup)
+ for page in range(len(page_links)):
+ if len(job_links) == args.limit: break
+ urlcrawl = crawl_url(page_links[page])
+ if urlcrawl:
+ for link in get_job_links(soupify(urlcrawl), args.company):
+ if len(job_links) < args.limit:
+ job_links.append(link)
+
+ pstatus("{} Jobs identified".format(len(job_links)))
+ if job_links:
+ techs = {}
+ for job in range(len(job_links)):
+ jobresponse = crawl_url(job_links[job])
+ if jobresponse:
+ jobsoup = soupify(jobresponse)
+ description = get_job_description(jobsoup)
+ matches = identify_tech(description, os.path.abspath(args.techspy))
+ if matches:
+ title = get_job_title(jobsoup)
+ techs[title] = {job_links[job]:matches}
+
+ tech_html, tech_csv, tech_json = craft_tech(techs)
+ else:
+ perror("No such file or directory: '{}'".format(args.techspy))
-def inspy_enum(company, dept, ifile):
- try:
- dept_dictionary = ['sales', 'marketing', 'human resources', 'finance', 'accounting', 'inventory', 'quality assurance', 'insurance', 'licenses', 'operational', 'customer service', 'staff', 'research & development', 'management', 'administration', 'engineering', 'it', 'is', 'strategy', 'other']
-
+if args.empspy:
+ if os.path.exists(os.path.abspath(args.empspy)):
employees = {}
-
- if dept is not None:
- dept_dictionary = [dept.lower()]
-
- if ifile is not None:
- try:
- if os.path.exists(ifile):
- with open(ifile, 'r') as f:
- dept_dictionary = []
- for line in f.readlines():
- if line.rstrip():
- dept_dictionary.append(line.rstrip())
- except IOError as e:
- print "{}[!]{} Problem opening the file. {}".format(e)
-
- for dd in dept_dictionary:
- print "{}[*]{} Searching for employees working at {} with '{}' in their title".format(colors.lightblue, colors.normal, company, dd)
-
- try:
- response = requests.get('https://www.linkedin.com/title/{}-at-{}'.format(dd.replace('-', ' '), company.replace('-', ' ')), timeout=2)
- if response.status_code == 200:
- soup = BeautifulSoup.BeautifulSoup(response.text)
+ emails = []
+ for response in crawl_employees(args.company, os.path.abspath(args.empspy)):
+ for name, title in get_employees(soupify(response)).items():
+ if args.company.lower() in title.lower():
+ if not name in employees:
+ employees[name] = title
+
+ pstatus("{} Employees identified".format(len(employees.keys())))
+ if employees:
+ if args.emailformat:
+ if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']:
+ employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat)
else:
- pass
- except requests.exceptions.Timeout:
- print "{}[!]{} Timeout enumerating the {} department".format(colors.red, colors.normal, dd)
- except requests.exceptions.ConnectionError:
- print "{}[!]{} Connection error.".format(colors.red, colors.normal)
- except requests.exceptions.HTTPError:
- print "{}[!]{} HTTP error.".format(colors.red, colors.normal)
-
- #get employee names
- for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })):
- name = u''.join(n.getText()).encode('utf-8')
- title = u''.join(t.getText()).encode('utf-8').replace('&', '&')
-
- if not name in employees:
- employees[name] = title
-
- return employees
- except Exception as e:
- print "{}[!]{} Error harvesting users. {}".format(colors.red, colors.normal, e)
-
-#----------------------------------------#
-# EMAILS #
-#----------------------------------------#
-
-def format_email(names, eformat):
- emails = []
- for name in names:
- spaces = []
- for x,y in enumerate(name):
- if ' ' in y:
- spaces.append(x)
-
- if eformat[:eformat.find('@')] == 'flast':
- emails.append('{}{}{}'.format(name[0], name[(spaces[-1]+1):], eformat[eformat.find('@'):]))
- elif eformat[:eformat.find('@')] == 'lfirst':
- emails.append('{}{}{}'.format(name[spaces[-1]+1], name[0:spaces[0]], eformat[eformat.find('@'):]))
- elif eformat[:eformat.find('@')] == 'first.last':
- emails.append('{}.{}{}'.format(name[0:spaces[0]], name[(spaces[-1]+1):], eformat[eformat.find('@'):]))
- elif eformat[:eformat.find('@')] == 'last.first':
- emails.append('{}.{}{}'.format(name[(spaces[-1]+1):], name[0:spaces[0]], eformat[eformat.find('@'):]))
-
- return [e.lower() for e in emails]
-
-#----------------------------------------#
-# OUTPUT #
-#----------------------------------------#
-
-def output(employees, email, company, ofile):
- counter = 0
- ge, be = {}, {}
- print '\n'
-
- if email:
- for k, e in zip(employees, email):
- if company in employees[k].lower():
- if ',' in k:
- be[e] = '{}, {}'.format(k, employees[k])
- else:
- ge[e] = '{}, {}'.format(k, employees[k])
- print "{}[*]{} {}, {}, {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&'), e)
- counter +=1
- else:
- for k in employees:
- if company in employees[k].lower():
- ge[k] = employees[k]
- print "{}[*]{} {} {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&'))
- counter +=1
- if be:
- print "\n{}[!]{} The following employees have commas in their names. Their emails were not accurate.".format(colors.red, colors.normal)
- for k in be:
- print "{}[*]{} {}".format(colors.yellow, colors.normal, be[k])
-
- if ofile:
- with open(ofile, 'w') as f:
- f.write("\n" + "-" * 69 + "\n" + "InSpy Output" + "\n" + "-" * 69 + "\n\n")
-
- if [e for e in ge.keys() if '@' in e]: #if emails in keys
- f.write("\n" + "E-mails" + "\n" + "-" * 25 + "\n\n")
- for k in ge.keys():
- f.write(k+'\n')
-
- f.write("\n" + "All" + "\n" + "-" * 25 + "\n\n")
- for k in ge:
- f.write('{}, {}\n'.format(ge[k], k))
+ pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.")
+ employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
- for k in ge:
- f.write('{}, {}\n'.format(k, ge[k]))
-
- print "\n{}[*]{} Done! {}{}{} employees found.".format(colors.lightblue, colors.normal, colors.green, counter, colors.normal)
- print "{}[*]{} Completed in {:.1f}s\n".format(colors.lightblue, colors.normal, time.time()-start_time)
-
-#----------------------------------------#
-# MAIN #
-#----------------------------------------#
-
-def main():
- print "\n " + "-" * 74 + "\n " + colors.white + "InSpy v1.0 - LinkedIn Employee Enumerator by Jonathan Broche (@g0jhonny)\n " + colors.normal + "-" * 74 + "\n "
- parser = argparse.ArgumentParser(description='InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)')
- parser.add_argument('-c', '--company', required=True, help='Company name')
- parser.add_argument('-d', '--dept', nargs='?', const='', help='Department or title to query employees against. Inspy searches through a predefined list by default.')
- parser.add_argument('-e', '--emailformat', help='Email output format. Acceptable formats: first.last@xyz.com, last.first@xyz.com, flast@xyz.com, lastf@xyz.com')
- parser.add_argument('-i', '--inputfilename', nargs='?', const='', help='File with list of departments or titles to query employees against (one item per line)')
- parser.add_argument('-o', '--outfilename', nargs='?', const='', help='Output results to text file')
- args = parser.parse_args()
-
- employees = inspy_enum(args.company, args.dept, args.inputfilename)
-
- if args.emailformat:
- if args.emailformat.find('@') and args.emailformat[:args.emailformat.find('@')] in {'flast', 'lfirst', 'first.last', 'last.first'}:
- if employees is not None:
- e = format_email(employees.keys(), args.emailformat)
- output(employees, e,args.company.lower(), args.outfilename)
- else:
- print "{}[!]{} Please provide a valid email address format (i.e., flast@xyz.com, lfirst@xyz.com, first.last@xyz.com, last.first@xyz.com)".format(colors.red, colors.normal)
+ employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
- if employees is not None:
- output(employees,'',args.company.lower(), args.outfilename)
+ print os.path.abspath(args.empspy)
+ perror("No such file or directory: '{}'".format(args.empspy))
+
+#output
+if args.html:
+ if tech_html or employee_html:
+ if tech_html and employee_html:
+ craft_html(args.company, tech_html, employee_html, args.html)
+ elif tech_html and not employee_html:
+ craft_html(args.company, tech_html, None, args.html)
+ else:
+ craft_html(args.company, None, employee_html, args.html)
+if args.csv:
+ if tech_csv or employee_csv:
+ if tech_csv and employee_csv:
+ craft_csv(tech_csv, employee_csv, args.csv)
+ elif tech_csv and not employee_csv:
+ craft_csv(tech_csv, None, args.csv)
+ else:
+ craft_csv(None, employee_csv, args.csv)
+if args.json:
+ if tech_json or employee_json:
+ if tech_json and employee_json:
+ craft_json(tech_json, employee_json, args.json)
+ elif tech_json and not employee_json:
+ craft_json(tech_json, None, args.json)
+ else:
+ craft_json(None, employee_json, args.json)
-if __name__ == '__main__':
- main()
\ No newline at end of file
+print "Completed in {:.1f}s".format(time.time()-stime)
\ No newline at end of file
diff --git a/README.md b/README.md
index 66828ab..e254ad5 100644
--- a/README.md
+++ b/README.md
@@ -1,93 +1,51 @@
# InSpy
-A python based LinkedIn employee enumerator. This script is great for social engineering assessments where clients ask one
-to provide employee emails.
+## Introduction
+-----
-### Help
+InSpy is a Python-based LinkedIn enumeration tool. InSpy has two functionalities: TechSpy and EmpSpy.
-```
-InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)
-
-optional arguments:
- -h, --help show this help message and exit
- -c COMPANY, --company COMPANY
- Company name
- -d [DEPT], --dept [DEPT]
- Department or title to query employees against. Inspy
- searches through a predefined list by default.
- -e EMAILFORMAT, --emailformat EMAILFORMAT
- Email output format. Acceptable formats:
- first.last@xyz.com, last.first@xyz.com, flast@xyz.com,
- lastf@xyz.com
- -i [INPUTFILENAME], --inputfilename [INPUTFILENAME]
- File with list of departments or titles to query
- employees against (one item per line)
- -o [OUTFILENAME], --outfilename [OUTFILENAME]
- Output results to text file
-```
-### Examples
-
-```
-./InSpy.py -c "acme corp"
+- TechSpy - Crawls LinkedIn job listings for technologies used by the provided company. InSpy attempts to identify technologies by matching job descriptions to keywords from a new line delimited file.
- --------------------------------------------------------------------------
- InSpy v1.0 - LinkedIn User Enumerator by Jonathan Broche (@g0jhonny)
- --------------------------------------------------------------------------
-
-[*] Searching for employees working at acme corp with 'sales' in their title
-[*] Searching for employees working at acme corp with 'hr' in their title
-[*] Searching for employees working at acme corp with 'marketing' in their title
-[*] Searching for employees working at acme corp with 'finance' in their title
-[*] Searching for employees working at acme corp with 'accounting' in their title
-[*] Searching for employees working at acme corp with 'director' in their title
-[*] Searching for employees working at acme corp with 'administrative' in their title
-[*] Searching for employees working at acme corp with 'lawyer' in their title
-[*] Searching for employees working at acme corp with 'it' in their title
-[*] Searching for employees working at acme corp with 'security' in their title
+- EmpSpy - Crawls LinkedIn for employees working at the provided company. InSpy searches for employees by title and/or departments from a new line delimited file. InSpy may also create emails for the identified employees if the user specifies an email format.
+## Installation
+-----
-[*] Proud Arkie Accounts Receivable specialist at Acme Corp.
-[*] Brian Russo Finance Manager at Acme corp
-[*] Paul Samuelson Director of Customer Support at ACME Corp. Production Resources
-[*] Steve Smith Developer at Acme Corp
-[*] Sarah Rhodes Director of Sales at Acme Corp
-[*] Frances Jones Assistant to the Director at Acme Corp
- ...snip...
+Run `pip install -r requirements.txt` within the cloned InSpy directory.
-[*] Done! 29 employees found.
-[*] Completed in 28.7s
-```
-
-Provide InSpy with the email format of the respective corporation and it'll output the emails for you.
+## Usage
+-----
```
-./InSpy.py -c 'acme corp' -e flast@acme.com
-
- --------------------------------------------------------------------------
- InSpy v1.0 - LinkedIn User Enumerator by Jonathan Broche (@g0jhonny)
- --------------------------------------------------------------------------
-
-[*] Searching for employees working at acme corp with 'sales' in their title
-[*] Searching for employees working at acme corp with 'hr' in their title
-[*] Searching for employees working at acme corp with 'marketing' in their title
-[*] Searching for employees working at acme corp with 'finance' in their title
-[*] Searching for employees working at acme corp with 'accounting' in their title
-[*] Searching for employees working at acme corp with 'director' in their title
-[*] Searching for employees working at acme corp with 'administrative' in their title
-[*] Searching for employees working at acme corp with 'lawyer' in their title
-[*] Searching for employees working at acme corp with 'it' in their title
-[*] Searching for employees working at acme corp with 'security' in their title
-
+InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)
-[*] Proud Arkie, Accounts Receivable specialist at Acme Corp., parkie@acme.com
-[*] Brian Russo, Finance Manager at Acme corp, brusso@acme.com
-[*] Paul Samuelson, Director of Customer Support at ACME Corp. Production Resources, psamuelson@acme.com
-[*] Steve Smith, Developer at Acme Corp, ssmith@acme.com
-[*] Sarah Rhodes, Director of Sales at Acme Corp, srhodes@acme.com
-[*] Frances Jones, Assistant to the Director at Acme Corp, fjones@acme.com
- ...snip...
-
-[*] Done! 29 employees found.
-[*] Completed in 29.0s
+positional arguments:
+ company Company name to use for tasks.
+optional arguments:
+ -h, --help show this help message and exit
+ -v, --version show program's version number and exit
+
+Technology Search:
+ --techspy [file] Crawl LinkedIn job listings for technologies used by
+ the company. Technologies imported from a new line
+ delimited file. [Default: tech-list-small.txt]
+ --limit int Limit the number of job listings to crawl. [Default:
+ 50]
+
+Employee Harvesting:
+ --empspy [file] Discover employees by title and/or department. Titles
+ and departments are imported from a new line delimited
+ file. [Default: title-list-small.txt]
+ --emailformat string Create email addresses for discovered employees using
+ a known format. [Accepted Formats: first.last@xyz.com,
+ last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com,
+ flast@xyz.com, lastf@xyz.com, first@xyz.com,
+ last@xyz.com]
+
+Output Options:
+ --html file Print results in HTML file.
+ --csv file Print results in CSV format.
+ --json file Print results in JSON.
```
\ No newline at end of file
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/crawler.py b/lib/crawler.py
new file mode 100644
index 0000000..4fa6725
--- /dev/null
+++ b/lib/crawler.py
@@ -0,0 +1,50 @@
+from logger import *
+import requests
+requests.packages.urllib3.disable_warnings()
+
+headers={'Host':'www.linkedin.com', 'User-Agent':'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'}
+
+def crawl_employees(company, file):
+ titles = []
+ responses = []
+ try:
+ with open(file) as f:
+ for title in f.readlines():
+ titles.append(title.rstrip())
+ for title in titles:
+ response = requests.get("https://www.linkedin.com/title/{}-at-{}".format(title.replace(' ', '-'), company.replace(' ', '-')), timeout=3, headers=headers)
+ responses.append(response.text)
+ except requests.exceptions.Timeout as e:
+ pwarning("Warning: Timed out crawling {}".format(title))
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error(e)
+ return responses
+
+def crawl_jobs(company): #initial crawl
+ url = "https://www.linkedin.com/jobs/{}-jobs".format(company.replace(' ', '-'))
+ try:
+ response = requests.get(url, timeout=3, headers=headers)
+ return response.text
+ except requests.exceptions.Timeout as e:
+ perror("Error: Timed out. Try again, LinkedIn doesn't like us sometimes")
+ logging.error(e)
+ except requests.exceptions.ReadTimeout as e:
+ perror("Error: Read time out")
+ logging.error(e)
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error(e)
+
+
+def crawl_url(url=None): #page crawls
+ try:
+ response = requests.get(url, timeout=3, headers=headers)
+ return response.text
+ except requests.exceptions.Timeout as e:
+ pwarning("Warning: Timed out")
+ except requests.exceptions.ReadTimeout as e:
+ pwarning("Warning: Read time out")
+ except Exception as e:
+ pwarning("Warning: {}".format(e))
+ logging.error(e)
\ No newline at end of file
diff --git a/lib/logger.py b/lib/logger.py
new file mode 100644
index 0000000..76998a5
--- /dev/null
+++ b/lib/logger.py
@@ -0,0 +1,32 @@
+import logging, sys, time
+
+time_format = time.strftime("%Y-%m-%d %H:%M:%S")
+
+def start_logger(company):
+ handler = logging.FileHandler('./logs/{}_{}.log'.format(company.replace(' ', '_'), time_format.replace(' ', '_')))
+ handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s - %(message)s"))
+
+ logger = logging.getLogger()
+ logger.propagate = False
+ logger.addHandler(handler)
+ logger.setLevel(logging.INFO)
+ logging.getLogger("requests").setLevel(logging.DEBUG)
+
+class colors(object):
+ grey = "\033[0;37m"
+ cyan = "\033[0;36m"
+ yellow = "\033[0;33m"
+ red = "\033[1;31m"
+ normal = "\033[0;00m"
+
+def pstatus(message):
+ print "{} {}{}{}".format(time_format, colors.grey, message, colors.normal)
+
+def presults(message):
+ print "{} {}{}{}".format(time_format, colors.cyan, message, colors.normal)
+
+def pwarning(message):
+ print "{} {}{}{}".format(time_format, colors.yellow, message, colors.normal)
+
+def perror(message):
+ print "{} {}{}{}".format(time_format, colors.red, message, colors.normal)
\ No newline at end of file
diff --git a/lib/soupify.py b/lib/soupify.py
new file mode 100644
index 0000000..570295d
--- /dev/null
+++ b/lib/soupify.py
@@ -0,0 +1,71 @@
+from logger import *
+import BeautifulSoup, json
+
+def soupify(response):
+ try:
+ soupd = BeautifulSoup.BeautifulSoup(response)
+ return soupd
+ except (AttributeError, TypeError) as e:
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
+
+def get_employees(soup):
+ try:
+ employees = {}
+ for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })):
+ name = u''.join(n.getText()).encode('utf-8')
+ title = u''.join(t.getText()).encode('utf-8')
+ if name and title:
+ employees[name] = title
+ return employees
+ except (AttributeError, TypeError) as e:
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_links(soup, company):
+ try:
+ job_links = []
+ for link, comp in zip(soup.findAll('a', { "class" : "job-title-link" }), soup.findAll('span', { "class" : "company-name-text" })):
+ if comp.text == company:
+ job_links.append(u''.join(link['href']).encode('utf-8'))
+ return job_links
+ except (AttributeError, TypeError) as e:
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
+
+def get_page_links(soup):
+ page_links = []
+ try:
+ for page in soup.findAll('li', { "class" : "page-number"}):
+ a = page.findAll('a')
+ page_links.append(u''.join("https://linkedin.com{}".format(a[0]['href'])).encode('utf-8'))
+ return page_links
+ except (AttributeError, TypeError) as e:
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_title(soup):
+ try:
+ return u''.join(json.loads(soup.find('code', {"id" : "decoratedJobPostingModule"}).string)['decoratedJobPosting']['jobPosting'].get('title')).encode('utf-8')
+ except (AttributeError, TypeError) as e:
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_description(soup):
+ try:
+ return u''.join(json.loads(soup.find('code', {"id" : "decoratedJobPostingModule"}).string)['decoratedJobPosting']['jobPosting']['description'].get('rawText')).encode('utf-8')
+ except (AttributeError, TypeError):
+ pass
+ except Exception as e:
+ perror("Error: {}".format(e))
+ logging.error("Soupify.py Error: {}".format(e))
\ No newline at end of file
diff --git a/lib/workbench.py b/lib/workbench.py
new file mode 100644
index 0000000..b2eb2b4
--- /dev/null
+++ b/lib/workbench.py
@@ -0,0 +1,181 @@
+#todo: fix special character output
+import re, json, os, csv, time, codecs
+from logger import *
+
+def identify_tech(data, file):
+ matches = []
+ with open(file) as f:
+ keywords = f.readlines()
+
+ for sentence in data.lower().split("."):
+ keyword_found = []
+ for keyword in keywords:
+ if re.findall('\\b{}\\b'.format(re.escape(keyword.rstrip())), re.escape(sentence)):
+ keyword_found.append(keyword.rstrip())
+ if keyword_found:
+ matches.append({sentence:keyword_found})
+ return matches
+
+def craft_tech(matches):
+ logging.info(matches)
+ unique_techs, html_out, csv_out, json_out = [], [], [], []
+ for title, link in matches.items():
+ techs_per_job = []
+ for url in link.keys():
+ for data in link.get(url):
+ for sentence, techs in data.items():
+ highlight_techs = sentence
+ for tech in techs:
+ if tech not in unique_techs: unique_techs.append(tech)
+ if tech not in techs_per_job: techs_per_job.append(tech)
+ highlight_techs = re.sub('\\b{}\\b'.format(tech), '{}'.format(tech), highlight_techs)
+ html_out.append("{title} | {techs} | {sentence} |
".format(title=title,techs=', '.join(techs),sentence=highlight_techs.replace("\xe2\x80\xa2", " * "),url=url))
+ csv_out.append({"Job Title": title, "Technologies": ', '.join(techs), "Excerpt": sentence, "URL": url})
+ json_out.append({"jobtitle": title, "technologies": ', '.join(techs), "excerpt": sentence, "url": url})
+
+ pstatus('Title: {}'.format(title))
+ presults(', '.join(techs_per_job))
+
+ if unique_techs:
+ pstatus("Unique Technologies:")
+ presults(', '.join(unique_techs))
+
+ return html_out, csv_out, json_out
+
+def craft_employees(employees, eformat):
+ html_out, csv_out, json_out = [], [], []
+ emails = {}
+ if eformat:
+ format = eformat[:eformat.find('@')]
+ domain = eformat[eformat.find('@'):]
+
+ for name in employees.keys():
+ first = [n.split() for n in name.split(',',1)][0][0]
+ last = [n.split() for n in name.split(',',1)][0][-1]
+ email = "{}{}".format(format_email(format, first.lower(), last.lower()), domain)
+ if email:
+ emails[name] = email
+
+ for name, title in employees.items():
+ presults("{} {}".format(name, title[:50].replace('&', '&')))
+ logging.info("Employees identified: {}".format(employees))
+
+ #html output
+ if emails:
+ html_out.append("{name} | {title} | {email} |
".format(name=name, title=title, email=emails.get(name)))
+ csv_out.append({"Employee Name": name, "Title": title, "Email": emails.get(name)})
+ json_out.append({"employeename": name, "title": title, "email": emails.get(name)})
+ else:
+ html_out.append("{name} | {title} | -- |
".format(name=name, title=title))
+ csv_out.append({"Employee Name": name, "Title": title, "Email": "--"})
+ json_out.append({"employeename": name, "title": title, "email": "--"})
+
+ if emails:
+ pstatus("Emails crafted")
+ for name, email in emails.items():
+ presults(email)
+
+
+
+ return html_out, csv_out, json_out
+
+def format_email(format, first, last):
+ try:
+ formats = {
+ 'first.last': '{}.{}'.format(first,last),
+ 'last.first': '{}.{}'.format(last,first),
+ 'firstlast': '{}{}'.format(first,last),
+ 'lastfirst': '{}{}'.format(last,first),
+ 'firstl':'{}{}'.format(first,last[0]),
+ 'lfirst':'{}{}'.format(last[0],first),
+ 'flast': '{}{}'.format(first[0],last),
+ 'lastf': '{}{}'.format(last,first[0]),
+ 'first': first,
+ 'last': last
+ }
+ return formats[format]
+ except Exception as e:
+ logging.error(e)
+
+
+def craft_html(company, tech_html, employee_html, filename):
+ if tech_html:
+ tech_table = """
+ Technologies Identified
+
+
+ Job Title |
+ Technologies |
+ Excerpt |
+
+
+ {techs}
+
+ """.format(techs=' '.join(tech_html))
+ else: tech_table = ""
+
+ if employee_html:
+ employee_table = """
+ Employees Identified
+
+
+ Employee Name |
+ Title |
+ E-mail |
+
+ {html}
+
+ """.format(html=' '.join(employee_html))
+ else: employee_table = ""
+
+ page = """
+
+ InSpy - {company}
+
+
+ InSpy
+ Company: {company}
Date: {time}
+ {tech}
+ {emp}
+
+
+
+ """.format(company=company, time=time.strftime("%Y/%m/%d %H:%M:%S"), tech=tech_table, emp=employee_table)
+
+ with open(os.path.abspath(filename), 'w') as f:
+ f.write(page)
+
+def craft_csv(tech_csv, employee_csv, filename):
+
+ if tech_csv:
+ with open(os.path.abspath(filename), 'w') as csvfile:
+ fieldnames = ["Job Title", "Technologies", "Excerpt", "URL"]
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+ writer.writeheader()
+ for row in tech_csv:
+ writer.writerow(row)
+ writer.writerow({})
+
+ if employee_csv:
+ with open(os.path.abspath(filename), 'a') as csvfile:
+ fieldnames = ["Employee Name", "Title", "Email"]
+ writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+ writer.writeheader()
+ for row in employee_csv:
+ writer.writerow(row)
+
+def craft_json(tech_json, employee_json, filename):
+ if tech_json and employee_json:
+ tech = {"technologies":tech_json}
+ emp = {"employees":employee_json}
+ full_json = tech.copy()
+ full_json.update(emp)
+ elif tech_json:
+ tech = {"technologies":tech_json}
+ full_json = tech
+ elif employee_json:
+ emp = {"employees":employee_json}
+ full_json = emp
+
+ with open(os.path.abspath(filename), 'w') as f:
+ f.write(json.dumps(full_json))
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bdc7597
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests == 2.7.0
+BeautifulSoup == 3.2.1
diff --git a/wordlists/tech-list-large.txt b/wordlists/tech-list-large.txt
new file mode 100644
index 0000000..5c482e8
--- /dev/null
+++ b/wordlists/tech-list-large.txt
@@ -0,0 +1,55 @@
+c#
+ruby
+python
+cms
+azure
+java
+javascript
+cisco
+asa
+meraki
+apache
+iis
+sql
+mysql
+windows
+linux
+unix
+apple
+adobe
+android
+blackberry
+broadband
+cloud
+computing
+dropbox
+ebay
+exchange
+postfix
+sendmail
+encryption
+filesharing
+microsoft
+mobile
+oracle
+juniper
+avaya
+software
+sunos
+as400
+mainframe
+bluecoat
+siem
+intrusion prevention
+intrusion detection
+ids
+ips
+web proxy
+web filter
+antivirus
+anti virus
+dlp
+endpoint detection
+mobile security
+active directory
+vmware
\ No newline at end of file
diff --git a/wordlists/tech-list-small.txt b/wordlists/tech-list-small.txt
new file mode 100644
index 0000000..ac13c8e
--- /dev/null
+++ b/wordlists/tech-list-small.txt
@@ -0,0 +1,10 @@
+c#
+ruby
+python
+windows
+unix
+linux
+antivirus
+ips
+ids
+cisco
\ No newline at end of file
diff --git a/wordlists/title-list-large.txt b/wordlists/title-list-large.txt
new file mode 100644
index 0000000..1e10013
--- /dev/null
+++ b/wordlists/title-list-large.txt
@@ -0,0 +1,45 @@
+chairman
+president
+executive
+deputy
+manager
+staff
+chief
+director
+partner
+owner
+treasurer
+secretary
+associate
+supervisor
+foreman
+counsel
+consultant
+recruiter
+human resources
+hr
+payroll
+administrator
+training
+coordinator
+therapist
+nurse
+pharmacist
+pathologist
+occupational
+marketing
+product
+development
+senior
+project
+software
+developer
+analyst
+engineer
+technician
+accountant
+controller
+financial
+auditor
+tax
+security
\ No newline at end of file
diff --git a/wordlists/title-list-small.txt b/wordlists/title-list-small.txt
new file mode 100644
index 0000000..5b25ecd
--- /dev/null
+++ b/wordlists/title-list-small.txt
@@ -0,0 +1,20 @@
+sales
+marketing
+human resources
+finance
+accounting
+inventory
+quality assurance
+insurance
+licenses
+operational
+customer service
+staff
+research & development
+management
+administration
+engineering
+it
+is
+strategy
+other
\ No newline at end of file