Skip to content

Commit

Permalink
Version 2 commit
Browse files Browse the repository at this point in the history
InSpy v2.0 release
  • Loading branch information
jobroche committed Feb 16, 2016
1 parent 668ba9c commit d44710d
Show file tree
Hide file tree
Showing 12 changed files with 620 additions and 266 deletions.
302 changes: 116 additions & 186 deletions InSpy.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,194 +1,124 @@
#!/usr/bin/env python2

# InSpy - A LinkedIn employee enumerator
# This script enumerates employees from any organization
# using LinkedIn. Please note that this will not harvest all
# employees within a given organization.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Author: Jonathan Broche
# Contact: @g0jhonny
# Version: 1.0.1
# Date: 2015-11-22
#
# usage: ./inspy.py -c <company> [-d dept/title] [-e email output format] [-i input file with dept/titles] [-o output file]
# example: ./inspy.py -c abc -e [email protected] -o abc_employees.txt


import requests, BeautifulSoup, argparse, signal, time, datetime, os

start_time = time.time()

class colors:
lightblue = "\033[1;36m"
blue = "\033[1;34m"
normal = "\033[0;00m"
red = "\033[1;31m"
yellow = "\033[1;33m"
white = "\033[1;37m"
green = "\033[1;32m"

#----------------------------------------#
# HARVEST USERS #
#----------------------------------------#
from lib.logger import *
from lib.soupify import *
from lib.workbench import *
from lib.crawler import *
import os, argparse, sys, time

parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0")
parser.add_argument('company', help="Company name to use for tasks.")
techgroup = parser.add_argument_group(title="Technology Search")
techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]")
techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]")
empgroup = parser.add_argument_group(title="Employee Harvesting")
empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]]")
outgroup = parser.add_argument_group(title="Output Options")
outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)

args = parser.parse_args()
start_logger(args.company)

print "\nInSpy {}\n".format(parser.version)

if not args.techspy and not args.empspy:
print "You didn't provide any work for me to do."
sys.exit(1)

stime = time.time()
tech_html, employee_html, tech_csv, employee_csv, employee_json = [], [], [], [], []

if args.techspy:
if os.path.exists(os.path.abspath(args.techspy)):
initial_crawl = crawl_jobs(args.company)
if initial_crawl:
soup = soupify(initial_crawl)
job_links = []
for link in get_job_links(soup, args.company):
if len(job_links) < args.limit:
job_links.append(link)
if len(job_links) != args.limit:
page_links = get_page_links(soup)
for page in range(len(page_links)):
if len(job_links) == args.limit: break
urlcrawl = crawl_url(page_links[page])
if urlcrawl:
for link in get_job_links(soupify(urlcrawl), args.company):
if len(job_links) < args.limit:
job_links.append(link)

pstatus("{} Jobs identified".format(len(job_links)))
if job_links:
techs = {}
for job in range(len(job_links)):
jobresponse = crawl_url(job_links[job])
if jobresponse:
jobsoup = soupify(jobresponse)
description = get_job_description(jobsoup)
matches = identify_tech(description, os.path.abspath(args.techspy))
if matches:
title = get_job_title(jobsoup)
techs[title] = {job_links[job]:matches}

tech_html, tech_csv, tech_json = craft_tech(techs)
else:
perror("No such file or directory: '{}'".format(args.techspy))

def inspy_enum(company, dept, ifile):
try:
dept_dictionary = ['sales', 'marketing', 'human resources', 'finance', 'accounting', 'inventory', 'quality assurance', 'insurance', 'licenses', 'operational', 'customer service', 'staff', 'research & development', 'management', 'administration', 'engineering', 'it', 'is', 'strategy', 'other']

if args.empspy:
if os.path.exists(os.path.abspath(args.empspy)):
employees = {}

if dept is not None:
dept_dictionary = [dept.lower()]

if ifile is not None:
try:
if os.path.exists(ifile):
with open(ifile, 'r') as f:
dept_dictionary = []
for line in f.readlines():
if line.rstrip():
dept_dictionary.append(line.rstrip())
except IOError as e:
print "{}[!]{} Problem opening the file. {}".format(e)

for dd in dept_dictionary:
print "{}[*]{} Searching for employees working at {} with '{}' in their title".format(colors.lightblue, colors.normal, company, dd)

try:
response = requests.get('https://www.linkedin.com/title/{}-at-{}'.format(dd.replace('-', ' '), company.replace('-', ' ')), timeout=2)
if response.status_code == 200:
soup = BeautifulSoup.BeautifulSoup(response.text)
emails = []
for response in crawl_employees(args.company, os.path.abspath(args.empspy)):
for name, title in get_employees(soupify(response)).items():
if args.company.lower() in title.lower():
if not name in employees:
employees[name] = title

pstatus("{} Employees identified".format(len(employees.keys())))
if employees:
if args.emailformat:
if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']:
employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat)
else:
pass
except requests.exceptions.Timeout:
print "{}[!]{} Timeout enumerating the {} department".format(colors.red, colors.normal, dd)
except requests.exceptions.ConnectionError:
print "{}[!]{} Connection error.".format(colors.red, colors.normal)
except requests.exceptions.HTTPError:
print "{}[!]{} HTTP error.".format(colors.red, colors.normal)

#get employee names
for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })):
name = u''.join(n.getText()).encode('utf-8')
title = u''.join(t.getText()).encode('utf-8').replace('&amp;', '&')

if not name in employees:
employees[name] = title

return employees
except Exception as e:
print "{}[!]{} Error harvesting users. {}".format(colors.red, colors.normal, e)

#----------------------------------------#
# EMAILS #
#----------------------------------------#

def format_email(names, eformat):
emails = []
for name in names:
spaces = []
for x,y in enumerate(name):
if ' ' in y:
spaces.append(x)

if eformat[:eformat.find('@')] == 'flast':
emails.append('{}{}{}'.format(name[0], name[(spaces[-1]+1):], eformat[eformat.find('@'):]))
elif eformat[:eformat.find('@')] == 'lfirst':
emails.append('{}{}{}'.format(name[spaces[-1]+1], name[0:spaces[0]], eformat[eformat.find('@'):]))
elif eformat[:eformat.find('@')] == 'first.last':
emails.append('{}.{}{}'.format(name[0:spaces[0]], name[(spaces[-1]+1):], eformat[eformat.find('@'):]))
elif eformat[:eformat.find('@')] == 'last.first':
emails.append('{}.{}{}'.format(name[(spaces[-1]+1):], name[0:spaces[0]], eformat[eformat.find('@'):]))

return [e.lower() for e in emails]

#----------------------------------------#
# OUTPUT #
#----------------------------------------#

def output(employees, email, company, ofile):
counter = 0
ge, be = {}, {}
print '\n'

if email:
for k, e in zip(employees, email):
if company in employees[k].lower():
if ',' in k:
be[e] = '{}, {}'.format(k, employees[k])
else:
ge[e] = '{}, {}'.format(k, employees[k])
print "{}[*]{} {}, {}, {}".format(colors.green, colors.normal, k.replace('&amp;', '&'), employees[k].replace('&amp;', '&'), e)
counter +=1
else:
for k in employees:
if company in employees[k].lower():
ge[k] = employees[k]
print "{}[*]{} {} {}".format(colors.green, colors.normal, k.replace('&amp;', '&'), employees[k].replace('&amp;', '&'))
counter +=1
if be:
print "\n{}[!]{} The following employees have commas in their names. Their emails were not accurate.".format(colors.red, colors.normal)
for k in be:
print "{}[*]{} {}".format(colors.yellow, colors.normal, be[k])

if ofile:
with open(ofile, 'w') as f:
f.write("\n" + "-" * 69 + "\n" + "InSpy Output" + "\n" + "-" * 69 + "\n\n")

if [e for e in ge.keys() if '@' in e]: #if emails in keys
f.write("\n" + "E-mails" + "\n" + "-" * 25 + "\n\n")
for k in ge.keys():
f.write(k+'\n')

f.write("\n" + "All" + "\n" + "-" * 25 + "\n\n")
for k in ge:
f.write('{}, {}\n'.format(ge[k], k))
pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.")
employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
for k in ge:
f.write('{}, {}\n'.format(k, ge[k]))

print "\n{}[*]{} Done! {}{}{} employees found.".format(colors.lightblue, colors.normal, colors.green, counter, colors.normal)
print "{}[*]{} Completed in {:.1f}s\n".format(colors.lightblue, colors.normal, time.time()-start_time)

#----------------------------------------#
# MAIN #
#----------------------------------------#

def main():
print "\n " + "-" * 74 + "\n " + colors.white + "InSpy v1.0 - LinkedIn Employee Enumerator by Jonathan Broche (@g0jhonny)\n " + colors.normal + "-" * 74 + "\n "
parser = argparse.ArgumentParser(description='InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)')
parser.add_argument('-c', '--company', required=True, help='Company name')
parser.add_argument('-d', '--dept', nargs='?', const='', help='Department or title to query employees against. Inspy searches through a predefined list by default.')
parser.add_argument('-e', '--emailformat', help='Email output format. Acceptable formats: [email protected], [email protected], [email protected], [email protected]')
parser.add_argument('-i', '--inputfilename', nargs='?', const='', help='File with list of departments or titles to query employees against (one item per line)')
parser.add_argument('-o', '--outfilename', nargs='?', const='', help='Output results to text file')
args = parser.parse_args()

employees = inspy_enum(args.company, args.dept, args.inputfilename)

if args.emailformat:
if args.emailformat.find('@') and args.emailformat[:args.emailformat.find('@')] in {'flast', 'lfirst', 'first.last', 'last.first'}:
if employees is not None:
e = format_email(employees.keys(), args.emailformat)
output(employees, e,args.company.lower(), args.outfilename)
else:
print "{}[!]{} Please provide a valid email address format (i.e., [email protected], [email protected], [email protected], [email protected])".format(colors.red, colors.normal)
employee_html, employee_csv, employee_json = craft_employees(employees, None)
else:
if employees is not None:
output(employees,'',args.company.lower(), args.outfilename)
print os.path.abspath(args.empspy)
perror("No such file or directory: '{}'".format(args.empspy))

#output
if args.html:
if tech_html or employee_html:
if tech_html and employee_html:
craft_html(args.company, tech_html, employee_html, args.html)
elif tech_html and not employee_html:
craft_html(args.company, tech_html, None, args.html)
else:
craft_html(args.company, None, employee_html, args.html)
if args.csv:
if tech_csv or employee_csv:
if tech_csv and employee_csv:
craft_csv(tech_csv, employee_csv, args.csv)
elif tech_csv and not employee_csv:
craft_csv(tech_csv, None, args.csv)
else:
craft_csv(None, employee_csv, args.csv)
if args.json:
if tech_json or employee_json:
if tech_json and employee_json:
craft_json(tech_json, employee_json, args.json)
elif tech_json and not employee_json:
craft_json(tech_json, None, args.json)
else:
craft_json(None, employee_json, args.json)

if __name__ == '__main__':
main()
print "Completed in {:.1f}s".format(time.time()-stime)
Loading

0 comments on commit d44710d

Please sign in to comment.