-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
InSpy v2.0 release
- Loading branch information
Showing
12 changed files
with
620 additions
and
266 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,194 +1,124 @@ | ||
#!/usr/bin/env python2 | ||
|
||
# InSpy - A LinkedIn employee enumerator | ||
# This script enumerates employees from any organization | ||
# using LinkedIn. Please note that this will not harvest all | ||
# employees within a given organization. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
# Author: Jonathan Broche | ||
# Contact: @g0jhonny | ||
# Version: 1.0.1 | ||
# Date: 2015-11-22 | ||
# | ||
# usage: ./inspy.py -c <company> [-d dept/title] [-e email output format] [-i input file with dept/titles] [-o output file] | ||
# example: ./inspy.py -c abc -e flast@abc.com -o abc_employees.txt | ||
|
||
|
||
import requests, BeautifulSoup, argparse, signal, time, datetime, os | ||
|
||
start_time = time.time() | ||
|
||
# Console colour table: every attribute is an ANSI escape sequence that the
# print statements in this file interpolate around status markers such as
# "[*]" and "[!]". `normal` is emitted after a coloured span to reset styling.
class colors: | ||
lightblue = "\033[1;36m" | ||
blue = "\033[1;34m" | ||
normal = "\033[0;00m" | ||
red = "\033[1;31m" | ||
yellow = "\033[1;33m" | ||
white = "\033[1;37m" | ||
green = "\033[1;32m" | ||
|
||
#----------------------------------------# | ||
# HARVEST USERS # | ||
#----------------------------------------# | ||
from lib.logger import * | ||
from lib.soupify import * | ||
from lib.workbench import * | ||
from lib.crawler import * | ||
import os, argparse, sys, time | ||
|
||
# Command-line interface: one positional company name plus three option
# groups (technology search, employee harvesting, output files).
# NOTE(review): the `version=` keyword and `parser.version` are the Python 2
# argparse API - confirm before running this under Python 3.
parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0") | ||
parser.add_argument('company', help="Company name to use for tasks.") | ||
techgroup = parser.add_argument_group(title="Technology Search") | ||
# nargs='?' with const: passing --techspy without a value selects the bundled
# small wordlist; omitting the flag leaves args.techspy as None.
techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]") | ||
techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]") | ||
empgroup = parser.add_argument_group(title="Employee Harvesting") | ||
# Same optional-value pattern as --techspy, defaulting to the small title list.
empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]") | ||
# NOTE(review): the "[email protected]" entries in this help text look like
# obfuscated e-mail placeholders rather than the intended examples; the local
# parts actually accepted are the ones listed where args.emailformat is
# validated further down. Restore the real examples from the upstream file.
empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]]") | ||
outgroup = parser.add_argument_group(title="Output Options") | ||
# Each output flag takes the destination file path for that report format.
outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.") | ||
outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.") | ||
outgroup.add_argument('--json', metavar='file', help="Print results in JSON.") | ||
|
||
# Invoked with no arguments at all: print the full help text and exit with
# status 1 before argparse gets a chance to parse anything.
if len(sys.argv) == 1: | ||
parser.print_help() | ||
sys.exit(1) | ||
|
||
args = parser.parse_args() | ||
# start_logger is provided by one of the lib.* star imports.
# NOTE(review): presumably sets up logging keyed on the company name - confirm.
start_logger(args.company) | ||
|
||
# Banner showing the version string given to the parser above.
print "\nInSpy {}\n".format(parser.version) | ||
|
||
# At least one of --techspy / --empspy is required; otherwise there is nothing
# to crawl, so report that and exit with status 1.
if not args.techspy and not args.empspy: | ||
print "You didn't provide any work for me to do." | ||
sys.exit(1) | ||
|
||
# Wall-clock start; read again at the very end for the "Completed in" report.
stime = time.time()

# Every report buffer starts out empty so the output stage can truth-test any
# of them regardless of which crawls actually ran. tech_json must be included:
# it is otherwise assigned only when the technology crawl finds job links, and
# reading it for --json on an employee-only run would raise NameError.
tech_html, tech_csv, tech_json = [], [], []
employee_html, employee_csv, employee_json = [], [], []
|
||
# Technology search: crawl the company's job listings, collect at most
# args.limit job links, then match each job description against the wordlist
# at args.techspy.
# NOTE(review): indentation is missing in this rendering; the nesting implied
# by the comments below is inferred from data flow - confirm against the file.
if args.techspy: | ||
if os.path.exists(os.path.abspath(args.techspy)): | ||
initial_crawl = crawl_jobs(args.company) | ||
if initial_crawl: | ||
soup = soupify(initial_crawl) | ||
job_links = [] | ||
# Links from the first response, capped at args.limit.
for link in get_job_links(soup, args.company): | ||
if len(job_links) < args.limit: | ||
job_links.append(link) | ||
# Cap not reached yet: follow the page links taken from the first response,
# stopping as soon as the cap is hit.
if len(job_links) != args.limit: | ||
page_links = get_page_links(soup) | ||
for page in range(len(page_links)): | ||
if len(job_links) == args.limit: break | ||
urlcrawl = crawl_url(page_links[page]) | ||
if urlcrawl: | ||
for link in get_job_links(soupify(urlcrawl), args.company): | ||
if len(job_links) < args.limit: | ||
job_links.append(link) | ||
|
||
pstatus("{} Jobs identified".format(len(job_links))) | ||
if job_links: | ||
# techs maps job title -> {job URL: matches from identify_tech}.
# NOTE(review): keyed by title only, so two listings with the same title
# overwrite each other and only the last one is kept.
techs = {} | ||
for job in range(len(job_links)): | ||
jobresponse = crawl_url(job_links[job]) | ||
if jobresponse: | ||
jobsoup = soupify(jobresponse) | ||
description = get_job_description(jobsoup) | ||
matches = identify_tech(description, os.path.abspath(args.techspy)) | ||
if matches: | ||
title = get_job_title(jobsoup) | ||
techs[title] = {job_links[job]:matches} | ||
|
||
# The three values unpacked here are what the output stage later passes to
# craft_html / craft_csv / craft_json.
tech_html, tech_csv, tech_json = craft_tech(techs) | ||
# Reached when the wordlist path given to --techspy does not exist.
else: | ||
perror("No such file or directory: '{}'".format(args.techspy)) | ||
|
||
def inspy_enum(company, dept, ifile): | ||
try: | ||
dept_dictionary = ['sales', 'marketing', 'human resources', 'finance', 'accounting', 'inventory', 'quality assurance', 'insurance', 'licenses', 'operational', 'customer service', 'staff', 'research & development', 'management', 'administration', 'engineering', 'it', 'is', 'strategy', 'other'] | ||
|
||
if args.empspy: | ||
if os.path.exists(os.path.abspath(args.empspy)): | ||
employees = {} | ||
|
||
if dept is not None: | ||
dept_dictionary = [dept.lower()] | ||
|
||
if ifile is not None: | ||
try: | ||
if os.path.exists(ifile): | ||
with open(ifile, 'r') as f: | ||
dept_dictionary = [] | ||
for line in f.readlines(): | ||
if line.rstrip(): | ||
dept_dictionary.append(line.rstrip()) | ||
except IOError as e: | ||
print "{}[!]{} Problem opening the file. {}".format(e) | ||
|
||
for dd in dept_dictionary: | ||
print "{}[*]{} Searching for employees working at {} with '{}' in their title".format(colors.lightblue, colors.normal, company, dd) | ||
|
||
try: | ||
response = requests.get('https://www.linkedin.com/title/{}-at-{}'.format(dd.replace('-', ' '), company.replace('-', ' ')), timeout=2) | ||
if response.status_code == 200: | ||
soup = BeautifulSoup.BeautifulSoup(response.text) | ||
emails = [] | ||
for response in crawl_employees(args.company, os.path.abspath(args.empspy)): | ||
for name, title in get_employees(soupify(response)).items(): | ||
if args.company.lower() in title.lower(): | ||
if not name in employees: | ||
employees[name] = title | ||
|
||
pstatus("{} Employees identified".format(len(employees.keys()))) | ||
if employees: | ||
if args.emailformat: | ||
if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']: | ||
employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat) | ||
else: | ||
pass | ||
except requests.exceptions.Timeout: | ||
print "{}[!]{} Timeout enumerating the {} department".format(colors.red, colors.normal, dd) | ||
except requests.exceptions.ConnectionError: | ||
print "{}[!]{} Connection error.".format(colors.red, colors.normal) | ||
except requests.exceptions.HTTPError: | ||
print "{}[!]{} HTTP error.".format(colors.red, colors.normal) | ||
|
||
#get employee names | ||
for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })): | ||
name = u''.join(n.getText()).encode('utf-8') | ||
title = u''.join(t.getText()).encode('utf-8').replace('&', '&') | ||
|
||
if not name in employees: | ||
employees[name] = title | ||
|
||
return employees | ||
except Exception as e: | ||
print "{}[!]{} Error harvesting users. {}".format(colors.red, colors.normal, e) | ||
|
||
#----------------------------------------# | ||
# EMAILS # | ||
#----------------------------------------# | ||
|
||
# Builders for the local part (the text before '@') of each supported format,
# given the first and the last word of an employee's name.
_LOCAL_PART_BUILDERS = {
    'flast': lambda first, last: first[0] + last,
    'lfirst': lambda first, last: last[0] + first,
    'first.last': lambda first, last: first + '.' + last,
    'last.first': lambda first, last: last + '.' + first,
}


def format_email(names, eformat):
    """Build one lower-cased e-mail address per name from a format template.

    names   -- iterable of display names ("First [Middle ...] Last").
    eformat -- template such as 'flast@example.com'; the part before '@'
               selects the layout (flast, lfirst, first.last, last.first)
               and everything from '@' onwards is appended unchanged.

    Returns a list of addresses in the same order as `names`. When the
    template has no '@' or names an unsupported layout the list is empty.

    For a supported layout exactly one entry is produced per name, because
    the caller pairs the result with the names positionally:
      * only the first and last whitespace-separated words are used;
      * a single-word name uses that word as the whole local part instead of
        raising IndexError;
      * a blank name yields an empty-string placeholder.
    """
    local_format, at_sign, domain = eformat.partition('@')
    build = _LOCAL_PART_BUILDERS.get(local_format)
    if not at_sign or build is None:
        return []

    suffix = at_sign + domain
    emails = []
    for name in names:
        words = name.split()
        if not words:
            # Nothing to derive an address from; keep the slot so positions
            # still line up with `names`.
            emails.append('')
        elif len(words) == 1:
            emails.append((words[0] + suffix).lower())
        else:
            emails.append((build(words[0], words[-1]) + suffix).lower())
    return emails
|
||
#----------------------------------------# | ||
# OUTPUT # | ||
#----------------------------------------# | ||
|
||
def output(employees, email, company, ofile): | ||
counter = 0 | ||
ge, be = {}, {} | ||
print '\n' | ||
|
||
if email: | ||
for k, e in zip(employees, email): | ||
if company in employees[k].lower(): | ||
if ',' in k: | ||
be[e] = '{}, {}'.format(k, employees[k]) | ||
else: | ||
ge[e] = '{}, {}'.format(k, employees[k]) | ||
print "{}[*]{} {}, {}, {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&'), e) | ||
counter +=1 | ||
else: | ||
for k in employees: | ||
if company in employees[k].lower(): | ||
ge[k] = employees[k] | ||
print "{}[*]{} {} {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&')) | ||
counter +=1 | ||
if be: | ||
print "\n{}[!]{} The following employees have commas in their names. Their emails were not accurate.".format(colors.red, colors.normal) | ||
for k in be: | ||
print "{}[*]{} {}".format(colors.yellow, colors.normal, be[k]) | ||
|
||
if ofile: | ||
with open(ofile, 'w') as f: | ||
f.write("\n" + "-" * 69 + "\n" + "InSpy Output" + "\n" + "-" * 69 + "\n\n") | ||
|
||
if [e for e in ge.keys() if '@' in e]: #if emails in keys | ||
f.write("\n" + "E-mails" + "\n" + "-" * 25 + "\n\n") | ||
for k in ge.keys(): | ||
f.write(k+'\n') | ||
|
||
f.write("\n" + "All" + "\n" + "-" * 25 + "\n\n") | ||
for k in ge: | ||
f.write('{}, {}\n'.format(ge[k], k)) | ||
pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.") | ||
employee_html, employee_csv, employee_json = craft_employees(employees, None) | ||
else: | ||
for k in ge: | ||
f.write('{}, {}\n'.format(k, ge[k])) | ||
|
||
print "\n{}[*]{} Done! {}{}{} employees found.".format(colors.lightblue, colors.normal, colors.green, counter, colors.normal) | ||
print "{}[*]{} Completed in {:.1f}s\n".format(colors.lightblue, colors.normal, time.time()-start_time) | ||
|
||
#----------------------------------------# | ||
# MAIN # | ||
#----------------------------------------# | ||
|
||
def main(): | ||
print "\n " + "-" * 74 + "\n " + colors.white + "InSpy v1.0 - LinkedIn Employee Enumerator by Jonathan Broche (@g0jhonny)\n " + colors.normal + "-" * 74 + "\n " | ||
parser = argparse.ArgumentParser(description='InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)') | ||
parser.add_argument('-c', '--company', required=True, help='Company name') | ||
parser.add_argument('-d', '--dept', nargs='?', const='', help='Department or title to query employees against. Inspy searches through a predefined list by default.') | ||
parser.add_argument('-e', '--emailformat', help='Email output format. Acceptable formats: [email protected], [email protected], [email protected], [email protected]') | ||
parser.add_argument('-i', '--inputfilename', nargs='?', const='', help='File with list of departments or titles to query employees against (one item per line)') | ||
parser.add_argument('-o', '--outfilename', nargs='?', const='', help='Output results to text file') | ||
args = parser.parse_args() | ||
|
||
employees = inspy_enum(args.company, args.dept, args.inputfilename) | ||
|
||
if args.emailformat: | ||
if args.emailformat.find('@') and args.emailformat[:args.emailformat.find('@')] in {'flast', 'lfirst', 'first.last', 'last.first'}: | ||
if employees is not None: | ||
e = format_email(employees.keys(), args.emailformat) | ||
output(employees, e,args.company.lower(), args.outfilename) | ||
else: | ||
print "{}[!]{} Please provide a valid email address format (i.e., [email protected], [email protected], [email protected], [email protected])".format(colors.red, colors.normal) | ||
employee_html, employee_csv, employee_json = craft_employees(employees, None) | ||
else: | ||
if employees is not None: | ||
output(employees,'',args.company.lower(), args.outfilename) | ||
print os.path.abspath(args.empspy) | ||
perror("No such file or directory: '{}'".format(args.empspy)) | ||
|
||
#output | ||
# A report is written only when its flag was given and at least one of its two
# sections has content; a section with no content is handed over as None.
if args.html and (tech_html or employee_html):
    craft_html(args.company, tech_html or None, employee_html or None, args.html)

if args.csv and (tech_csv or employee_csv):
    craft_csv(tech_csv or None, employee_csv or None, args.csv)

if args.json and (tech_json or employee_json):
    craft_json(tech_json or None, employee_json or None, args.json)
|
||
if __name__ == '__main__': | ||
main() | ||
print "Completed in {:.1f}s".format(time.time()-stime) |
Oops, something went wrong.