diff --git a/InSpy.py b/InSpy.py
index 5bb49fd..ff40d9f 100755
--- a/InSpy.py
+++ b/InSpy.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python2
-# Copyright (c) 2016 Jonathan Broche (@g0jhonny)
+# Copyright (c) 2018 Jonathan Broche (@LeapSecurity)
-from lib.logger import *
-from lib.soupify import *
+import argparse, sys, os
+from lib.http import *
 from lib.workbench import *
-from lib.crawler import *
-import os, argparse, sys, time
+from lib.soup import *
+from lib.export import *
+from lib.logger import *
-parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0.2")
-parser.add_argument('company', help="Company name to use for tasks.")
-techgroup = parser.add_argument_group(title="Technology Search")
-techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]")
-techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]")
-empgroup = parser.add_argument_group(title="Employee Harvesting")
-empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
-empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+
+parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)', version="3.0.0")
+parser.add_argument('company', help="Company name to use for tasks.")
+parser.add_argument('--domain', help="Company domain to use for searching.")
+parser.add_argument('--email', help="Email format to create email addresses with. [Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+parser.add_argument('--titles', metavar='file', default="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]")
 outgroup = parser.add_argument_group(title="Output Options")
 outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
 outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
 outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")
+outgroup.add_argument('--xml', metavar='file', help="Print results in XML.")
 if len(sys.argv) == 1:
     parser.print_help()
@@ -26,100 +26,57 @@
 args = parser.parse_args()
 start_logger(args.company)
+hunterapi = "" #insert hunterio api key here
-print "\nInSpy {}\n".format(parser.version)
-
-if not args.techspy and not args.empspy:
-    print "You didn't provide any work for me to do."
-    sys.exit(1)
-
-stime = time.time()
-tech_html, employee_html, tech_csv, employee_csv, tech_json, employee_json = [], [], [], [], [], []
-
-if args.techspy:
-    if os.path.exists(os.path.abspath(args.techspy)):
-        initial_crawl = crawl_jobs(args.company)
-        if initial_crawl:
-            soup = soupify(initial_crawl)
-            job_links = []
-            for link in get_job_links(soup, args.company):
-                if len(job_links) < args.limit:
-                    job_links.append(link)
-            if len(job_links) != args.limit:
-                page_links = get_page_links(soup)
-                for page in range(len(page_links)):
-                    if len(job_links) == args.limit: break
-                    urlcrawl = crawl_url(page_links[page])
-                    if urlcrawl:
-                        for link in get_job_links(soupify(urlcrawl), args.company):
-                            if len(job_links) < args.limit:
-                                job_links.append(link)
+print "\nInSpy {}".format(parser.version)
-        pstatus("{} Jobs identified".format(len(job_links)))
-        if job_links:
-            techs = {}
-            for job in range(len(job_links)):
-                jobresponse = crawl_url(job_links[job])
-                if jobresponse:
-                    jobsoup = soupify(jobresponse)
-                    description = get_job_description(jobsoup)
-                    matches = identify_tech(description, os.path.abspath(args.techspy))
-                    if matches:
-                        title = get_job_title(jobsoup)
-                        techs[title] = {job_links[job]:matches}
+if args.domain and not args.email: #search hunterio for email format
+    domain = args.domain
+    email = get_email_format(args.domain, hunterapi).replace("{", "").replace("}","")
+elif args.email and not args.domain: #search clearbit for domain
+    email = args.email
+    domain = get_domain(args.company)
+else: #no domain or email provided - fully automate it
+    domain = get_domain(args.company)
+    if domain:
+        email = get_email_format(domain, hunterapi)
+        if email: email = email.replace("{", "").replace("}","")
-            tech_html, tech_csv, tech_json = craft_tech(techs)
-    else:
-        perror("No such file or directory: '{}'".format(args.techspy))
+if domain and email:
+    print "\nDomain: {}, Email Format: {}\n".format(domain, email)
+    employees = {}
-if args.empspy:
-    if os.path.exists(os.path.abspath(args.empspy)):
-        employees = {}
-        emails = []
-        for response in crawl_employees(args.company, os.path.abspath(args.empspy)):
-            for name, title in get_employees(soupify(response)).items():
-                if args.company.lower() in title.lower():
-                    if not name in employees:
-                        employees[name] = title
+    if os.path.exists(os.path.abspath(args.titles)):
+        for response in search_linkedin(args.company, os.path.abspath(args.titles)):
+            for name, title in get_employees(soupify(response)).items():
+                if args.company.lower() in title.lower():
+                    if not name in employees:
+                        employees[name] = title
+        print "\n{} Employees identified".format(len(employees.keys()))
+    else:
+        print os.path.abspath(args.titles)
+        print "No such file or directory: '{}'".format(args.titles)
-        pstatus("{} Employees identified".format(len(employees.keys())))
-        if employees:
-            if args.emailformat:
-                if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first_last', 'last_first', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']:
-                    employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat)
-                else:
-                    pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.")
-                    employee_html, employee_csv, employee_json = craft_employees(employees, None)
-            else:
-                employee_html, employee_csv, employee_json = craft_employees(employees, None)
-    else:
-        print os.path.abspath(args.empspy)
-        perror("No such file or directory: '{}'".format(args.empspy))
+    if employees:
+        #output employees
+        for name, title in employees.iteritems():
+            print "{} {}".format(name, title[:50].replace('&amp;', '&'))
+
+        #craft emails
+        emails = create_emails(employees, domain, email)
-#output
-if args.html:
-    if tech_html or employee_html:
-        if tech_html and employee_html:
-            craft_html(args.company, tech_html, employee_html, args.html)
-        elif tech_html and not employee_html:
-            craft_html(args.company, tech_html, None, args.html)
-        else:
-            craft_html(args.company, None, employee_html, args.html)
-if args.csv:
-    if tech_csv or employee_csv:
-        if tech_csv and employee_csv:
-            craft_csv(tech_csv, employee_csv, args.csv)
-        elif tech_csv and not employee_csv:
-            craft_csv(tech_csv, None, args.csv)
-        else:
-            craft_csv(None, employee_csv, args.csv)
-if args.json:
-    if tech_json or employee_json:
-        if tech_json and employee_json:
-            craft_json(tech_json, employee_json, args.json)
-        elif tech_json and not employee_json:
-            craft_json(tech_json, None, args.json)
-        else:
-            craft_json(None, employee_json, args.json)
+        if emails:
+            #output emails
+            print "\n{} Emails crafted\n".format(len(emails.keys()))
+            for name, email in emails.items():
+                print email
-print "Completed in {:.1f}s".format(time.time()-stime)
\ No newline at end of file
+        #export results
+        if args.html:
+            output("html", args.html, args.company, domain, employees, emails)
+        if args.xml:
+            output("xml", args.xml, args.company, domain, employees, emails)
+        if args.json:
+            output("json", args.json, args.company, domain, employees, emails)
+        if args.csv:
+            output("csv", args.csv, args.company, domain, employees, emails)
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 3e53492..d293ab9 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2016 Jonathan Broche
+Copyright (c) 2018 Leap Security
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 0d4ec11..0d92ae7 100644
--- a/README.md
+++ b/README.md
@@ -3,50 +3,41 @@
 ## Introduction
 -----
-InSpy is a python based LinkedIn enumeration tool. Inspy has two functionalities: TechSpy and EmpSpy.
+InSpy is a Python-based LinkedIn enumeration tool.
-
-- TechSpy - Crawls LinkedIn job listings for technologies used by the provided company. InSpy attempts to identify technologies by matching job descriptions to keywords from a new line delimited file.
-
-- EmpSpy - Crawls LinkedIn for employees working at the provided company. InSpy searches for employees by title and/or departments from a new line delimited file. InSpy may also create emails for the identified employees if the user specifies an email format.
+Version 3.0 introduces the automation of domain and email retrieval, in addition to randomized headers and XML output support.
 ## Installation
 -----
 Run `pip install -r requirements.txt` within the cloned InSpy directory.
+Obtain an API key from [HunterIO](https://hunter.io/) and insert it into the `hunterapi` variable within InSpy.py (line 29).
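+
+For example, with a placeholder key (not a real one):
+
+```
+hunterapi = "1234abcd5678efgh" #insert hunterio api key here
+```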
+
 ## Help
 -----
 ```
-InSpy - A LinkedIn enumeration tool by Jonathan Broche (@jonathanbroche)
+InSpy - A LinkedIn enumeration tool by Jonathan Broche (@LeapSecurity)
 
 positional arguments:
-  company               Company name to use for tasks.
+  company            Company name to use for tasks.
 
 optional arguments:
-  -h, --help            show this help message and exit
-  -v, --version         show program's version number and exit
-
-Technology Search:
-  --techspy [file]      Crawl LinkedIn job listings for technologies used by
-                        the company. Technologies imported from a new line
-                        delimited file. [Default: tech-list-small.txt]
-  --limit int           Limit the number of job listings to crawl. [Default:
-                        50]
-
-Employee Harvesting:
-  --empspy [file]       Discover employees by title and/or department. Titles
-                        and departments are imported from a new line delimited
-                        file. [Default: title-list-small.txt]
-  --emailformat string  Create email addresses for discovered employees using
-                        a known format. [Accepted Formats: first.last@xyz.com,
-                        last.first@xyz.com, first_last@xyz.com, last_first@xyz.com,
-                        firstl@xyz.com, lfirst@xyz.com,
-                        flast@xyz.com, lastf@xyz.com, first@xyz.com,
-                        last@xyz.com]
+  -h, --help         show this help message and exit
+  -v, --version      show program's version number and exit
+  --domain DOMAIN    Company domain to use for searching.
+  --email EMAIL      Email format to create email addresses with. [Accepted
+                     Formats: first.last@xyz.com, last.first@xyz.com,
+                     firstl@xyz.com, lfirst@xyz.com, flast@xyz.com,
+                     lastf@xyz.com, first@xyz.com, last@xyz.com]
+  --titles [file]    Discover employees by title and/or department. Titles and
+                     departments are imported from a new line delimited file.
+                     [Default: title-list-small.txt]
 
 Output Options:
-  --html file           Print results in HTML file.
-  --csv file            Print results in CSV format.
-  --json file           Print results in JSON.
+  --html file        Print results in HTML file.
+  --csv file         Print results in CSV format.
+  --json file        Print results in JSON.
+  --xml file         Print results in XML.
 ```
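+
+## Usage Example
+-----
+
+A typical run might look like the following; the company name, domain, and output filename are illustrative placeholders, and the flags are as documented above:
+
+```
+python2 InSpy.py --domain example.com --titles wordlists/title-list-small.txt --html report.html "Example Corp"
+```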
diff --git a/lib/__init__.pyc b/lib/__init__.pyc
index e42f2c8..2b69e10 100644
Binary files a/lib/__init__.pyc and b/lib/__init__.pyc differ
diff --git a/lib/crawler.py b/lib/crawler.py
deleted file mode 100644
index 9a2297c..0000000
--- a/lib/crawler.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from logger import *
-import requests
-requests.packages.urllib3.disable_warnings()
-
-headers={'Host':'www.linkedin.com', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
-
-
-def crawl_employees(company, file):
-    titles = []
-    responses = []
-    try:
-        with open(file) as f:
-            for title in f.readlines():
-                titles.append(title.rstrip())
-        for title in titles:
-            response = requests.get("https://www.linkedin.com/title/{}-at-{}".format(title.replace(' ', '-'), company.replace(' ', '-')), timeout=3, headers=headers)
-            responses.append(response.text)
-    except requests.exceptions.Timeout as e:
-        pwarning("Warning: Timed out crawling {}".format(title))
-    except Exception as e:
-        perror("Error: {}".format(e))
-        logging.error(e)
-    return responses
-
-def crawl_jobs(company): #initial crawl
-    url = "https://www.linkedin.com/jobs/{}-jobs".format(company.replace(' ', '-'))
-    try:
-        response = requests.get(url, timeout=3, headers=headers)
-        return response.text
-    except requests.exceptions.Timeout as e:
-        perror("Error: Timed out. Try again, LinkedIn doesn't like us sometimes")
-        logging.error(e)
-    except requests.exceptions.ReadTimeout as e:
-        perror("Error: Read time out")
-        logging.error(e)
-    except Exception as e:
-        perror("Error: {}".format(e))
-        logging.error(e)
-
-
-def crawl_url(url=None): #page crawls
-    try:
-        response = requests.get(url, timeout=3, headers=headers)
-        return response.text
-    except requests.exceptions.Timeout as e:
-        pwarning("Warning: Timed out")
-    except requests.exceptions.ReadTimeout as e:
-        pwarning("Warning: Read time out")
-    except Exception as e:
-        pwarning("Warning: {}".format(e))
-        logging.error(e)
\ No newline at end of file
diff --git a/lib/crawler.pyc b/lib/crawler.pyc
deleted file mode 100644
index fd4bce3..0000000
Binary files a/lib/crawler.pyc and /dev/null differ
diff --git a/lib/export.py b/lib/export.py
new file mode 100644
index 0000000..3bd7055
--- /dev/null
+++ b/lib/export.py
@@ -0,0 +1,99 @@
+import json, os, csv, xml.dom.minidom, time
+from xml.etree.ElementTree import Element, SubElement, tostring
+
+def output(format, file, company, domain, employees, emails):
+    if format == "xml":
+        oxml(file, company, domain, employees, emails)
+    if format == "csv":
+        ocsv(file, company, domain, employees, emails)
+    if format == "html":
+        ohtml(file, company, domain, employees, emails)
+    if format == "json":
+        ojson(file, company, domain, employees, emails)
+
+#CSV
+def ocsv(filename, company, domain, employees, emails):
+    with open(os.path.abspath(filename), 'a') as csvfile:
+        fieldnames = ["Employee Name", "Title", "Email"]
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+        for name, title in employees.iteritems():
+            writer.writerow({"Employee Name": name, "Title": title.replace('&amp;', '&'), "Email": emails[name]})
+
+#JSON
+def ojson(file, company, domain, employees, emails):
+    employee_json = []
+
+    for name, title in employees.iteritems():
+        employee_json.append({"name": name, "title": title.replace('&amp;', '&'), "email": emails[name]})
+
+    full_json = {
+        "company": {"name":company, "domain": domain},
+        "employees": employee_json
+    }
+
+    with open(os.path.abspath(file), 'w') as f:
+        f.write(json.dumps(full_json))
+
+#XML
+def oxml(file, company, domain, employees, emails):
+    top = Element('InSpy')
+    cxml = SubElement(top, 'Company')
+
+    #company name
+    cnxml = SubElement(cxml, "Name")
+    cnxml.text = company
+    #company domain
+    cdxml = SubElement(cxml, "Domain")
+    cdxml.text = domain
+
+    echild = SubElement(top, 'Employees')
+
+    for name, title in employees.iteritems():
+
+        employee = SubElement(echild, "Employee")
+        #name
+        nxml = SubElement(employee, "Name")
+        nxml.text = name
+        #title
+        txml = SubElement(employee, "Title")
+        txml.text = title.replace("&amp;", "&")
+        #email
+        exml = SubElement(employee, "Email")
+        exml.text = emails[name]
+
+    fxml = xml.dom.minidom.parseString(tostring(top))
+
+    with open(os.path.abspath(file), 'w') as f:
+        f.write(fxml.toprettyxml())
+
+#HTML
+def ohtml(file, company, domain, employees, emails):
+    employee_html = []
+
+    for name, title in employees.iteritems():
+        employee_html.append("
[remainder of ohtml lost to HTML rendering; recoverable fragments: per-employee table rows, a page header with "Company: {company}" and "Date: {time}", and "Employee Name" / "Title" column headings]
[a following hunk of deleted lines, the old craft_html report template, likewise lost; recoverable fragments: a "Company: {company}" / "Date: {time}" header, a technology table with "Job Title", "Technologies", and "Excerpt" columns, an employee table with "Employee Name" and "Title" columns, and the {tech} / {emp} insertion placeholders]
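The HTML report template itself did not survive in this diff. A minimal sketch of what ohtml plausibly assembles, consistent with the recoverable fragments above, might look like this (tag structure, styling, and the helper name are assumptions, not the committed code):

```python
# Sketch of the lost ohtml page template. Only the "Company:"/"Date:" header
# and the "Employee Name"/"Title" column headings are attested in the residue.
PAGE = """<html><body>
<p>Company: {company}<br/>Date: {time}</p>
<table>
<tr><th>Employee Name</th><th>Title</th></tr>
{rows}
</table>
</body></html>"""

def render_report(company, timestamp, employee_rows):
    # employee_rows: the "<tr><td>name</td><td>title</td></tr>" strings that
    # ohtml appends to employee_html for each discovered employee
    return PAGE.format(company=company, time=timestamp, rows="".join(employee_rows))
```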