diff --git a/InSpy.py b/InSpy.py old mode 100644 new mode 100755 index a29b215..1187a33 --- a/InSpy.py +++ b/InSpy.py @@ -1,194 +1,124 @@ #!/usr/bin/env python2 -# InSpy - A LinkedIn employee enumerator -# This script enumerates employees from any organization -# using LinkedIn. Please note that this will not harvest all -# employees within a given organization. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# Author: Jonathan Broche -# Contact: @g0jhonny -# Version: 1.0.1 -# Date: 2015-11-22 -# -# usage: ./inspy.py -c [-d dept/title] [-e email output format] [-i input file with dept/titles] [-o output file] -# example: ./inspy.py -c abc -e flast@abc.com -o abc_employees.txt - - -import requests, BeautifulSoup, argparse, signal, time, datetime, os - -start_time = time.time() - -class colors: - lightblue = "\033[1;36m" - blue = "\033[1;34m" - normal = "\033[0;00m" - red = "\033[1;31m" - yellow = "\033[1;33m" - white = "\033[1;37m" - green = "\033[1;32m" - -#----------------------------------------# -# HARVEST USERS # -#----------------------------------------# +from lib.logger import * +from lib.soupify import * +from lib.workbench import * +from lib.crawler import * +import os, argparse, sys, time + +parser = argparse.ArgumentParser(description='InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)', version="2.0") +parser.add_argument('company', help="Company name to use for tasks.") +techgroup = parser.add_argument_group(title="Technology Search") +techgroup.add_argument('--techspy', metavar='file', const="wordlists/tech-list-small.txt", nargs='?', help="Crawl LinkedIn job listings for technologies used by the company. Technologies imported from a new line delimited file. [Default: tech-list-small.txt]") +techgroup.add_argument('--limit', metavar='int', type=int, default=50, help="Limit the number of job listings to crawl. [Default: 50]") +empgroup = parser.add_argument_group(title="Employee Harvesting") +empgroup.add_argument('--empspy', metavar='file', const="wordlists/title-list-small.txt", nargs='?', help="Discover employees by title and/or department. Titles and departments are imported from a new line delimited file. [Default: title-list-small.txt]") +empgroup.add_argument('--emailformat', metavar='string', help="Create email addresses for discovered employees using a known format. 
[Accepted Formats: first.last@xyz.com, last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com, flast@xyz.com, lastf@xyz.com, first@xyz.com, last@xyz.com]")
+outgroup = parser.add_argument_group(title="Output Options")
+outgroup.add_argument('--html', metavar='file', help="Print results in HTML file.")
+outgroup.add_argument('--csv', metavar='file', help="Print results in CSV format.")
+outgroup.add_argument('--json', metavar='file', help="Print results in JSON.")
+
+if len(sys.argv) == 1:
+    parser.print_help()
+    sys.exit(1)
+
+args = parser.parse_args()
+start_logger(args.company)
+
+print "\nInSpy {}\n".format(parser.version)
+
+if not args.techspy and not args.empspy:
+    print "You didn't provide any work for me to do."
+    sys.exit(1)
+
+stime = time.time()
+# initialize every result container (tech_json included) so the output
+# checks at the bottom of the script can never hit a NameError
+tech_html, employee_html, tech_csv, employee_csv, tech_json, employee_json = [], [], [], [], [], []
+
+if args.techspy:
+    if os.path.exists(os.path.abspath(args.techspy)):
+        initial_crawl = crawl_jobs(args.company)
+        if initial_crawl:
+            soup = soupify(initial_crawl)
+            job_links = []
+            for link in get_job_links(soup, args.company):
+                if len(job_links) < args.limit:
+                    job_links.append(link)
+            if len(job_links) != args.limit:
+                page_links = get_page_links(soup)
+                for page in range(len(page_links)):
+                    if len(job_links) == args.limit: break
+                    urlcrawl = crawl_url(page_links[page])
+                    if urlcrawl:
+                        for link in get_job_links(soupify(urlcrawl), args.company):
+                            if len(job_links) < args.limit:
+                                job_links.append(link)
+
+            pstatus("{} Jobs identified".format(len(job_links)))
+            if job_links:
+                techs = {}
+                for job in range(len(job_links)):
+                    jobresponse = crawl_url(job_links[job])
+                    if jobresponse:
+                        jobsoup = soupify(jobresponse)
+                        description = get_job_description(jobsoup)
+                        matches = identify_tech(description, os.path.abspath(args.techspy))
+                        if matches:
+                            title = get_job_title(jobsoup)
+                            techs[title] = {job_links[job]:matches}
+
+                tech_html, tech_csv, tech_json = craft_tech(techs)
+    else:
+        perror("No such file or directory: '{}'".format(args.techspy))
 
-def inspy_enum(company, dept, ifile):
-    try:
-        dept_dictionary = ['sales', 'marketing', 'human resources', 'finance', 'accounting', 'inventory', 'quality assurance', 'insurance', 'licenses', 'operational', 'customer service', 'staff', 'research & development', 'management', 'administration', 'engineering', 'it', 'is', 'strategy', 'other']
-
+if args.empspy:
+    if os.path.exists(os.path.abspath(args.empspy)):
         employees = {}
-
-        if dept is not None:
-            dept_dictionary = [dept.lower()]
-
-        if ifile is not None:
-            try:
-                if os.path.exists(ifile):
-                    with open(ifile, 'r') as f:
-                        dept_dictionary = []
-                        for line in f.readlines():
-                            if line.rstrip():
-                                dept_dictionary.append(line.rstrip())
-            except IOError as e:
-                print "{}[!]{} Problem opening the file. 
{}".format(e) - - for dd in dept_dictionary: - print "{}[*]{} Searching for employees working at {} with '{}' in their title".format(colors.lightblue, colors.normal, company, dd) - - try: - response = requests.get('https://www.linkedin.com/title/{}-at-{}'.format(dd.replace('-', ' '), company.replace('-', ' ')), timeout=2) - if response.status_code == 200: - soup = BeautifulSoup.BeautifulSoup(response.text) + emails = [] + for response in crawl_employees(args.company, os.path.abspath(args.empspy)): + for name, title in get_employees(soupify(response)).items(): + if args.company.lower() in title.lower(): + if not name in employees: + employees[name] = title + + pstatus("{} Employees identified".format(len(employees.keys()))) + if employees: + if args.emailformat: + if args.emailformat[:args.emailformat.find('@')] in ['first.last', 'last.first', 'firstlast', 'lastfirst', 'first', 'last', 'firstl', 'lfirst', 'flast', 'lastf']: + employee_html, employee_csv, employee_json = craft_employees(employees, args.emailformat) else: - pass - except requests.exceptions.Timeout: - print "{}[!]{} Timeout enumerating the {} department".format(colors.red, colors.normal, dd) - except requests.exceptions.ConnectionError: - print "{}[!]{} Connection error.".format(colors.red, colors.normal) - except requests.exceptions.HTTPError: - print "{}[!]{} HTTP error.".format(colors.red, colors.normal) - - #get employee names - for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })): - name = u''.join(n.getText()).encode('utf-8') - title = u''.join(t.getText()).encode('utf-8').replace('&', '&') - - if not name in employees: - employees[name] = title - - return employees - except Exception as e: - print "{}[!]{} Error harvesting users. {}".format(colors.red, colors.normal, e) - -#----------------------------------------# -# EMAILS # -#----------------------------------------# - -def format_email(names, eformat): - emails = [] - for name in names: - spaces = [] - for x,y in enumerate(name): - if ' ' in y: - spaces.append(x) - - if eformat[:eformat.find('@')] == 'flast': - emails.append('{}{}{}'.format(name[0], name[(spaces[-1]+1):], eformat[eformat.find('@'):])) - elif eformat[:eformat.find('@')] == 'lfirst': - emails.append('{}{}{}'.format(name[spaces[-1]+1], name[0:spaces[0]], eformat[eformat.find('@'):])) - elif eformat[:eformat.find('@')] == 'first.last': - emails.append('{}.{}{}'.format(name[0:spaces[0]], name[(spaces[-1]+1):], eformat[eformat.find('@'):])) - elif eformat[:eformat.find('@')] == 'last.first': - emails.append('{}.{}{}'.format(name[(spaces[-1]+1):], name[0:spaces[0]], eformat[eformat.find('@'):])) - - return [e.lower() for e in emails] - -#----------------------------------------# -# OUTPUT # -#----------------------------------------# - -def output(employees, email, company, ofile): - counter = 0 - ge, be = {}, {} - print '\n' - - if email: - for k, e in zip(employees, email): - if company in employees[k].lower(): - if ',' in k: - be[e] = '{}, {}'.format(k, employees[k]) - else: - ge[e] = '{}, {}'.format(k, employees[k]) - print "{}[*]{} {}, {}, {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&'), e) - counter +=1 - else: - for k in employees: - if company in employees[k].lower(): - ge[k] = employees[k] - print "{}[*]{} {} {}".format(colors.green, colors.normal, k.replace('&', '&'), employees[k].replace('&', '&')) - counter +=1 - if be: - print "\n{}[!]{} The following employees have commas in their names. 
Their emails were not accurate.".format(colors.red, colors.normal) - for k in be: - print "{}[*]{} {}".format(colors.yellow, colors.normal, be[k]) - - if ofile: - with open(ofile, 'w') as f: - f.write("\n" + "-" * 69 + "\n" + "InSpy Output" + "\n" + "-" * 69 + "\n\n") - - if [e for e in ge.keys() if '@' in e]: #if emails in keys - f.write("\n" + "E-mails" + "\n" + "-" * 25 + "\n\n") - for k in ge.keys(): - f.write(k+'\n') - - f.write("\n" + "All" + "\n" + "-" * 25 + "\n\n") - for k in ge: - f.write('{}, {}\n'.format(ge[k], k)) + pwarning("You didn't provide a valid e-mail format. See help (-h) for acceptable formats.") + employee_html, employee_csv, employee_json = craft_employees(employees, None) else: - for k in ge: - f.write('{}, {}\n'.format(k, ge[k])) - - print "\n{}[*]{} Done! {}{}{} employees found.".format(colors.lightblue, colors.normal, colors.green, counter, colors.normal) - print "{}[*]{} Completed in {:.1f}s\n".format(colors.lightblue, colors.normal, time.time()-start_time) - -#----------------------------------------# -# MAIN # -#----------------------------------------# - -def main(): - print "\n " + "-" * 74 + "\n " + colors.white + "InSpy v1.0 - LinkedIn Employee Enumerator by Jonathan Broche (@g0jhonny)\n " + colors.normal + "-" * 74 + "\n " - parser = argparse.ArgumentParser(description='InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)') - parser.add_argument('-c', '--company', required=True, help='Company name') - parser.add_argument('-d', '--dept', nargs='?', const='', help='Department or title to query employees against. Inspy searches through a predefined list by default.') - parser.add_argument('-e', '--emailformat', help='Email output format. Acceptable formats: first.last@xyz.com, last.first@xyz.com, flast@xyz.com, lastf@xyz.com') - parser.add_argument('-i', '--inputfilename', nargs='?', const='', help='File with list of departments or titles to query employees against (one item per line)') - parser.add_argument('-o', '--outfilename', nargs='?', const='', help='Output results to text file') - args = parser.parse_args() - - employees = inspy_enum(args.company, args.dept, args.inputfilename) - - if args.emailformat: - if args.emailformat.find('@') and args.emailformat[:args.emailformat.find('@')] in {'flast', 'lfirst', 'first.last', 'last.first'}: - if employees is not None: - e = format_email(employees.keys(), args.emailformat) - output(employees, e,args.company.lower(), args.outfilename) - else: - print "{}[!]{} Please provide a valid email address format (i.e., flast@xyz.com, lfirst@xyz.com, first.last@xyz.com, last.first@xyz.com)".format(colors.red, colors.normal) + employee_html, employee_csv, employee_json = craft_employees(employees, None) else: - if employees is not None: - output(employees,'',args.company.lower(), args.outfilename) + print os.path.abspath(args.empspy) + perror("No such file or directory: '{}'".format(args.empspy)) + +#output +if args.html: + if tech_html or employee_html: + if tech_html and employee_html: + craft_html(args.company, tech_html, employee_html, args.html) + elif tech_html and not employee_html: + craft_html(args.company, tech_html, None, args.html) + else: + craft_html(args.company, None, employee_html, args.html) +if args.csv: + if tech_csv or employee_csv: + if tech_csv and employee_csv: + craft_csv(tech_csv, employee_csv, args.csv) + elif tech_csv and not employee_csv: + craft_csv(tech_csv, None, args.csv) + else: + craft_csv(None, employee_csv, args.csv) +if args.json: + if tech_json or employee_json: + 
if tech_json and employee_json:
+            craft_json(tech_json, employee_json, args.json)
+        elif tech_json and not employee_json:
+            craft_json(tech_json, None, args.json)
+        else:
+            craft_json(None, employee_json, args.json)
 
-if __name__ == '__main__':
-    main()
\ No newline at end of file
+print "Completed in {:.1f}s".format(time.time()-stime)
\ No newline at end of file
diff --git a/README.md b/README.md
index 66828ab..e254ad5
--- a/README.md
+++ b/README.md
@@ -1,93 +1,51 @@
 # InSpy
-A python based LinkedIn employee enumerator. This script is great for social engineering assessments where clients ask one
-to provide employee emails.
+## Introduction
+-----
 
-### Help
+InSpy is a Python-based LinkedIn enumeration tool. InSpy has two functionalities: TechSpy and EmpSpy.
 
-```
-InSpy - A LinkedIn employee enumerator by Jonathan Broche (@g0jhonny)
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -c COMPANY, --company COMPANY
-                        Company name
-  -d [DEPT], --dept [DEPT]
-                        Department or title to query employees against. Inspy
-                        searches through a predefined list by default.
-  -e EMAILFORMAT, --emailformat EMAILFORMAT
-                        Email output format. Acceptable formats:
-                        first.last@xyz.com, last.first@xyz.com, flast@xyz.com,
-                        lastf@xyz.com
-  -i [INPUTFILENAME], --inputfilename [INPUTFILENAME]
-                        File with list of departments or titles to query
-                        employees against (one item per line)
-  -o [OUTFILENAME], --outfilename [OUTFILENAME]
-                        Output results to text file
-```
-### Examples
-
-```
-./InSpy.py -c "acme corp"
+- TechSpy - Crawls LinkedIn job listings for technologies used by the provided company. InSpy attempts to identify technologies by matching job descriptions against keywords from a newline-delimited file.
 
- --------------------------------------------------------------------------
- InSpy v1.0 - LinkedIn User Enumerator by Jonathan Broche (@g0jhonny)
- --------------------------------------------------------------------------
-
-[*] Searching for employees working at acme corp with 'sales' in their title
-[*] Searching for employees working at acme corp with 'hr' in their title
-[*] Searching for employees working at acme corp with 'marketing' in their title
-[*] Searching for employees working at acme corp with 'finance' in their title
-[*] Searching for employees working at acme corp with 'accounting' in their title
-[*] Searching for employees working at acme corp with 'director' in their title
-[*] Searching for employees working at acme corp with 'administrative' in their title
-[*] Searching for employees working at acme corp with 'lawyer' in their title
-[*] Searching for employees working at acme corp with 'it' in their title
-[*] Searching for employees working at acme corp with 'security' in their title
+- EmpSpy - Crawls LinkedIn for employees working at the provided company. InSpy searches for employees by titles and/or departments imported from a newline-delimited file. InSpy can also create email addresses for the identified employees if the user specifies a known email format.
+## Installation
+-----
-[*] Proud Arkie Accounts Receivable specialist at Acme Corp.
-[*] Brian Russo Finance Manager at Acme corp
-[*] Paul Samuelson Director of Customer Support at ACME Corp. Production Resources
-[*] Steve Smith Developer at Acme Corp
-[*] Sarah Rhodes Director of Sales at Acme Corp
-[*] Frances Jones Assistant to the Director at Acme Corp
- ...snip...
+Run `pip install -r requirements.txt` within the cloned InSpy directory.
-[*] Done! 29 employees found.
-[*] Completed in 28.7s
-```
-
-Provide InSpy with the email format of the respective corporation and it'll output the emails for you.
+## Usage
+-----
 ```
-./InSpy.py -c 'acme corp' -e flast@acme.com
-
- --------------------------------------------------------------------------
- InSpy v1.0 - LinkedIn User Enumerator by Jonathan Broche (@g0jhonny)
- --------------------------------------------------------------------------
-
-[*] Searching for employees working at acme corp with 'sales' in their title
-[*] Searching for employees working at acme corp with 'hr' in their title
-[*] Searching for employees working at acme corp with 'marketing' in their title
-[*] Searching for employees working at acme corp with 'finance' in their title
-[*] Searching for employees working at acme corp with 'accounting' in their title
-[*] Searching for employees working at acme corp with 'director' in their title
-[*] Searching for employees working at acme corp with 'administrative' in their title
-[*] Searching for employees working at acme corp with 'lawyer' in their title
-[*] Searching for employees working at acme corp with 'it' in their title
-[*] Searching for employees working at acme corp with 'security' in their title
+InSpy - A LinkedIn enumeration tool by Jonathan Broche (@g0jhonny)
-[*] Proud Arkie, Accounts Receivable specialist at Acme Corp., parkie@acme.com
-[*] Brian Russo, Finance Manager at Acme corp, brusso@acme.com
-[*] Paul Samuelson, Director of Customer Support at ACME Corp. Production Resources, psamuelson@acme.com
-[*] Steve Smith, Developer at Acme Corp, ssmith@acme.com
-[*] Sarah Rhodes, Director of Sales at Acme Corp, srhodes@acme.com
-[*] Frances Jones, Assistant to the Director at Acme Corp, fjones@acme.com
- ...snip...
-
-[*] Done! 29 employees found.
-[*] Completed in 29.0s
+positional arguments:
+  company               Company name to use for tasks.
+optional arguments:
+  -h, --help            show this help message and exit
+  -v, --version         show program's version number and exit
+
+Technology Search:
+  --techspy [file]      Crawl LinkedIn job listings for technologies used by
+                        the company. Technologies imported from a new line
+                        delimited file. [Default: tech-list-small.txt]
+  --limit int           Limit the number of job listings to crawl. [Default:
+                        50]
+
+Employee Harvesting:
+  --empspy [file]       Discover employees by title and/or department. Titles
+                        and departments are imported from a new line delimited
+                        file. [Default: title-list-small.txt]
+  --emailformat string  Create email addresses for discovered employees using
+                        a known format. [Accepted Formats: first.last@xyz.com,
+                        last.first@xyz.com, firstl@xyz.com, lfirst@xyz.com,
+                        flast@xyz.com, lastf@xyz.com, first@xyz.com,
+                        last@xyz.com]
+
+Output Options:
+  --html file           Print results in HTML file.
+  --csv file            Print results in CSV format.
+  --json file           Print results in JSON.
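+
+# Examples (company name and email format below are illustrative):
+#   ./InSpy.py --techspy --limit 20 "acme corp"
+#   ./InSpy.py --empspy --emailformat flast@acme.com --html report.html "acme corp"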
 ```
\ No newline at end of file
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/crawler.py b/lib/crawler.py
new file mode 100644
index 0000000..4fa6725
--- /dev/null
+++ b/lib/crawler.py
@@ -0,0 +1,50 @@
+from logger import *
+import requests
+requests.packages.urllib3.disable_warnings()
+
+headers={'Host':'www.linkedin.com', 'User-Agent':'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'}
+
+def crawl_employees(company, filename):
+    titles = []
+    responses = []
+    try:
+        with open(filename) as f:
+            for title in f.readlines():
+                titles.append(title.rstrip())
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error(e)
+        return responses
+    for title in titles:
+        # request each title inside its own try block so a single slow
+        # lookup does not abort the rest of the crawl
+        try:
+            response = requests.get("https://www.linkedin.com/title/{}-at-{}".format(title.replace(' ', '-'), company.replace(' ', '-')), timeout=3, headers=headers)
+            responses.append(response.text)
+        except requests.exceptions.Timeout:
+            pwarning("Warning: Timed out crawling {}".format(title))
+        except Exception as e:
+            perror("Error: {}".format(e))
+            logging.error(e)
+    return responses
+
+def crawl_jobs(company): #initial crawl
+    url = "https://www.linkedin.com/jobs/{}-jobs".format(company.replace(' ', '-'))
+    try:
+        response = requests.get(url, timeout=3, headers=headers)
+        return response.text
+    except requests.exceptions.Timeout as e:
+        # ReadTimeout subclasses Timeout, so one handler covers both
+        perror("Error: Timed out. Try again, LinkedIn doesn't like us sometimes")
+        logging.error(e)
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error(e)
+
+
+def crawl_url(url=None): #page crawls
+    try:
+        response = requests.get(url, timeout=3, headers=headers)
+        return response.text
+    except requests.exceptions.Timeout as e:
+        pwarning("Warning: Timed out")
+        logging.error(e)
+    except Exception as e:
+        pwarning("Warning: {}".format(e))
+        logging.error(e)
\ No newline at end of file
diff --git a/lib/logger.py b/lib/logger.py
new file mode 100644
index 0000000..76998a5
--- /dev/null
+++ b/lib/logger.py
@@ -0,0 +1,32 @@
+import logging, sys, time
+
+time_format = time.strftime("%Y-%m-%d %H:%M:%S")
+
+def timestamp():
+    # computed per call; the module-level time_format is frozen at import
+    # time and is only suitable for the log file name below
+    return time.strftime("%Y-%m-%d %H:%M:%S")
+
+def start_logger(company):
+    handler = logging.FileHandler('./logs/{}_{}.log'.format(company.replace(' ', '_'), time_format.replace(' ', '_')))
+    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s - %(message)s"))
+
+    logger = logging.getLogger()
+    logger.propagate = False
+    logger.addHandler(handler)
+    logger.setLevel(logging.INFO)
+    logging.getLogger("requests").setLevel(logging.DEBUG)
+
+class colors(object):
+    grey = "\033[0;37m"
+    cyan = "\033[0;36m"
+    yellow = "\033[0;33m"
+    red = "\033[1;31m"
+    normal = "\033[0;00m"
+
+def pstatus(message):
+    print "{} {}{}{}".format(timestamp(), colors.grey, message, colors.normal)
+
+def presults(message):
+    print "{} {}{}{}".format(timestamp(), colors.cyan, message, colors.normal)
+
+def pwarning(message):
+    print "{} {}{}{}".format(timestamp(), colors.yellow, message, colors.normal)
+
+def perror(message):
+    print "{} {}{}{}".format(timestamp(), colors.red, message, colors.normal)
\ No newline at end of file
diff --git a/lib/soupify.py b/lib/soupify.py
new file mode 100644
index 0000000..570295d
--- /dev/null
+++ b/lib/soupify.py
@@ -0,0 +1,71 @@
+from logger import *
+import BeautifulSoup, json
+
+def soupify(response):
+    try:
+        soupd = BeautifulSoup.BeautifulSoup(response)
+        return soupd
+    except (AttributeError, TypeError) as e:
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
+
+def get_employees(soup):
+    try:
+        employees = {}
+        for n, t in zip(soup.findAll('h3', { "class" : "name" }), soup.findAll('p', { "class" : "headline" })):
+            name = u''.join(n.getText()).encode('utf-8')
+            title = u''.join(t.getText()).encode('utf-8')
+            if name and title:
+                employees[name] = title
+        return employees
+    except (AttributeError, TypeError) as e:
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_links(soup, company):
+    try:
+        job_links = []
+        for link, comp in zip(soup.findAll('a', { "class" : "job-title-link" }), soup.findAll('span', { "class" : "company-name-text" })):
+            if comp.text == company:
+                job_links.append(u''.join(link['href']).encode('utf-8'))
+        return job_links
+    except (AttributeError, TypeError) as e:
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
+
+def get_page_links(soup):
+    page_links = []
+    try:
+        for page in soup.findAll('li', { "class" : "page-number"}):
+            a = page.findAll('a')
+            page_links.append(u''.join("https://linkedin.com{}".format(a[0]['href'])).encode('utf-8'))
+        return page_links
+    except (AttributeError, TypeError) as e:
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_title(soup):
+    try:
+        return u''.join(json.loads(soup.find('code', {"id" : "decoratedJobPostingModule"}).string)['decoratedJobPosting']['jobPosting'].get('title')).encode('utf-8')
+    except (AttributeError, TypeError) as e:
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
+
+def get_job_description(soup):
+    try:
+        return u''.join(json.loads(soup.find('code', {"id" : "decoratedJobPostingModule"}).string)['decoratedJobPosting']['jobPosting']['description'].get('rawText')).encode('utf-8')
+    except (AttributeError, TypeError):
+        pass
+    except Exception as e:
+        perror("Error: {}".format(e))
+        logging.error("Soupify.py Error: {}".format(e))
\ No newline at end of file
diff --git a/lib/workbench.py b/lib/workbench.py
new file mode 100644
index 0000000..b2eb2b4
--- /dev/null
+++ b/lib/workbench.py
@@ -0,0 +1,181 @@
+#todo: fix special character output
+import re, json, os, csv, time, codecs
+from logger import *
+
+def identify_tech(data, filename):
+    matches = []
+    if not data: # the job description is None when the crawl failed
+        return matches
+    with open(filename) as f:
+        keywords = f.readlines()
+
+    for sentence in data.lower().split("."):
+        keyword_found = []
+        for keyword in keywords:
+            # escape the keyword, not the sentence; escaping the search
+            # text breaks the word-boundary match
+            if re.findall('\\b{}\\b'.format(re.escape(keyword.rstrip())), sentence):
+                keyword_found.append(keyword.rstrip())
+        if keyword_found:
+            matches.append({sentence:keyword_found})
+    return matches
+
+def craft_tech(matches):
+    logging.info(matches)
+    unique_techs, html_out, csv_out, json_out = [], [], [], []
+    for title, link in matches.items():
+        techs_per_job = []
+        for url in link.keys():
+            for data in link.get(url):
+                for sentence, techs in data.items():
+                    highlight_techs = sentence
+                    for tech in techs:
+                        if tech not in unique_techs: unique_techs.append(tech)
+                        if tech not in techs_per_job: techs_per_job.append(tech)
+                        # bold the matched technology in the HTML excerpt
+                        highlight_techs = re.sub('\\b{}\\b'.format(re.escape(tech)), '<b>{}</b>'.format(tech), highlight_techs)
+                    html_out.append('<tr><td><a href="{url}">{title}</a></td><td>{techs}</td><td>{sentence}</td></tr>'.format(title=title, techs=', '.join(techs), sentence=highlight_techs.replace("\xe2\x80\xa2", " * "), url=url))
+                    csv_out.append({"Job Title": title, "Technologies": ', '.join(techs), "Excerpt": sentence, "URL": url})
+                    json_out.append({"jobtitle": title, "technologies": ', '.join(techs), "excerpt": sentence, "url": url})
+
+        pstatus('Title: {}'.format(title))
+        presults(', '.join(techs_per_job))
+
+    if unique_techs:
+        pstatus("Unique Technologies:")
+        presults(', '.join(unique_techs))
+
+    return html_out, csv_out, json_out
+
+def craft_employees(employees, eformat):
+    html_out, csv_out, json_out = [], [], []
+    emails = {}
+    if eformat:
+        format = eformat[:eformat.find('@')]
+        domain = eformat[eformat.find('@'):]
+
+        for name in employees.keys():
+            first = [n.split() for n in name.split(',',1)][0][0]
+            last = [n.split() for n in name.split(',',1)][0][-1]
+            email = "{}{}".format(format_email(format, first.lower(), last.lower()), domain)
+            if email:
+                emails[name] = email
+
+    for name, title in employees.items():
+        presults("{} {}".format(name, title[:50].replace('&amp;', '&')))
+        logging.info("Employees identified: {}".format(employees))
+
+        #html output
+        if emails:
+            html_out.append('<tr><td>{name}</td><td>{title}</td><td>{email}</td></tr>'.format(name=name, title=title, email=emails.get(name)))
+            csv_out.append({"Employee Name": name, "Title": title, "Email": emails.get(name)})
+            json_out.append({"employeename": name, "title": title, "email": emails.get(name)})
+        else:
+            html_out.append('<tr><td>{name}</td><td>{title}</td><td>--</td></tr>'.format(name=name, title=title))
+            csv_out.append({"Employee Name": name, "Title": title, "Email": "--"})
+            json_out.append({"employeename": name, "title": title, "email": "--"})
+
+    if emails:
+        pstatus("Emails crafted")
+        for name, email in emails.items():
+            presults(email)
+
+    return html_out, csv_out, json_out
+
+def format_email(format, first, last):
+    try:
+        formats = {
+            'first.last': '{}.{}'.format(first,last),
+            'last.first': '{}.{}'.format(last,first),
+            'firstlast': '{}{}'.format(first,last),
+            'lastfirst': '{}{}'.format(last,first),
+            'firstl':'{}{}'.format(first,last[0]),
+            'lfirst':'{}{}'.format(last[0],first),
+            'flast': '{}{}'.format(first[0],last),
+            'lastf': '{}{}'.format(last,first[0]),
+            'first': first,
+            'last': last
+        }
+        return formats[format]
+    except Exception as e:
+        logging.error(e)
+
+
+def craft_html(company, tech_html, employee_html, filename):
+    if tech_html:
+        tech_table = """
+        <h2>Technologies Identified</h2>
+        <table>
+            <tr>
+                <th>Job Title</th>
+                <th>Technologies</th>
+                <th>Excerpt</th>
+            </tr>
+            {techs}
+        </table>
+        """.format(techs=' '.join(tech_html))
+    else: tech_table = ""
+
+    if employee_html:
+        employee_table = """
+        <h2>Employees Identified</h2>
+        <table>
+            <tr>
+                <th>Employee Name</th>
+                <th>Title</th>
+                <th>E-mail</th>
+            </tr>
+            {html}
+        </table>
+        """.format(html=' '.join(employee_html))
+    else: employee_table = ""
+
+    page = """
+    <html>
+    <head><title>InSpy - {company}</title></head>
+    <body>
+    <h1>InSpy</h1>
+    <p>Company: {company}</p>
+    <p>Date: {time}</p>
+    {tech}
+    {emp}
+    </body>
+    </html>
+ + + """.format(company=company, time=time.strftime("%Y/%m/%d %H:%M:%S"), tech=tech_table, emp=employee_table) + + with open(os.path.abspath(filename), 'w') as f: + f.write(page) + +def craft_csv(tech_csv, employee_csv, filename): + + if tech_csv: + with open(os.path.abspath(filename), 'w') as csvfile: + fieldnames = ["Job Title", "Technologies", "Excerpt", "URL"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in tech_csv: + writer.writerow(row) + writer.writerow({}) + + if employee_csv: + with open(os.path.abspath(filename), 'a') as csvfile: + fieldnames = ["Employee Name", "Title", "Email"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in employee_csv: + writer.writerow(row) + +def craft_json(tech_json, employee_json, filename): + if tech_json and employee_json: + tech = {"technologies":tech_json} + emp = {"employees":employee_json} + full_json = tech.copy() + full_json.update(emp) + elif tech_json: + tech = {"technologies":tech_json} + full_json = tech + elif employee_json: + emp = {"employees":employee_json} + full_json = emp + + with open(os.path.abspath(filename), 'w') as f: + f.write(json.dumps(full_json)) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bdc7597 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests == 2.7.0 +BeautifulSoup == 3.2.1 diff --git a/wordlists/tech-list-large.txt b/wordlists/tech-list-large.txt new file mode 100644 index 0000000..5c482e8 --- /dev/null +++ b/wordlists/tech-list-large.txt @@ -0,0 +1,55 @@ +c# +ruby +python +cms +azure +java +javascript +cisco +asa +meraki +apache +iis +sql +mysql +windows +linux +unix +apple +adobe +android +blackberry +broadband +cloud +computing +dropbox +ebay +exchange +postfix +sendmail +encryption +filesharing +microsoft +mobile +oracle +juniper +avaya +software +sunos +as400 +mainframe +bluecoat +siem +intrusion prevention +intrusion detection +ids +ips +web proxy +web filter +antivirus +anti virus +dlp +endpoint detection +mobile security +active directory +vmware \ No newline at end of file diff --git a/wordlists/tech-list-small.txt b/wordlists/tech-list-small.txt new file mode 100644 index 0000000..ac13c8e --- /dev/null +++ b/wordlists/tech-list-small.txt @@ -0,0 +1,10 @@ +c# +ruby +python +windows +unix +linux +antivirus +ips +ids +cisco \ No newline at end of file diff --git a/wordlists/title-list-large.txt b/wordlists/title-list-large.txt new file mode 100644 index 0000000..1e10013 --- /dev/null +++ b/wordlists/title-list-large.txt @@ -0,0 +1,45 @@ +chairman +president +executive +deputy +manager +staff +chief +director +partner +owner +treasurer +secretary +associate +supervisor +foreman +counsel +consultant +recruiter +human resources +hr +payroll +administrator +training +coordinator +therapist +nurse +pharmacist +pathologist +occupational +marketing +product +development +senior +project +software +developer +analyst +engineer +technician +accountant +controller +financial +auditor +tax +security \ No newline at end of file diff --git a/wordlists/title-list-small.txt b/wordlists/title-list-small.txt new file mode 100644 index 0000000..5b25ecd --- /dev/null +++ b/wordlists/title-list-small.txt @@ -0,0 +1,20 @@ +sales +marketing +human resources +finance +accounting +inventory +quality assurance +insurance +licenses +operational +customer service +staff +research & development +management +administration +engineering +it +is +strategy +other \ No 
newline at end of file
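
A minimal consumer sketch for the `--json` report (it assumes a prior run such as `./InSpy.py --empspy --json out.json "acme corp"`; the company name and `out.json` are illustrative, not part of the repo). `craft_json()` nests technology records under `"technologies"` and employee records under `"employees"`, and either key may be absent depending on which modes were run:

```
# Hypothetical consumer of InSpy's --json output; "out.json" is illustrative.
import json

with open('out.json') as f:
    results = json.load(f)

# Either key may be absent, depending on which modes were run.
for emp in results.get('employees', []):
    print emp['employeename'], '-', emp['email']

for tech in results.get('technologies', []):
    print tech['jobtitle'], '-', tech['technologies']
```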