diff --git a/.gitignore b/.gitignore index 79dcaa94e..b7db1ef46 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,8 @@ src/ # sphinx docs/source docs/_build -docs/guides/Feeds.md -docs/guides/Harmonization-fields.md +docs/user/feeds.rst +docs/dev/harmonization-fields.md # Debian build filed debian/files diff --git a/docs/guides/images/cef_logo.png b/docs/_static/cef_logo.png similarity index 100% rename from docs/guides/images/cef_logo.png rename to docs/_static/cef_logo.png diff --git a/docs/guides/images/intelmq-arch-schema.png b/docs/_static/intelmq-arch-schema.png similarity index 100% rename from docs/guides/images/intelmq-arch-schema.png rename to docs/_static/intelmq-arch-schema.png diff --git a/docs/guides/images/intelmq-arch-schema.vsd b/docs/_static/intelmq-arch-schema.vsd similarity index 100% rename from docs/guides/images/intelmq-arch-schema.vsd rename to docs/_static/intelmq-arch-schema.vsd diff --git a/docs/guides/images/intelmq_logo.jpg b/docs/_static/intelmq_logo.jpg similarity index 100% rename from docs/guides/images/intelmq_logo.jpg rename to docs/_static/intelmq_logo.jpg diff --git a/docs/guides/images/rabbitmq-user-monitoring.png b/docs/_static/rabbitmq-user-monitoring.png similarity index 100% rename from docs/guides/images/rabbitmq-user-monitoring.png rename to docs/_static/rabbitmq-user-monitoring.png diff --git a/docs/autogen.py b/docs/autogen.py index bdc5b4e35..b6d0e48f3 100755 --- a/docs/autogen.py +++ b/docs/autogen.py @@ -74,34 +74,26 @@ def feeds_docs(): with open(os.path.join(BASEDIR, 'intelmq/etc/feeds.yaml')) as fhandle: config = yaml.safe_load(fhandle.read()) - toc = "" - for provider in sorted(config['providers'].keys()): - provider_link = provider.replace('.', '') - provider_link = provider_link.replace(' ', '-') - toc += "- [%s](#%s)\n" % (provider, provider_link.lower()) - - output = """# Available Feeds + output = """Feeds +====== The available feeds are grouped by the provider of the feeds. For each feed the collector and parser that can be used is documented as well as any feed-specific parameters. -To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run -`intelmq/bin/intelmq_gen_feeds_docs.py` to generate the new content of this file. - - - -%s +To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then rebuild the documentation. -\n +.. 
contents :: -""" % toc +""" for provider, feeds in sorted(config['providers'].items(), key=lambda x: x[0]): - output += "## %s\n\n" % provider + output += f"{provider}\n" + output += "-"*len(provider) + "\n" for feed, feed_info in sorted(feeds.items(), key=lambda x: x[0]): - output += "### %s\n\n" % feed + output += f"{feed}\n" + output += "^"*len(feed) + "\n" if feed_info.get('public'): output += info("public", "yes" if feed_info['public'] else "no") @@ -122,7 +114,7 @@ def feeds_docs(): for bot, bot_info in sorted(feed_info['bots'].items(), key=lambda x: x[0]): - output += "#### %s\n\n" % bot.title() + output += "**%s**\n\n" % bot.title() output += info("Module", bot_info['module']) output += info("Configuration Parameters") @@ -141,10 +133,17 @@ def feeds_docs(): if isinstance(value, (list, tuple)) and value: value = '["%s"]' % '", "'.join(value) - output += "* * `%s`: `%s`\n" % (key, value) + output += " * `%s`: `%s`\n" % (key, value) output += '\n' output += '\n' return output + + +if __name__ == '__main__': # pragma: no cover + with open('dev/harmonization-fields.md', 'w') as handle: + handle.write(harm_docs()) + with open('user/feeds.rst', 'w') as handle: + handle.write(feeds_docs()) diff --git a/docs/conf.py b/docs/conf.py index ccb830831..504725dd5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,7 +54,11 @@ def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode): # The full version, including alpha/beta/rc tags release = '2.3.0' - +rst_prolog = """ +.. |intelmq-users-list-link| replace:: `IntelMQ Users Mailinglist `__ +.. |intelmq-developers-list-link| replace:: `IntelMQ Developers Mailinglist `__ +.. |intelmq-manager-github-link| replace:: `IntelMQ Manager `__ +""" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be @@ -116,9 +120,9 @@ def run_apidoc(_): def run_autogen(_): - with open('guides/Harmonization-fields.md', 'w') as handle: + with open('dev/harmonization-fields.md', 'w') as handle: handle.write(autogen.harm_docs()) - with open('guides/Feeds.md', 'w') as handle: + with open('user/feeds.rst', 'w') as handle: handle.write(autogen.feeds_docs()) diff --git a/docs/guides/IntelMQ-3.0-Architecture.md b/docs/dev/IntelMQ-3.0-Architecture.md similarity index 100% rename from docs/guides/IntelMQ-3.0-Architecture.md rename to docs/dev/IntelMQ-3.0-Architecture.md diff --git a/docs/dev/data-harmonization.rst b/docs/dev/data-harmonization.rst new file mode 100644 index 000000000..0c70dcb83 --- /dev/null +++ b/docs/dev/data-harmonization.rst @@ -0,0 +1,207 @@ +################## +Data Harmonization +################## + +.. contents:: + +Overview +======== + +All messages (reports and events) are Python/JSON dictionaries. The key names and their types are defined by the so-called *harmonization*. + +The purpose of this document is to list and clearly define known **fields** in AbuseHelper as well as IntelMQ or similar systems. A field is a ``key=value`` pair. For a clear and unique definition of a field, we must define the **key** (field-name) as well as the possible **values**. A field belongs to an **event**. An event is basically a structured log record in the form ``key=value, key=value, key=value, …``. In the list of known fields (see `Fields List and data types`_ below), each field is grouped by a **section**. We describe these sections briefly below. +Every event **MUST** contain a timestamp field. + +An `IOC `_ (Indicator of compromise) is a single observation like a log line.
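+To illustrate, a minimal event could look like the following (a sketch: all values are invented, the field names and the type value are taken from the harmonization and the classification table below):
+
+.. code-block:: python
+
+   {
+       "feed.name": "Example Feed",
+       "time.source": "2021-01-01T00:00:00+00:00",
+       "source.ip": "192.0.2.1",
+       "classification.type": "infected-system",
+   }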
+ +Rules for keys +============== + +The keys can be grouped together in sub-fields, e.g. `source.ip` or `source.geolocation.latitude`. Thus, keys must match `^[a-z_](\.?[a-z0-9_]+)*$`. + + +Sections +======== + +As stated above, every field is organized under some section. The following is a description of the sections and what they imply. + +Feed +---- + +Fields listed under this grouping describe the feed from which the information came. + +Time +---- + +The time section lists all fields related to time information. +This document requires that all the timestamps MUST be normalized to UTC. If the source reports only a date, do not attempt to invent timestamps. + +Source Identity +--------------- + +This section lists all fields related to identification of the source. The source is the identity the IoC is about, as opposed to the destination identity. + +For examples see the table below. + +The abuse type of an event defines the way these events need to be interpreted. For example, for a botnet drone they refer to the compromised machine, whereas for a command and control server they refer to the server itself. + +Source Geolocation Identity +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We recognize that IP geolocation is not an exact science, and analysis of the abuse data has shown that different attribution sources have different opinions on the geolocation of an IP address. This is why we recommend enriching the data with as many sources as you have available and making the decision which value to use for the cc IOC based on those answers. + +Source Local Identity +^^^^^^^^^^^^^^^^^^^^^ + +Some sources report an internal (NATed) IP address. + +Destination Identity +-------------------- + +The abuse type of an event defines the way these IOCs need to be interpreted. For a botnet drone they refer to the compromised machine, whereas for a command and control server they refer to the server itself. + +Destination Geolocation Identity +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We recognize that IP geolocation is not an exact science, and analysis of the abuse data has shown that different attribution sources have different opinions on the geolocation of an IP address. This is why we recommend enriching the data with as many sources as you have available and making the decision which value to use for the cc IOC based on those answers. + +Destination Local Identity +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some sources report an internal (NATed) IP address. + +Extra values +------------ +Data which does not fit into the harmonization can be saved in the 'extra' namespace. All keys must begin with `extra.`; there are no other rules on key names and values. The values can be read and set like all other fields. + +Fields List and data types +========================== + +A list of allowed fields and data types can be found in :doc:`harmonization-fields`. + +Classification +============== + +IntelMQ classifies events using three labels: taxonomy, type and identifier. This tuple of three values can be used for deduplication of events and describes what happened. + +The taxonomy can be automatically added by the taxonomy expert bot based on the given type.
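+For example (a sketch, using the mapping table below): if an event carries the type `phishing`, the taxonomy expert bot adds the taxonomy `fraud`:
+
+.. code-block:: python
+
+   event.add('classification.type', 'phishing')
+   # after the taxonomy expert bot has processed the event:
+   # event['classification.taxonomy'] == 'fraud'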
+The following taxonomy-type mapping is based on `eCSIRT II Taxonomy `_: + +=============================== ========================================= ============================================= + Taxonomy Type Description +=============================== ========================================= ============================================= + abusive content spam Or 'Unsolicited Bulk Email', this means that the recipient has not granted verifiable permission for the message to be sent and that the message is sent as part of a larger collection of messages, all having a functionally comparable content. + abusive content harmful-speech Discreditation or discrimination of somebody, e.g. cyber stalking, racism or threats against one or more individuals. + abusive content violence Child pornography, glorification of violence, etc. + availability ddos Distributed Denial of Service attack, e.g. SYN-Flood or UDP-based reflection/amplification attacks. + availability dos Denial of Service attack, e.g. sending specially crafted requests to a web application which causes the application to crash or slow down. + availability outage Outage caused e.g. by air condition failure or natural disaster. + availability sabotage Physical sabotage, e.g. cutting wires or malicious arson. + fraud copyright Offering or Installing copies of unlicensed commercial software or other copyright protected materials (Warez). + fraud masquerade Type of attack in which one entity illegitimately impersonates the identity of another in order to benefit from it. + fraud phishing Masquerading as another entity in order to persuade the user to reveal private credentials. + fraud unauthorized-use-of-resources Using resources for unauthorized purposes including profit-making ventures, e.g. the use of e-mail to participate in illegal profit chain letters or pyramid schemes. + information content security unauthorised-information-access Unauthorized access to information, e.g. by abusing stolen login credentials for a system or application, intercepting traffic or gaining access to physical documents. + information content security unauthorised-information-modification Unauthorised modification of information, e.g. by an attacker abusing stolen login credentials for a system or application or a ransomware encrypting data. + information content security data-loss Loss of data, e.g. caused by harddisk failure or physical theft. + information content security dropzone This IOC refers to a place where the compromised machines store the stolen user data. Not in ENISA eCSIRT-II taxonomy. + information content security leak IOCs relating to leaked credentials or personal data. Not in ENISA eCSIRT-II taxonomy. + information gathering scanner Attacks that send requests to a system to discover weaknesses. This also includes testing processes to gather information on hosts, services and accounts. Examples: fingerd, DNS querying, ICMP, SMTP (EXPN, RCPT, ...), port scanning. + information gathering sniffing Observing and recording of network traffic (wiretapping). + information gathering social-engineering Gathering information from a human being in a non-technical way (e.g. lies, tricks, bribes, or threats). + intrusion attempts brute-force Multiple login attempts (Guessing / cracking of passwords, brute force). This IOC refers to a resource, which has been observed to perform brute-force attacks over a given application protocol. + intrusion attempts exploit An attack using an unknown exploit.
+ intrusion attempts ids-alert IOCs based on a sensor network. This is a generic IOC denomination, should it be difficult to reliably denote the exact type of activity involved for example due to an anecdotal nature of the rule that triggered the alert. + intrusions application-compromise Compromise of an application by exploiting (un)known software vulnerabilities, e.g. SQL injection. + intrusions backdoor This refers to hosts, which have been compromised and backdoored with remote administration software or a Trojan in the traditional sense. Not in ENISA eCSIRT-II taxonomy. + intrusions burglary Physical intrusion, e.g. into corporate building or data center. + intrusions compromised This IOC refers to a compromised system. Not in ENISA eCSIRT-II taxonomy. + intrusions defacement This IOC refers to hacktivism related activity. Not in ENISA eCSIRT-II taxonomy. + intrusions privileged-account-compromise Compromise of a system where the attacker gained administrative privileges. + intrusions unauthorized-command The possibly infected device sent unauthorized commands to a remote device with malicious intent. Not in ENISA eCSIRT-II taxonomy. + intrusions unauthorized-login A possibly infected device logged in to a remote device without authorization. Not in ENISA eCSIRT-II taxonomy. + intrusions unprivileged-account-compromise Compromise of a system using an unprivileged (user/service) account. + malicious code c2server This is a command and control server in charge of a given number of botnet drones. + malicious code dga domain DGA domains are seen in various families of malware, which use them to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in ENISA eCSIRT-II taxonomy. + malicious code infected-system This is a compromised machine, which has been observed to make a connection to a command and control server. + malicious code malware A URL is the most common resource with reference to malware binary distribution. Not in ENISA eCSIRT-II taxonomy. + malicious code malware-configuration This is a resource which updates botnet drones with a new configuration. + malicious code malware-distribution URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. + malicious code ransomware This IOC refers to a specific type of compromised machine, where the computer has been hijacked for ransom by the criminals. Not in ENISA eCSIRT-II taxonomy and deprecated, use 'infected-system' instead. + other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. + other other All incidents which don't fit in one of the given categories should be put into this class. + other proxy This refers to the use of proxies from inside your network. Not in ENISA eCSIRT-II taxonomy. + other tor This IOC refers to incidents related to Tor network infrastructure. Not in ENISA eCSIRT-II taxonomy. + other unknown Unknown classification. Not in ENISA eCSIRT-II taxonomy. + test test Meant for testing. + vulnerable ddos-amplifier Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g.
DNS open-resolvers or NTP servers with monlist enabled. + vulnerable information-disclosure Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis. + vulnerable potentially-unwanted-accessible Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC. + vulnerable vulnerable client This attribute refers to a badly configured or vulnerable client, which can be compromised by a third party. For example, not-up-to-date clients or clients which are misconfigured, such as clients querying public domains for WPAD configurations. In addition, to specify the vulnerability and its potential abuse, one should use the classification.identifier, description and other attributes for that purpose respectively. Not in ENISA eCSIRT-II taxonomy. + vulnerable vulnerable service This attribute refers to a badly configured or vulnerable network service, which may be abused by a third party. For example, these services relate to open proxies, open dns resolvers, network time servers (NTP) or character generation services (chargen), simple network management services (SNMP). In addition, to specify the network service and its potential abuse, one should use the protocol, destination port and description attributes for that purpose respectively. Not in ENISA eCSIRT-II taxonomy. + vulnerable vulnerable-system A system which is vulnerable to certain attacks. Example: misconfigured client proxy settings (example: WPAD), outdated operating system version, etc. + vulnerable weak-crypto Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks. +=============================== ========================================= ============================================= + +Meaning of source, destination and local values for each classification type and possible identifiers. The identifier is often a normalized malware name, grouping many variants. + +======================= ================================================ ========================== ================== =========================== + Type Source Destination Local Possible identifiers +======================= ================================================ ========================== ================== =========================== + backdoor *backdoored device* + blacklist *blacklisted device* + brute-force *attacker* target + c2server *(sinkholed) c&c server* zeus, palevo, feodo + compromised *server* + ddos *attacker* target + defacement *defaced website* + dga domain *infected device* + dropzone *server hosting stolen data* + exploit *hosting server* + ids-alert *triggering device* + infected-system *infected device* *contacted c&c server* + malware *infected device* internal at source zeus, palevo, feodo + malware-configuration *infected device* + other + phishing *phishing website* + proxy *server allowing policy and security bypass* + ransomware *infected device* + scanner *scanning device* scanned device http,modbus,wordpress + spam *infected device* targeted server internal at source + test + unknown + vulnerable service *vulnerable device* heartbleed, openresolver, snmp + vulnerable client *vulnerable device* wpad +======================= ================================================ ========================== ================== =========================== + +The field in italics is the interesting one for CERTs.
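+Expressed with the `event.add` API from the developers guide (a sketch; the values are taken from the tables above and from the example that follows):
+
+.. code-block:: python
+
+   # an infected device that contacts a zeus command & control server
+   event.add('classification.taxonomy', 'malicious code')
+   event.add('classification.type', 'malware')
+   event.add('classification.identifier', 'zeus')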
+ +Example: + +If you know of an IP address that connects to a zeus c&c server, it's about the infected device, thus type malware and identifier zeus. If you want to complain about the c&c server, it's type c2server and identifier zeus. The `malware.name` can have the full name, e.g. 'zeus_p2p'. + +Minimum recommended requirements for events +=========================================== + +Below, we have enumerated the minimum recommended requirements for an actionable abuse event. These keys should be present for the abuse report to make sense for the end recipient. Please note that if you choose to anonymize your sources, you can substitute **feed** with **feed.code** and that only one of the identity keys **ip**, **domain name**, **url**, **email address** must be present. All the rest of the keys are **optional**. + +================= ======================== ================= + Category Key Terminology +================= ======================== ================= + Feed feed Should + Classification classification.type Should + Classification classification.taxonomy Should + Time time.source Should + Time time.observation Should + Identity source.ip Should* + Identity source.fqdn Should* + Identity source.url Should* + Identity source.account Should* +================= ======================== ================= + +* only one of them + +This list of required fields is *not* enforced by IntelMQ. + +**NOTE:** This document was copied from `AbuseHelper repository `_ and improved. + diff --git a/docs/dev/feeds-wishlist.rst b/docs/dev/feeds-wishlist.rst new file mode 100644 index 000000000..be834e86a --- /dev/null +++ b/docs/dev/feeds-wishlist.rst @@ -0,0 +1,82 @@ +############## +Feeds wishlist +############## + +This is a list of various feeds which are currently either not supported or whose usage is not clearly documented in IntelMQ. + +If you want to **contribute**, either by documenting how to configure existing bots to collect new feeds or by creating new parsers, here is a list of potentially interesting feeds. See `Feeds documentation `_ for more information on this. + +This list evolved from the issue `Contribute: Feeds List (#384) `_.
+ +- A list of feeds + - `threatfeeds.io `_ + - `TheCyberThreat `_ + +- Some third party intelmq bots: `NRDCS' IntelMQ fork `_ + +- List of potentially interesting data sources: + - `Abuse.ch SSL Blacklists `_ + - `Adblock Plus Malwaredomains `_ + - `apivoid IP Reputation API `_ + - `APWG's ecrimex `_ + - `Bad IPs `_ + - `Berkeley `_ + - `Binary Defense `_ + - `Bot Invaders Realtime tracker `_ + - `Botscout Last Caught `_ + - `Carbon Black Feeds `_ + - `CERT.pl Phishing Warning List `_ + - `Chaos Reigns `_ + - `Critical Stack `_ + - `Cruzit `_ + - `Cyber Crime Tracker `_ + - `DNS DB API `_ + - `Dyn DNS `_ + - `Facebook Threat Exchange `_ + - `FilterLists `_ + - `Firehol IPLists `_ + - `Google Webmaster Alerts `_ + - `GPF Comics DNS Blacklist `_ + - `Greensnow `_ + - `HP Feeds `_ + - `IBM X-Force Exchange `_ + - `ISC SANS `_ + - `ISightPartners `_ + - `Joewein `_ + - `Malshare `_ + - `Malware Config `_ + - `Malware DB (cert.pl) `_ + - `MalwareDomainList `_ + - `MalwareDomains `_ + - `MalwareInt `_ + - `Manity Spam IP addresses `_ + - `Marc Blanchard DGA Domains `_ + - `MaxMind Proxies `_ + - `mIRC Servers `_ + - `Monzymerza `_ + - `Multiproxy `_ + - `MVPS `_ + - `Null Secure `_ + - `OpenBugBounty `_ + - `Payload Security `_ + - `Project Honeypot (#284) `_ + - `ShadowServer Sandbox API `_ + - `Shodan search API `_ + - `Snort `_ + - `Spamhaus BGP feed (BGPf) `_ + - `SteveBlack Hosts File `_ + - `TheCyberThreat `_ + - `The Haleys `_ + - `Threat Crowd `_ + - `Threat Grid `_ + - `Threatstream `_ + - `TOR Project Exit addresses `_ + - `TotalHash `_ + - `UCE Protect `_ + - `URI BL `_ + - `Virustotal `_ + - `virustream `_ + - `VoIP Blacklist `_ + - `Wordpress Callback Domains `_ + - `YourCMC `_ diff --git a/docs/guides/Developers-Guide.md b/docs/dev/guide.rst similarity index 63% rename from docs/guides/Developers-Guide.md rename to docs/dev/guide.rst index d6e1d12ca..aab79810d 100644 --- a/docs/guides/Developers-Guide.md +++ b/docs/dev/guide.rst @@ -1,71 +1,22 @@ -# Developers Guide - -**Table of Contents:** -- [Intended Audience](#intended-audience) - - [Goals](#goals) -- [Development Environment](#development-environment) - - [Installation](#installation) - - [How to develop](#how-to-develop) - - [Update](#update) - - [Testing](#testing) - - [Additional optional requirements](#additional-optional-requirements) - - [Run the tests](#run-the-tests) - - [Environment variables](#environment-variables) - - [Configuration test files](#configuration-test-files) -- [Development Guidelines](#development-guidelines) - - [Coding-Rules](#coding-rules) - - [Unicode](#unicode) - - [Back-end independence and Compatibility](#back-end-independence-and-compatibility) - - [Layout Rules](#layout-rules) - - [Documentation](#documentation) - - [Directories Hierarchy on Default Installation](#directories-hierarchy-on-default-installation) - - [Directories and Files naming](#directories-and-files-naming) - - [Class Names](#class-names) - - [Data Harmonization Rules](#data-harmonization-rules) - - [Code Submission Rules](#code-submission-rules) - - [Releases, Repositories and Branches](#releases-repositories-and-branches) - - [Branching model](#branching-model) - - [How to Contribute](#how-to-contribute) - - [Workflow](#workflow) - - [Commit Messages](#commit-messages) - - [Prepare for Discussion in GitHub](#prepare-for-discussion-in-github) - - [License and Author files](#license-and-author-files) -- [System Overview](#system-overview) - - [Code Architecture](#code-architecture) - - [Pipeline](#pipeline) -- [Bot 
Developer Guide](#bot-developer-guide) - - [Template](#template) -- [imports for additional libraries and intelmq](#imports-for-additional-libraries-and-intelmq) - - [Pipeline interactions](#pipeline-interactions) - - [Logging](#logging) - - [Log Messages Format](#log-messages-format) - - [Log Levels](#log-levels) - - [What to Log](#what-to-log) - - [How to Log](#how-to-log) - - [String formatting in Logs](#string-formatting-in-logs) - - [Error handling](#error-handling) - - [Initialization](#initialization) - - [Custom configuration checks](#custom-configuration-checks) - - [Examples](#examples) - - [Parsers](#parsers) - - [parse_line](#parse_line) - - [Tests](#tests) - - [Configuration](#configuration) - - [Cache](#cache) -- [Documentation](#documentation) - - [Feeds documentation](#feeds-documentation) -- [Testing Pre-releases](#testing-pre-releases) - - [Installation](#installation) - -## Intended Audience +################ +Developers Guide +################ + +.. contents:: + +***************** +Intended Audience +***************** + This guide is for developers of IntelMQ. It explains the code architecture, coding guidelines as well as ways you can contribute code or documentation. -If you have not done so, please read the [User Guide](User-Guide.md) first. +If you have not done so, please read the :doc:`../user/introduction` first. Once you feel comfortable running IntelMQ with open source bots and you feel adventurous enough to contribute to the project, this guide is for you. It does not matter if you are an experienced Python programmer or just a beginner. There are a lot of samples to help you out. However, before we go into the details, it is important to observe and internalize some overall project goals. -### Goals +Goals +===== It is important, that all developers agree and stick to these meta-guidelines. IntelMQ tries to: @@ -88,9 +39,14 @@ How do you ultimately test if things are still easy? Let them new programmers te Similarly, if code does not get accepted upstream by the main developers, it is usually only because of the ease-of-use argument. Do not give up , go back to the drawing board, and re-submit again. -## Development Environment +.. _development environment: -### Installation +*********************** +Development Environment +*********************** + +Installation +============ Developers can create a fork repository of IntelMQ in order to commit the new code to this repository and then be able to do pull requests to the main repository. Otherwise you can just use the 'certtools' as username below. @@ -99,38 +55,40 @@ The following instructions will use `pip3 -e`, which gives you a so called *edit In this guide we use `/opt/dev_intelmq` as local repository copy. You can also use other directories as long as they are readable by other unprivileged users (e.g. home directories on Fedora can't be read by other users by default). `/opt/intelmq` is used as root location for IntelMQ installations, this is IntelMQ's default for this installation method. This directory is used for configurations (`/opt/intelmq/etc`), local states (`/opt/intelmq/var/lib`) and logs (`/opt/intelmq/var/log`). -```bash -sudo -s - -git clone https://github.com//intelmq.git /opt/dev_intelmq -cd /opt/dev_intelmq +.. code-block:: bash -pip3 install -e . + sudo -s + + git clone https://github.com//intelmq.git /opt/dev_intelmq + cd /opt/dev_intelmq + + pip3 install -e . 
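+    # note: -e gives an *editable* install, so code changes in
+    # /opt/dev_intelmq take effect without reinstalling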
+ + useradd -d /opt/intelmq -U -s /bin/bash intelmq -useradd -d /opt/intelmq -U -s /bin/bash intelmq - -intelmqsetup -``` + intelmqsetup **Note:** please do not forget that configuration files, log files will be available on `/opt/intelmq`. However, if your development is somehow related to any shipped configuration file, you need to apply the changes in your repository `/opt/dev_intelmq/intelmq/etc/`. -### How to develop +How to develop +============== After you successfully setup your IntelMQ development environment, you can perform any development on any `.py` file on `/opt/dev_intelmq`. After you change, you can use the normal procedure to run the bots: -```bash -su - intelmq - -intelmqctl start spamhaus-drop-collector +.. code-block:: bash -tail -f /opt/intelmq/var/log/spamhaus-drop-collector.log -``` + su - intelmq + + intelmqctl start spamhaus-drop-collector + + tail -f /opt/intelmq/var/log/spamhaus-drop-collector.log You can also add new bots, creating the new `.py` file on the proper directory inside `cd /opt/dev_intelmq/intelmq`. However, your IntelMQ installation with pip3 needs to be updated. Please check the following section. -### Update +Update +====== In case you developed a new bot, you need to update your current development installation. In order to do that, please follow this procedure: @@ -139,59 +97,65 @@ In case you developed a new bot, you need to update your current development ins 2. Make sure that you have your new bot in the right place and the information on BOTS file is correct. 3. Execute the following commands: -```bash -sudo -s - -cd /opt/dev_intelmq -## necessary for pip metadata update and new executables: -pip3 install -e . -## only necessary if it's not a link yet -cp -fs /opt/dev_intelmq/intelmq/bots/BOTS /opt/intelmq/etc/BOTS - -find /opt/intelmq/ -type d -exec chmod 0770 {} \+ -find /opt/intelmq/ -type f -exec chmod 0660 {} \+ -chown -R intelmq.intelmq /opt/intelmq -## if you use the intelmq manager (adapt the webservers' group if needed): -chown intelmq.www-data /opt/intelmq/etc/*.conf -``` +.. code-block:: bash + + sudo -s + + cd /opt/dev_intelmq + ## necessary for pip metadata update and new executables: + pip3 install -e . + ## only necessary if it's not a link yet + cp -fs /opt/dev_intelmq/intelmq/bots/BOTS /opt/intelmq/etc/BOTS + + find /opt/intelmq/ -type d -exec chmod 0770 {} \+ + find /opt/intelmq/ -type f -exec chmod 0660 {} \+ + chown -R intelmq.intelmq /opt/intelmq + ## if you use the intelmq manager (adapt the webservers' group if needed): + chown intelmq.www-data /opt/intelmq/etc/*.conf Now you can test run your new bot following this procedure: -```bash -su - intelmq +.. code-block:: bash -intelmqctl start -``` + su - intelmq + + intelmqctl start -### Testing +Testing +======= -#### Additional optional requirements +Additional optional requirements +-------------------------------- For the documentation tests two additional libraries are required: Cerberus and PyYAML. You can install them with pip: -```bash -pip3 install Cerberus PyYAML -``` +.. code-block:: bash + + pip3 install Cerberus PyYAML or the package management of your operating system. -#### Run the tests +Run the tests +------------- All changes have to be tested and new contributions should be accompanied by according unit tests. Please do not run the tests as root just like any other IntelMQ component for security reasons. Any other unprivileged user is possible. 
You can run the tests by changing to the directory with IntelMQ repository and running either `unittest` or `nosetests`: - cd /opt/dev_intelmq - sudo -u intelmq python3 -m unittest {discover|filename} # or - sudo -u intelmq nosetests3 [filename] # alternatively nosetests or nosetests-3.5 depending on your installation, or - sudo -u intelmq python3 setup.py test # uses a build environment (no external dependencies) +.. code-block:: bash + + cd /opt/dev_intelmq + sudo -u intelmq python3 -m unittest {discover|filename} # or + sudo -u intelmq nosetests3 [filename] # alternatively nosetests or nosetests-3.5 depending on your installation, or + sudo -u intelmq python3 setup.py test # uses a build environment (no external dependencies) Some bots need local databases to succeed. If you only want to test one explicit test file, give the file path as argument. -There is a [Travis-CI](https://travis-ci.org/certtools/intelmq/builds) setup for automatic testing, which triggers on pull requests. You can also easily activate it for your forks. +There is a `Travis-CI `_ setup for automatic testing, which triggers on pull requests. You can also easily activate it for your forks. -#### Environment variables +Environment variables +--------------------- There are a bunch of environment variables which switch on/off some tests: @@ -203,24 +167,28 @@ There are a bunch of environment variables which switch on/off some tests: For example, to run all tests you can use: -```bash -INTELMQ_TEST_DATABASES=1 INTELMQ_TEST_EXOTIC=1 nosetests3 -``` +.. code-block:: bash -#### Configuration test files + INTELMQ_TEST_DATABASES=1 INTELMQ_TEST_EXOTIC=1 nosetests3 + +Configuration test files +------------------------ The tests use the configuration files in your working directory, not those installed in `/opt/intelmq/etc/` or `/etc/`. You can run the tests for a locally changed intelmq without affecting an installation or requiring root to run them. -## Development Guidelines +********************** +Development Guidelines +********************** -### Coding-Rules +Coding-Rules +============ Most important: **KEEP IT SIMPLE**!! This can not be over-estimated. Feature creep can destroy any good software project. But if new folks can not understand what you wrote in 10-15 minutes, it is not good. It's not about the performance, etc. It's about readability. -In general, we follow the [Style Guide for Python Code (PEP8)](https://www.python.org/dev/peps/pep-0008/). +In general, we follow :pep:`0008`. We recommend reading it before committing code. There are some exceptions: sometimes it does not make sense to check for every PEP8 error (such as whitespace indentation when you want to make a dict=() assignment @@ -228,72 +196,79 @@ look pretty. Therefore, we do have some exceptions defined in the `setup.cfg` fi We support Python 3 only. -#### Unicode +Unicode +------- * Each internal object in IntelMQ (Event, Report, etc) that has strings, their strings MUST be in UTF-8 Unicode format. * Any data received from external sources MUST be transformed into UTF-8 Unicode format before add it to IntelMQ objects. -#### Back-end independence and Compatibility +Back-end independence and Compatibility +--------------------------------------- Any component of the IntelMQ MUST be independent of the message queue technology (Redis, RabbitMQ, etc...). 
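+In practice this means that a bot never talks to a broker directly, but only uses the pipeline methods of the bot class (a sketch, see `Pipeline interactions`_ below; the Redis call is shown only as a counter-example):
+
+.. code-block:: python
+
+   # correct: broker-agnostic, works with any configured queue technology
+   self.send_message(event)
+
+   # wrong: hard-codes one specific broker
+   # import redis
+   # redis.Redis().lpush('my-queue', event.to_json())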
-### Layout Rules - -```bash -intelmq/ - lib/ - bot.py - cache.py - message.py - pipeline.py - utils.py - bots/ - collector/ - / - collector.py - parser/ - / - parser.py - expert/ - / - expert.py - output/ - / - output.py - BOTS - /conf - pipeline.conf - runtime.conf - defaults.conf -``` +Layout Rules +============ + +.. code-block:: bash + + intelmq/ + lib/ + bot.py + cache.py + message.py + pipeline.py + utils.py + bots/ + collector/ + / + collector.py + parser/ + / + parser.py + expert/ + / + expert.py + output/ + / + output.py + BOTS + /conf + pipeline.conf + runtime.conf + defaults.conf Assuming you want to create a bot for a new 'Abuse.ch' feed. It turns out that here it is necessary to create different parsers for the respective kind of events (e.g. malicious URLs). Therefore, the usual hierarchy ‘intelmq/bots/parser//parser.py’ would not be suitable because it is necessary to have more parsers for each Abuse.ch Feed. The solution is to use the same hierarchy with an additional "description" in the file name, separated by underscore. Also see the section *Directories and Files naming*. Example (including the current ones): -``` -/intelmq/bots/parser/abusech/parser_domain.py -/intelmq/bots/parser/abusech/parser_ip.py -/intelmq/bots/parser/abusech/parser_ransomware.py -/intelmq/bots/parser/abusech/parser_malicious_url.py -``` +.. code-block:: + + /intelmq/bots/parser/abusech/parser_domain.py + /intelmq/bots/parser/abusech/parser_ip.py + /intelmq/bots/parser/abusech/parser_ransomware.py + + /intelmq/bots/parser/abusech/parser_malicious_url.py -#### Documentation +Documentation +------------- Please document your added/modified code. -For doc strings, we are using the [sphinx-napoleon-google-type-annotation](http://www.sphinx-doc.org/en/stable/ext/napoleon.html#type-annotations). +For doc strings, we are using the `sphinx-napoleon-google-type-annotation `_. -Additionally, Python's type hints/annotations are used, see [PEP 484](https://www.python.org/dev/peps/pep-0484/). +Additionally, Python's type hints/annotations are used, see :pep:`484`. -#### Directories Hierarchy on Default Installation +Directories Hierarchy on Default Installation +--------------------------------------------- * Configuration Files Path: `/opt/intelmq/etc/` * PID Files Path: `/opt/intelmq/var/run/` * Logs Files and dumps Path: `/opt/intelmq/var/log/` * Additional Bot Files Path, e.g. templates or databases: `/opt/intelmq/var/lib/bots/[bot-name]/` -#### Directories and Files naming +Directories and Files naming +---------------------------- Any directory and file of IntelMQ has to follow the Directories and Files naming. Any file name or folder name has to * be represented with lowercase and in case of the name has multiple words, the spaces between them must be removed or replaced by underscores; @@ -302,114 +277,135 @@ Any directory and file of IntelMQ has to follow the Directories and Files naming In the bot directories name, the name must correspond to the feed provider. If necessary and applicable the feed name can and should be used as postfix for the filename. Examples: -``` -intelmq/bots/parser/malwaredomainlist/parser.py -intelmq/bots/parser/taichung/parser.py -intelmq/bots/parser/cymru/parser_full_bogons.py -intelmq/bots/parser/abusech/parser_ransomware.py -``` -#### Class Names +.. 
code-block:: + + intelmq/bots/parser/malwaredomainlist/parser.py + intelmq/bots/parser/taichung/parser.py + intelmq/bots/parser/cymru/parser_full_bogons.py + intelmq/bots/parser/abusech/parser_ransomware.py + +Class Names +----------- Class name of the bot (ex: PhishTank Parser) must correspond to the type of the bot (ex: Parser) e.g. `PhishTankParserBot` -### Data Harmonization Rules +Data Harmonization Rules +======================== Any component of IntelMQ MUST respect the "Data Harmonization Ontology". -**Reference:** IntelMQ Data Harmonization - [Data Harmonization Ontology](Data-Harmonization.md) +**Reference:** IntelMQ Data Harmonization - :doc:`data-harmonization` -### Code Submission Rules +Code Submission Rules +===================== -#### Releases, Repositories and Branches +Releases, Repositories and Branches +----------------------------------- - * The main repository is in [github.com/certtools/intelmq](https://github.com/certtools/intelmq). + * The main repository is in `github.com/certtools/intelmq `_. * There are a couple of forks which might be regularly merged into the main repository. They are independent and can have incompatible changes and can deviate from the upstream repository. - * We use [semantic versioning](http://semver.org/). A short summary: + * We use `semantic versioning `_. A short summary: * a.x are stable releases * a.b.x are bugfix/patch releases * a.x must be compatible to version a.0 (i.e. API/Config-compatibility) * If you contribute something, please fork the repository, create a separate branch and use this for pull requests, see section below. -#### Branching model +Branching model +--------------- * "master" is the stable branch. It holds the latest stable release. Non-developers should only work on this branch. The recommended log level is WARNING. Code is only added by merges from the maintenance branches. * "maintenance/a.b.x" branches accumulate (cherry-picked) patches for a maintenance release (a.b.x). Recommended for experienced users who deploy intelmq themselves. No new features will be added to these branches. * "develop" is the development branch for the next stable release (a.x). New features must go there. Developers may want to work on this branch. This branch also holds all patches from maintenance releases if applicable. The recommended log level is DEBUG. * Separate branches to develop features or bug fixes may be used by any contributor. -#### How to Contribute +How to Contribute +----------------- * Make separate pull requests / branches on GitHub for changes. This allows us to discuss things via GitHub. * We prefer one Pull Request per feature or change. If you have a bunch of small fixes, please don't create one PR per fix :) - * Only very small and changes (docs, ...) might be committed directly to development branches without Pull Request by the [core-team](https://github.com/orgs/certtools/teams/core). + * Only very small changes (docs, ...) might be committed directly to development branches without Pull Request by the `core-team `_. * Keep the balance between atomic commits and keeping the amount of commits per PR small. You can use interactive rebasing to squash multiple small commits into one (`rebase -i [base-branch]`). Only do rebasing if the code you are rebasing is not yet used by others or is already merged - because then others may run into conflicts. * Make sure your PR is mergeable into the develop branch and all tests are successful.
- * If possible [sign your commits with GPG](https://help.github.com/articles/signing-commits-using-gpg/). + * If possible `sign your commits with GPG `_. -#### Workflow +Workflow +-------- We assume here, that origin is your own fork. We first add the upstream repository: -```bash -> git remote add upstream https://github.com/certtools/intelmq.git -``` +.. code-block:: bash + + > git remote add upstream https://github.com/certtools/intelmq.git Syncing develop: -```bash -> git checkout develop -> git pull upstream develop -> git push origin develop -``` +.. code-block:: bash + + > git checkout develop + > git pull upstream develop + > git push origin develop + You can do the same with the branches `master` and `maintenance`. Create a separate feature-branch to work on, sync develop with upstream. Create working branch from develop: -```bash -> git checkout develop -> git checkout -b bugfix -## your work -> git commit -``` + +.. code-block:: bash + + > git checkout develop + > git checkout -b bugfix + # your work + > git commit + Or, for bugfixes create a separate bugfix-branch to work on, sync maintenance with upstream. Create working branch from maintenance: -```bash -> git checkout maintenance -> git checkout -b new-feature -## your work -> git commit + +.. code-block:: bash + + > git checkout maintenance + > git checkout -b new-feature + # your work + > git commit Getting upstream's changes for master or any other branch: -```bash -> git checkout develop -> git pull upstream develop -> git push origin develop -``` + +.. code-block:: bash + + > git checkout develop + > git pull upstream develop + > git push origin develop + There are 2 possibilities to get upstream's commits into your branch. Rebasing and Merging. Using rebasing, your history is rewritten, putting your changes on top of all other commits. You can use this if your changes are not published yet (or only in your fork). -```bash -> git checkout bugfix -> git rebase develop -``` + +.. code-block:: bash + + > git checkout bugfix + > git rebase develop + Using the `-i` flag for rebase enables interactive rebasing. You can then remove, reorder and squash commits, rewrite commit messages, beginning with the given branch, e.g. develop. Or using merging. This doesn't break the history. It's considered more , but also pollutes the history with merge commits. -```bash -> git checkout bugfix -> git merge develop -``` + +.. code-block:: bash + + > git checkout bugfix + > git merge develop You can then create a PR with your branch `bugfix` to our upstream repository, using GitHub's web interface. -#### Commit Messages +Commit Messages +--------------- If it fixes an existing issue, please use GitHub syntax, e.g.: `fixes certtools/intelmq#` -#### Prepare for Discussion in GitHub +Prepare for Discussion in GitHub +-------------------------------- If we don't discuss it, it's probably not tested. -### License and Author files +License and Author files +======================== License and Authors files can be found at the root of repository. * License file **MUST NOT** be modified except by the explicit written permission by CNCS/CERT.PT or CERT.at @@ -418,7 +414,9 @@ License and Authors files can be found at the root of repository. License and authors must be only listed in an external file but not inside the code files. -## System Overview +*************** +System Overview +*************** In the `intelmq/lib/` directory you can find some libraries: * Bots: Defines base structure for bots and handling of startup, stop, messages etc. 
@@ -429,58 +427,68 @@ In the `intelmq/lib/` directory you can find some libraries: * Test: Base class for bot tests with predefined test and assert methods. * Utils: Utility functions used by system components. -### Code Architecture +Code Architecture +================= -![Code Architecture](images/intelmq-arch-schema.png) +.. image:: /_static/intelmq-arch-schema.png + :alt: Code Architecture -### Pipeline +Pipeline +======== * collector bot **TBD** -## Bot Developer Guide +******************* +Bot Developer Guide +******************* There's a dummy bot including tests at `intelmq/tests/lib/test_parser_bot.py`. You can always start any bot directly from command line by calling the executable. The executable will be created during installation a directory for binaries. After adding new bots to the code, install IntelMQ to get the files created. Don't forget to give an bot id as first argument. Also, running bots with other users than `intelmq` will raise permission errors. -```bash -$ sudo -i intelmq -$ intelmqctl run file-output # if configured -$ intelmq.bots.outputs.file.output file-output -``` -You will get all logging outputs directly on stderr as well as in the log file. - -### Template -Please adjust the doc strings accordingly and remove the in-line comments (`#`). -```python -"""Parse data from example.com, be a nice ExampleParserBot. -Document possible necessary configurations. -""" -import sys +.. code-block:: bash -## imports for additional libraries and intelmq -from intelmq.lib.bot import Bot + $ sudo -i intelmq + $ intelmqctl run file-output # if configured + $ intelmq.bots.outputs.file.output file-output +You will get all logging outputs directly on stderr as well as in the log file. -class ExampleParserBot(Bot): - def process(self): - report = self.receive_message() - - event = self.new_event(report) # copies feed.name, time.observation - ... # implement the logic here - event.add('source.ip', '127.0.0.1') - event.add('extra', {"os.name": "Linux"}) - - self.send_message(event) - self.acknowledge_message() +Template +======== +Please adjust the doc strings accordingly and remove the in-line comments (`#`). -BOT = ExampleParserBot -``` +.. code-block::python + + """Parse data from example.com, be a nice ExampleParserBot. + + Document possible necessary configurations. + """ + import sys + + # imports for additional libraries and intelmq + from intelmq.lib.bot import Bot + + + class ExampleParserBot(Bot): + def process(self): + report = self.receive_message() + + event = self.new_event(report) # copies feed.name, time.observation + ... # implement the logic here + event.add('source.ip', '127.0.0.1') + event.add('extra', {"os.name": "Linux"}) + + self.send_message(event) + self.acknowledge_message() + + + BOT = ExampleParserBot There are some names with special meaning. These can be used i.e. called: * `stop`: Shuts the bot down. @@ -495,7 +503,8 @@ These can be defined: All other names can be used freely. -### Pipeline interactions +Pipeline interactions +===================== We can call three methods related to the pipeline: @@ -503,16 +512,19 @@ We can call three methods related to the pipeline: - `self.send_message(event, path="_default")`: Processed message is sent to destination queues. It is possible to change the destination queues by optional `path` parameter. - `self.acknowledge_message()`: Message formerly received by `receive_message` is removed from the internal queue. This should always be done after processing and after the sending of the new message. 
In case of errors, this function is not called and the message will stay in the internal queue waiting to be processed again. -### Logging +Logging +======= -#### Log Messages Format +Log Messages Format +------------------- Log messages have to be clear and well formatted. The format is the following: Format: -``` - - - - -``` + +.. code-block:: + + - - - Rules: * the Log message MUST follow the common rules of a sentence, beginning with uppercase and ending with period. @@ -520,7 +532,8 @@ Rules: When the logger instance is created, the bot id must be given as parameter anyway. The function call defines the log level, see below. -#### Log Levels +Log Levels +---------- * *debug*: Debugging information includes retrieved and sent messages, detailed status information. Can include sensitive information like passwords and amount can be huge. * *info*: Logs include loaded databases, fetched reports or waiting messages. @@ -528,73 +541,83 @@ When the logger instance is created, the bot id must be given as parameter anywa * *error*: Errors and Exceptions. * *critical* Program is failing. -#### What to Log +What to Log +----------- * Try to keep a balance between obscuring the source code file with hundreds of log messages and having too little log messages. * In general, a bot MUST report error conditions. -#### How to Log +How to Log +---------- + The Bot class creates a logger with that should be used by bots. Other components won't log anyway currently. Examples: -```python -self.logger.info('Bot start processing.') -self.logger.error('Pipeline failed.') -self.logger.exception('Pipeline failed.') -``` +.. code-block::python + + self.logger.info('Bot start processing.') + self.logger.error('Pipeline failed.') + self.logger.exception('Pipeline failed.') + The `exception` method automatically appends an exception traceback. The logger instance writes by default to the file `/opt/intelmq/var/log/[bot-id].log` and to stderr. -##### String formatting in Logs +String formatting in Logs +^^^^^^^^^^^^^^^^^^^^^^^^^ Parameters for string formatting are better passed as argument to the log function, see https://docs.python.org/3/library/logging.html#logging.Logger.debug In case of formatting problems, the error messages will be better. For example: -```python -self.logger.debug('Connecting to %r.', host) -``` +.. code-block::python -### Error handling + self.logger.debug('Connecting to %r.', host) + +Error handling +============== The bot class itself has error handling implemented. The bot itself is allowed to throw exceptions and **intended to fail**! The bot should fail in case of malicious messages, and in case of unavailable but necessary resources. The bot class handles the exception and will restart until the maximum number of tries is reached and fail then. Additionally, the message in question is dumped to the file `/opt/intelmq/var/log/[bot-id].dump` and removed from the queue. -### Initialization +Initialization +============== Maybe it is necessary so setup a Cache instance or load a file into memory. Use the `init` function for this purpose: -```python -class ExampleParserBot(Bot): - def init(self): - try: - self.database = pyasn.pyasn(self.parameters.database) - except IOError: - self.logger.error("pyasn data file does not exist or could not be " - "accessed in '%s'." % self.parameters.database) - self.logger.error("Read 'bots/experts/asn_lookup/README.md' and " - "follow the procedure.") - self.stop() -``` - -### Custom configuration checks +.. 
code-block::python + + class ExampleParserBot(Bot): + def init(self): + try: + self.database = pyasn.pyasn(self.parameters.database) + except IOError: + self.logger.error("pyasn data file does not exist or could not be " + "accessed in '%s'." % self.parameters.database) + self.logger.error("Read 'bots/experts/asn_lookup/README.md' and " + "follow the procedure.") + self.stop() + +Custom configuration checks +=========================== Every bot can define a static method `check(parameters)` which will be called by `intelmqctl check`. For example the check function of the ASNLookupExpert: -```python - @staticmethod - def check(parameters): - if not os.path.exists(parameters.get('database', '')): - return [["error", "File given as parameter 'database' does not exist."]] - try: - pyasn.pyasn(parameters['database']) - except Exception as exc: - return [["error", "Error reading database: %r." % exc]] -``` +.. code-block::python + + @staticmethod + def check(parameters): + if not os.path.exists(parameters.get('database', '')): + return [["error", "File given as parameter 'database' does not exist."]] + try: + pyasn.pyasn(parameters['database']) + except Exception as exc: + return [["error", "Error reading database: %r." % exc]] -### Examples +Examples +======== -* Check [Expert Bots](https://github.com/certtools/intelmq/tree/develop/intelmq/bots/experts) -* Check [Parser Bots](https://github.com/certtools/intelmq/tree/develop/intelmq/bots/parsers) +* Check `Expert Bots `_ +* Check `Parser Bots `_ -### Parsers +Parsers +======= Parsers can use a different, specialized Bot-class. It allows to work on individual elements of a report, splitting the functionality of the parser into multiple functions: @@ -607,67 +630,70 @@ For common cases, like CSV, existing function can be used, reducing the amount o You can have a look at the implementation `intelmq/lib/bot.py` or at examples, e.g. the DummyBot in `intelmq/tests/lib/test_parser_bot.py`. This is a stub for creating a new Parser, showing the parameters and possible code: -```python -class MyParserBot(ParserBot): - - def parse(self, report): - """A generator yielding the single elements of the data. - - Comments, headers etc. can be processed here. Data needed by - `self.parse_line` can be saved in `self.tempdata` (list). - - Default parser yields stripped lines. - Override for your use or use an existing parser, e.g.: - parse = ParserBot.parse_csv - """ - for line in utils.base64_decode(report.get("raw")).splitlines(): - yield line.strip() - - def parse_line(self, line, report): - """A generator which can yield one or more messages contained in line. - - Report has the full message, thus you can access some metadata. - Override for your use. - """ - raise NotImplementedError - - def process(self): - self.tempdata = [] # temporary data for parse, parse_line and recover_line - self.__failed = [] - report = self.receive_message() - - for line in self.parse(report): - if not line: - continue - try: - # filter out None - events = list(filter(bool, self.parse_line(line, report))) - except Exception as exc: - self.logger.exception('Failed to parse line.') - self.__failed.append((exc, line)) - else: - self.send_message(*events) - - for exc, line in self.__failed: - self._dump_message(exc, self.recover_line(line)) - - self.acknowledge_message() +.. code-block::python + + class MyParserBot(ParserBot): + + def parse(self, report): + """A generator yielding the single elements of the data. + + Comments, headers etc. can be processed here. 
+            Data needed by `self.parse_line` can be saved in `self.tempdata` (list).
+
+            Default parser yields stripped lines.
+            Override for your use or use an existing parser, e.g.:
+                parse = ParserBot.parse_csv
+            """
+            for line in utils.base64_decode(report.get("raw")).splitlines():
+                yield line.strip()
+
+        def parse_line(self, line, report):
+            """A generator which can yield one or more messages contained in line.
+
+            Report has the full message, thus you can access some metadata.
+            Override for your use.
+            """
+            raise NotImplementedError
+
+        def process(self):
+            self.tempdata = []  # temporary data for parse, parse_line and recover_line
+            self.__failed = []
+            report = self.receive_message()
+
+            for line in self.parse(report):
+                if not line:
+                    continue
+                try:
+                    # filter out None
+                    events = list(filter(bool, self.parse_line(line, report)))
+                except Exception as exc:
+                    self.logger.exception('Failed to parse line.')
+                    self.__failed.append((exc, line))
+                else:
+                    self.send_message(*events)
+
+            for exc, line in self.__failed:
+                self._dump_message(exc, self.recover_line(line))
+
+            self.acknowledge_message()
+
+        def recover_line(self, line):
+            """Reverse of parse for single lines.

+            Recovers a fully functional report with only the problematic line.
+            """
+            return '\n'.join(self.tempdata + [line])
+
+
+    BOT = MyParserBot
+
+parse_line
+----------

-    def recover_line(self, line):
-        """Reverse of parse for single lines.
-
-        Recovers a fully functional report with only the problematic line.
-        """
-        return '\n'.join(self.tempdata + [line])
-
-
-BOT = MyParserBot
-```
-
-#### parse_line

One line can lead to multiple events, thus `parse_line` can't just return one Event. This function is therefore a generator, which makes it easy to return multiple values. Use `yield event` for valid Events and `return` in case of a void result (not parseable line, invalid data etc.).

-### Tests
+Tests
+=====

In order to do automated tests on the bot, it is necessary to write tests including sample data. Have a look at some existing tests:
@@ -678,48 +704,50 @@ In order to do automated tests on the bot, it is necessary to write tests includ

Ideally an example contains not only the ideal case which should succeed, but also a case which should fail instead. (TODO: Implement assertEventNotEqual or assertEventNotcontainsSubset or similar)
Most existing bots are only tested with one message. For newly written tests it is appreciated to have tests including more than one message, e.g. a parser fed with a report consisting of multiple events.

-```python
-import unittest
-
-import intelmq.lib.test as test
-from intelmq.bots.parsers.exampleparser.parser import ExampleParserBot  # adjust bot class name and module
-
-
-class TestExampleParserBot(test.BotTestCase, unittest.TestCase):  # adjust test class name
-    """A TestCase for ExampleParserBot."""
-
-    @classmethod
-    def set_bot(cls):
-        cls.bot_reference = ExampleParserBot  # adjust bot class name
-        cls.default_input_message = EXAMPLE_EVENT  # adjust source of the example event (dict), by default an empty event or report (depending on bot type)
-
-    # This is an example how to test the log output
-    def test_log_test_line(self):
-        """Test if bot does log example message."""
-        self.run_bot()
-        self.assertRegexpMatches(self.loglines_buffer,
-                                 "INFO - Lorem ipsum dolor sit amet")
-
-    def test_event(self):
-        """Test if correct Event has been produced."""
-        self.run_bot()
-        self.assertMessageEqual(0, EXAMPLE_REPORT)
-
-
-if __name__ == '__main__':  # pragma: no cover
+.. code-block:: python
+
+    import unittest
+
+    import intelmq.lib.test as test
+    from intelmq.bots.parsers.exampleparser.parser import ExampleParserBot  # adjust bot class name and module
+
+
+    class TestExampleParserBot(test.BotTestCase, unittest.TestCase):  # adjust test class name
+        """A TestCase for ExampleParserBot."""
+
+        @classmethod
+        def set_bot(cls):
+            cls.bot_reference = ExampleParserBot  # adjust bot class name
+            cls.default_input_message = EXAMPLE_EVENT  # adjust source of the example event (dict), by default an empty event or report (depending on bot type)
+
+        # This is an example how to test the log output
+        def test_log_test_line(self):
+            """Test if bot does log example message."""
+            self.run_bot()
+            self.assertRegexpMatches(self.loglines_buffer,
+                                     "INFO - Lorem ipsum dolor sit amet")
+
+        def test_event(self):
+            """Test if correct Event has been produced."""
+            self.run_bot()
+            self.assertMessageEqual(0, EXAMPLE_REPORT)
+
+
+    if __name__ == '__main__':  # pragma: no cover
         unittest.main()
-```

When calling the file directly, only the tests in this file for the bot will be executed. Some default tests are always executed (via the `test.BotTestCase` class), such as pipeline and message checks, logging, bot naming or empty message handling.

-See the [testing section](#testing) about how to run the tests.
+See the :ref:`testing` section about how to run the tests.

-### Configuration
+Configuration
+=============

In the end, the information about the new bot should be added to the BOTS file located at `intelmq/bots`. Note that the file is sorted!

-### Cache
+Cache
+=====

Bots can use a Redis database as cache instance. Use the `intelmq.lib.utils.Cache` class to set this up and/or look at existing bots, like the `cymru_whois` expert, to see how the cache can be used.
Bots must set a TTL for all keys that are cached to avoid caches growing endlessly over time.
@@ -730,7 +758,9 @@ The databases `<` 10 are reserved for the IntelMQ core:
* 3: statistics
* 4: tests

-## Documentation
+*************
+Documentation
+*************

The documentation is automatically published to https://intelmq.readthedocs.io/ at every push to the repository.
@@ -740,32 +770,40 @@ To build the documentation you need three packages:
- `sphinx-markdown-tables`

To install them, you can use pip:
-```bash
-pip3 install -r docs/requirements.txt
-```
+
+.. code-block:: bash
+
+    pip3 install -r docs/requirements.txt

Then use the Makefile to build the documentation using Sphinx:
-```bash
-cd docs
-make html
-```
-### Feeds documentation
+.. code-block:: bash
+
+    cd docs
+    make html
+
+Feeds documentation
+===================

The feeds which are known to be working with IntelMQ are documented in the machine-readable file `intelmq/etc/feeds.yaml`. The human-readable documentation is generated with the Sphinx build as described in the previous section.

-## Testing Pre-releases
+.. _testing:

-### Installation
+********************
+Testing Pre-releases
+********************
+
+Installation
+============

The [installation procedures](INSTALL.md) need to be adapted only a little bit.

-For native packages, you can find the unstable packages of the next version here: [Installation Unstable Native Packages](https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq%3Aunstable&package=intelmq).
+For native packages, you can find the unstable packages of the next version here: `Installation Unstable Native Packages <https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq%3Aunstable&package=intelmq>`_.
For the installation with pip, use the `--pre` parameter as shown in the following command:

-```bash
-pip3 install --pre intelmq
-```
+.. code-block:: bash
+
+    pip3 install --pre intelmq

-All other steps are not different. Please report any issues you find in our [Issue Tracker](https://github.com/certtools/intelmq/issues/new).
+All other steps are the same. Please report any issues you find in our `Issue Tracker <https://github.com/certtools/intelmq/issues/new>`_.
diff --git a/docs/dev/release-procedure.rst b/docs/dev/release-procedure.rst
new file mode 100644
index 000000000..1c391e691
--- /dev/null
+++ b/docs/dev/release-procedure.rst
@@ -0,0 +1,129 @@
+#################
+Release procedure
+#################
+
+.. contents::
+
+General assumption: You are working on branch maintenance, the next version is a bug fix release. For feature releases it is slightly different.
+
+************
+Check before
+************
+
+* Make sure the current state is really final ;)
+  You can test most of the steps described here locally before doing it for real.
+* Check the upgrade functions in `intelmq/lib/upgrades.py`.
+* Close the milestone on GitHub and move any open issues to the next one.
+* `docs/INSTALL.md`: Update supported operating systems.
+
+*************
+Documentation
+*************
+
+ * CHANGELOG.md and NEWS.md: Update the latest header, fix the order, remove empty sections and (re)group the entries if necessary.
+ * `intelmq/version.py`: Update the version.
+ * `debian/changelog`: Insert a new section for the new version with the tool `dch`.
+
+Adapt the default log levels if necessary; they should be INFO for stable releases. See older releases.
+
+******************************
+Commit, push, review and merge
+******************************
+
+Commit your changes in a separate branch, the final commit's message should start with :code:`REL:`. Push and create a pull request to maintenance, and after that one from maintenance to master. Someone else should review the changes. Fix them if necessary, and make sure the :code:`REL:` commit is the last one; you can also push that one last, after the reviews.
+
+Why a separate branch? Because if problems show up, you can still force-push to that one, keeping the release commit the latest one.
+
+***************
+Tag and release
+***************
+
+Tag the commit with `git tag -s version HEAD`, merge it into master, push the branches *and* the tag. The tag is just `a.b.c`, not prefixed with `v` (that was necessary only with SVN a long time ago...).
+
+Go to https://github.com/certtools/intelmq/tags and enter the release notes (from the CHANGELOG) for the new tag, then it's considered a *release* by GitHub.
+
+*****************
+Tarballs and PyPI
+*****************
+
+* Build the source and binary (wheel) distribution: `python3 setup.py sdist bdist_wheel`
+* Upload the files including signatures to PyPI with e.g. twine: `twine upload -s dist/intelmq...`
+
+********
+Packages
+********
+
+We are currently using the public Open Build Service instance of openSUSE: http://build.opensuse.org/project/show/home:sebix:intelmq
+
+First, test all the steps with the `unstable repository <http://build.opensuse.org/project/show/home:sebix:intelmq:unstable>`_ and check that at least installations succeed.
+
+* Create the tarballs with the script `create-archives.sh`.
+* Update the dsc and spec files for new filenames and versions.
+* Update the .changes file.
+* Build locally for all distributions.
+* Commit.
+
+*************
+Announcements
+*************
+
+Announce the new version on the mailing lists intelmq-users and intelmq-dev.
+For bigger releases, probably also at IHAP, Twitter, etc. Ask your favorite social media consultant. + +******************* +Prepare new version +******************* + +Increase the version in `intelmq/version.py` and declare it as alpha version. +Add the new version in `intelmq/lib/upgrades.py`. +Add a new entry in `debian/changelog` with `dch -v [version] -c debian/changelog`. + +Add new entries to `CHANGELOG.md` and `NEWS.md`. For `CHANGELOG.md`: + +.. code-block:: markdown + + ### Configuration + + ### Core + + ### Development + + ### Harmonization + + ### Bots + #### Collectors + + #### Parsers + + #### Experts + + #### Outputs + + ### Documentation + + ### Packaging + + ### Tests + + ### Tools + + ### Contrib + + ### Known issues + +And for `NEWS.md`: + +.. code-block:: markdown + + ### Requirements + + ### Tools + + ### Harmonization + + ### Configuration + + ### Libraries + + ### Postgres databases diff --git a/docs/guides/Data-Harmonization.md b/docs/guides/Data-Harmonization.md deleted file mode 100644 index bb3973acd..000000000 --- a/docs/guides/Data-Harmonization.md +++ /dev/null @@ -1,202 +0,0 @@ -# Data Harmonization - -## Table of Contents - -**Table of Contents:** -- [Overview](#overview) -- [Rules for keys](#rules-for-keys) -- [Sections](#sections) -- [Feed](#feed) -- [Time](#time) -- [Source Identity](#source-identity) - - [Source Geolocation Identity](#source-geolocation-identity) - - [Source Local Identity](#source-local-identity) -- [Destination Identity](#destination-identity) - - [Destination Geolocation Identity](#destination-geolocation-identity) - - [Destination Local Identity](#destination-local-identity) -- [Extra values](#extra-values) -- [Fields List and data types](#fields-list-and-data-types) -- [Classification](#classification) -- [Minimum recommended requirements for events](#minimum-recommended-requirements-for-events) - - -## Overview - -All messages (reports and events) are Python/JSON dictionaries. The key names and according types are defined by the so called *harmonization*. - -The purpose of this document is to list and clearly define known **fields** in Abusehelper as well as IntelMQ or similar systems. A field is a ```key=value``` pair. For a clear and unique definition of a field, we must define the **key** (field-name) as well as the possible **values**. A field belongs to an **event**. An event is basically a structured log record in the form ```key=value, key=value, key=value, …```. In the [List of known fields](Harmonization-fields.md), each field is grouped by a **section**. We describe these sections briefly below. -Every event **MUST** contain a timestamp field. - -An [IOC](https://en.wikipedia.org/wiki/Indicator_of_compromise) (Indicator of compromise) is a single observation like a log line. - -## Rules for keys - -The keys can be grouped together in sub-fields, e.g. `source.ip` or `source.geolocation.latitude`. Thus, keys must match `^[a-z_](.[a-z0-9_]+)*$`. - - -## Sections - -As stated above, every field is organized under some section. The following is a description of the sections and what they imply. - -### Feed - -Fields listed under this grouping list details about the source feed where information came from. - -### Time - -The time section lists all fields related to time information. -This document requires that all the timestamps MUST be normalized to UTC. If the source reports only a date, do not attempt to invent timestamps. - -### Source Identity - -This section lists all fields related to identification of the source. 
The source is the identity the IoC is about, as opposed to the destination identity, which is another identity. - -For examples see the table below. - -The abuse type of an event defines the way these events needs to be interpreted. For example, for a botnet drone they refer to the compromised machine, whereas for a command and control server they refer the server itself. - -#### Source Geolocation Identity - -We recognize that ip geolocation is not an exact science and analysis of the abuse data has shown that different sources attribution sources have different opinions of the geolocation of an ip. This is why we recommend to enrich the data with as many sources as you have available and make the decision which value to use for the cc IOC based on those answers. - -#### Source Local Identity - -Some sources report an internal (NATed) IP address. - -### Destination Identity - -The abuse type of an event defines the way these IOCs needs to be interpreted. For a botnet drone they refer to the compromised machine, whereas for a command and control server they refer the server itself. - -#### Destination Geolocation Identity - -We recognize that ip geolocation is not an exact science and analysis of the abuse data has shown that different sources attribution sources have different opinions of the geolocation of an ip. This is why we recommend to enrich the data with as many sources as you have available and make the decision which value to use for the cc IOC based on those answers. - -#### Destination Local Identity - -Some sources report an internal (NATed) IP address. - -### Extra values -Data which does not fit in the harmonization can be saved in the 'extra' namespace. All keys must begin with `extra.`, there are no other rules on key names and values. The values can be get/set like all other fields. - -## Fields List and data types - -A list of allowed fields and data types can be found in [Harmonization-fields](Harmonization-fields.md) page. - -## Classification - -IntelMQ classifies events using three labels: taxonomy, type and identifier. This tuple of three values can be used for deduplication of events and describes what happened. -TODO: examples from chat - -The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following taxonomy-type mapping is based on [eCSIRT II Taxonomy](https://www.trusted-introducer.org/Incident-Classification-Taxonomy.pdf): - -|Taxonomy|Type|Description| -|--------|----|-----------| -|abusive content|spam|Or 'Unsolicited Bulk Email', this means that the recipient has not granted verifiable permission for the message to be sent and that the message is sent as part of a larger collection of messages, all having a functionally comparable content.| -|abusive-content|harmful-speech|Discreditation or discrimination of somebody, e.g. cyber stalking, racism or threats against one or more individuals.| -|abusive-content|violence|Child pornography, glorification of violence, etc.| -|availability|ddos|Distributed Denial of Service attack, e.g. SYN-Flood or UDP-based reflection/amplification attacks.| -|availability|dos|Denial of Service attack, e.g. sending specially crafted requests to a web application which causes the application to crash or slow down.| -|availability|outage|Outage caused e.g. 
by air condition failure or natural disaster.| -|availability|sabotage|Physical sabotage, e.g cutting wires or malicious arson.| -|fraud|copyright|Offering or Installing copies of unlicensed commercial software or other copyright protected materials (Warez).| -|fraud|masquerade|Type of attack in which one entity illegitimately impersonates the identity of another in order to benefit from it.| -|fraud|phishing|Masquerading as another entity in order to persuade the user to reveal private credentials.| -|fraud|unauthorized-use-of-resources|Using resources for unauthorized purposes including profit-making ventures, e.g. the use of e-mail to participate in illegal profit chain letters or pyramid schemes.| -|information content security|Unauthorised-information-access|Unauthorized access to information, e.g. by abusing stolen login credentials for a system or application, intercepting traffic or gaining access to physical documents.| -|information content security|Unauthorised-information-modification|Unauthorised modification of information, e.g. by an attacker abusing stolen login credentials for a system or application or a ransomware encrypting data.| -|information content security|data-loss|Loss of data, e.g. caused by harddisk failure or physical theft.| -|information content security|dropzone|This IOC refers to place where the compromised machines store the stolen user data. Not in ENISA eCSIRT-II taxonomy.| -|information content security|leak|IOCs relating to leaked credentials or personal data. Not in ENISA eCSIRT-II taxonomy.| -|information gathering|scanner|Attacks that send requests to a system to discover weaknesses. This also includes testing processes to gather information on hosts, services and accounts. Examples: fingerd, DNS querying, ICMP, SMTP (EXPN, RCPT, ...), port scanning.| -|information-gathering|sniffing|Observing and recording of network traffic (wiretapping).| -|information-gathering|social-engineering|Gathering information from a human being in a non-technical way (e.g. lies, tricks, bribes, or threats). This IOC refers to a resource, which has been observed to perform brute-force attacks over a given application protocol.| -|intrusion attempts|brute-force|Multiple login attempts (Guessing / cracking of passwords, brute force).| -|intrusion attempts|exploit|An attack using an unknown exploit.| -|intrusion attempts|ids-alert|IOCs based on a sensor network. This is a generic IOC denomination, should it be difficult to reliably denote the exact type of activity involved for example due to an anecdotal nature of the rule that triggered the alert.| -|intrusions|application-compromise|Compromise of an application by exploiting (un)known software vulnerabilities, e.g. SQL injection.| -|intrusions|backdoor|This refers to hosts, which have been compromised and backdoored with a remote administration software or Trojan in the traditional sense. Not in ENISA eCSIRT-II taxonomy.| -|intrusions|burglary|Physical intrusion, e.g. into corporate building or data center.| -|intrusions|compromised|This IOC refers to compromised system. Not in ENISA eCSIRT-II taxonomy.| -|intrusions|defacement|This IOC refers to hacktivism related activity. Not in ENISA eCSIRT-II taxonomy.| -|intrusions|privileged-account-compromise|Compromise of a system where the attacker gained administrative privileges.| -|intrusions|unauthorized-command|The possibly infected device sent unauthorized commands to a remote device with malicious intent. 
Not in ENISA eCSIRT-II taxonomy.| -|intrusions|unauthorized-login|A possibly infected device logged in to a remote device without authorization. Not in ENISA eCSIRT-II taxonomy.| -|intrusions|unprivileged-account-compromise|Compromise of a system using an unprivileged (user/service) account.| -|malicious code|c2server|This is a command and control server in charge of a given number of botnet drones.| -|malicious code|dga domain|DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in ENISA eCSIRT-II taxonomy.| -|malicious code|infected-system|This is a compromised machine, which has been observed to make a connection to a command and control server.| -|malicious code|malware|A URL is the most common resource with reference to malware binary distribution. Not in ENISA eCSIRT-II taxonomy.| -|malicious code|malware-configuration|This is a resource which updates botnet drones with a new configuration.| -|malicious code|malware-distribution|URI used for malware distribution, e.g. a download URL included in fake invoice malware spam.| -|malicious code|ransomware|This IOC refers to a specific type of compromised machine, where the computer has been hijacked for ransom by the criminals. Not in ENISA eCSIRT-II taxonomy and deprecated, use 'infected system instead'.| -|other|blacklist|Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another.| -|other|other|All incidents which don't fit in one of the given categories should be put into this class.| -|other|proxy|This refers to the use of proxies from inside your network. Not in ENISA eCSIRT-II taxonomy.| -|other|tor|This IOC refers to incidents related to TOR network infrastructure. Not in ENISA eCSIRT-II taxonomy.| -|other|unknown|Unknown classification. Not in ENISA eCSIRT-II taxonomy.| -|test|test|Meant for testing.| -|vulnerable|ddos-amplifier|Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled.| -|vulnerable|information-disclosure|Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis.| -|vulnerable|potentially-unwanted-accessible|Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC.| -|vulnerable|vulnerable client|This attribute refers to a badly configured or vulnerable clients, which may be vulnerable and can be compromised by a third party. For example, not-up-to-date clients or client which are misconfigured, such as clients querying public domains for WPAD configurations. In addition, to specify the vulnerability and its potential abuse, one should use the classification.identifier, description and other attributes for that purpose respectively. Not in ENISA eCSIRT-II taxonomy.| -|vulnerable|vulnerable service|This attribute refers to a badly configured or vulnerable network service, which may be abused by a third party. 
For example, these services relate to open proxies, open dns resolvers, network time servers (NTP) or character generation services (chargen), simple network management services (SNMP). In addition, to specify the network service and its potential abuse, one should use the protocol, destination port and description attributes for that purpose respectively. Not in ENISA eCSIRT-II taxonomy.| -|vulnerable|vulnerable-system|A system which is vulnerable to certain attacks. Example: misconfigured client proxy settings (example: WPAD), outdated operating system version, etc.| -|vulnerable|weak-crypto|Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks.| - -Meaning of source, destination and local values for each classification type and possible identifiers. The identifier is often a normalized malware name, grouping many variants. - -|Type|Source|Destination|Local|Possible identifiers| -|----|------|-----------|-----|--------------------| -|backdoor|*backdoored device*|||| -|blacklist|*blacklisted device*|||| -|brute-force|*attacker*|target||| -|c2server|*(sinkholed) c&c server*|||zeus, palevo, feodo| -|compromised|*server*|||| -|ddos|*attacker*|target||| -|defacement|*defaced website*|||| -|dga domain|*infected device*|||| -|dropzone|*server hosting stolen data*|||| -|exploit|*hosting server*|||| -|ids-alert|*triggering device*|||| -|infected system|*infected device*|*contacted c2c server*||| -|malware|*infected device*||internal at source|zeus, palevo, feodo| -|malware configuration|*infected device*|||| -|other|||||| -|phishing|*phishing website*|||| -|proxy|*server allowing policy and security bypass*|||| -|ransomware|*infected device*|||| -|scanner|*scanning device*|scanned device||http,modbus,wordpress| -|spam|*infected device*|targeted server|internal at source|| -|test|||||| -|unknown|||||| -|vulnerable service|*vulnerable device*||| heartbleed, openresolver, snmp | -|vulnerable client|*vulnerable device*||| wpad | - -Field in italics is the interesting one for CERTs. - -Example: - -If you know of an IP address that connects to a zeus c&c server, it's about the infected device, thus type malware and identifier zeus. If you want to complain about the c&c server, it's type c&c and identifier zeus. The `malware.name` can have the full name, eg. 'zeus_p2p'. - -## Minimum recommended requirements for events - -Below, we have enumerated the minimum recommended requirements for an actionable abuse event. These keys should to be present for the abuse report to make sense for the end recipient. Please note that if you choose to anonymize your sources, you can substitute **feed** with **feed.code** and that only one of the identity keys **ip**, **domain name**, **url**, **email address** must be present. All the rest of the keys are **optional**. - -|Category|Key|Terminology| -|--------|---|-----------| -|Feed|feed|Should| -|Classification|classification.type|Should| -|Classification|classification.taxonomy|Should| -|Time|time.source|Should| -|Time|time.observation|Should| -|Identity|source.ip|Should*| -|Identity|source.fqdn|Should*| -|Identity|source.url|Should*| -|Identity|source.account|Should*| - -* only one of them - -This list of required fields is *not* enforced by IntelMQ. - -**NOTE:** This document was copied from [AbuseHelper repository](https://github.com/abusesa/abusehelper/) and improved. 
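To make the minimum set above concrete, here is a minimal sketch of such an event as a JSON dictionary. The values are purely illustrative, `feed.name` is assumed as the concrete key behind the table's *feed* entry, and the taxonomy/type pair is taken from the classification table above:

```json
{
    "feed.name": "Example Feed",
    "classification.taxonomy": "malicious code",
    "classification.type": "infected-system",
    "time.source": "2017-05-17T22:00:32+00:00",
    "time.observation": "2017-05-17T22:00:33+00:00",
    "source.ip": "192.0.2.1"
}
```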
- diff --git a/docs/guides/Ecosystem.md b/docs/guides/Ecosystem.md deleted file mode 100644 index eb70ac3ed..000000000 --- a/docs/guides/Ecosystem.md +++ /dev/null @@ -1,67 +0,0 @@ -# IntelMQ Ecosystem - -IntelMQ is more than a the core library itself and many programs are developed around in the IntelMQ initiative. -This document provides an overview of the ecosystem and all related tools. If you think something is missing, please let us know! - -## IntelMQ "Core" - -This is IntelMQ itself, as it is available on [github](https://github.com/certtools/intelmq). - -It includes all the bots, the harmonization, etc. - -## IntelMQ Manager - -The Manager is the most known software and can be seen as the face of IntelMQ. -This software provides a graphical user interface to the management tool `intelmqctl`. - -→ [Github repo of the manager](https://github.com/certtools/intelmq-manager/) - -## EventDB - -This is not a software itself but listed here because the term it is often mentioned. - -The EventDB is a (usually PostgreSQL) database with data from intelmq. - -For some related scripts see the [contrib/eventdb](https://github.com/certtools/intelmq/tree/develop/contrib/eventdb) directory and the [eventdb-stats](https://github.com/wagner-certat/eventdb-stats) repository for simple statistics generation. - -## intelmq-webinput-csv - -A web-based interface to inject CSV data into IntelMQ with on-line validation and live feedback. - -→ [Repository: intelmq-webinput-csv](http://github.com/certat/intelmq-webinput-csv/) - -## intelmq-mailgen - -A solution to send grouped notifications to network owners using SMTP/OTRS. - -→ [Repository: intelmq-mailgen](https://github.com/Intevation/intelmq-mailgen) - -## IntelMQ Fody + Backend - -Fody is an interface for intelmq-mailgen's contact database, it's OTRS and the EventDB. -The certbund-contact expert fetches the information from this contact database and provides scripts to import RIPE data into the contact database. - -* →[Repository: intelmq-fody](https://github.com/Intevation/intelmq-fody/) -* →[Repository: intelmq-fody-backend](https://github.com/Intevation/intelmq-fody-backend/) -* →[Repository: intelmq-certbund-contact](https://github.com/Intevation/intelmq-certbund-contact/) - -## "Constituency Portal" do-portal (not developed any further) - -*Note:* A new version is being developed from scratch, see [do-portal#133](https://github.com/certat/do-portal/issues/133) for more information. - -A contact portal with organizational hierarchies, role functionality and network objects based on RIPE, allows self-administration by the contacts. -Can be queried from IntelMQ and integrates the stats-portal. - -→ [Repository: do-portal](https://github.com/certat/do-portal/) - -## stats-portal - -A Grafana-based statistics portal for the EventDB. Integrated in do-portal. - -→ [Repository: stats-portal](https://github.com/certtools/stats-portal/) - -## Malware Name Mapping - -A mapping for malware names of different feeds with different names to a common family name. - -→ [Repository: malware_name_mapping](https://github.com/certtools/malware_name_mapping) diff --git a/docs/guides/Feeds-whishlist.md b/docs/guides/Feeds-whishlist.md deleted file mode 100644 index cb07435e5..000000000 --- a/docs/guides/Feeds-whishlist.md +++ /dev/null @@ -1,73 +0,0 @@ -# Feeds whishlist - -This is a list with various feeds, which are either currently not supported or the usage is not clearly documented in IntelMQ. 
- -If you want to **contribute** documenting how to configure existing bots in order to collect new feeds or by creating new parsers, here is a list of potentially interesting feeds. -See [Feeds documentation](Developers-Guide.html#feeds-documentation) for more information on this. - -This list evolved from the issue [Contribute: Feeds List (#384)](https://github.com/certtools/intelmq/issues/384). - -Lists of feeds: -- [threatfeeds.io](https://threatfeeds.io/) -- [TheCyberThreat](http://thecyberthreat.com/cyber-threat-intelligence-feeds/) - -Some third party intelmq bots: [NRDCS' IntelMQ fork](https://github.com/NRDCS/intelmq/tree/certlt/intelmq/bots) - -List of potentially interesting data sources: -- [Abuse.ch SSL Blacklists](https://sslbl.abuse.ch/blacklist/) -- [Adblock Plus Malwaredomains](https://easylist-msie.adblockplus.org/malwaredomains_full.tpl) -- [apivoid IP Reputation API](https://www.apivoid.com/api/ip-reputation/) -- [APWG's ecrimex](https://www.ecrimex.net) (private) -- [Berkeley](https://security.berkeley.edu/services/aggressive-ip-distribution-aid-list) -- [Binary Defense](https://security.berkeley.edu/services/aggressive-ip-distribution-aid-list) -- [Binary Defense](https://www.binarydefense.com/) -- [Bot Invaders Realtime tracker](http://www.marc-blanchard.com/BotInvaders/index.php) -- [Botscout Last Caught](http://botscout.com/last_caught_cache.htm) -- [Carbon Black Feeds](https://github.com/carbonblack/cbfeeds) -- [CERT.pl Phishing Warning List](http://hole.cert.pl/domains/) -- [Chaos Reigns](http://www.chaosreigns.com/spam/) -- [Critical Stack](https://intel.criticalstack.com) -- [Cruzit](http://www.cruzit.com/xwbl2txt.php) -- [Cyber Crime Tracker](http://cybercrime-tracker.net/all.php) -- [DNS DB API](https://api.dnsdb.info) -- [Facebook Threat Exchange](https://developers.facebook.com/docs/threat-exchange) -- [FilterLists](https://filterlists.com) -- [Firehol IPLists](https://iplists.firehol.org/) -- [Google Webmaster Alerts](https://www.google.com/webmasters/) -- [GPF Comics DNS Blacklist](https://www.gpf-comics.com/dnsbl/export.php) -- [Greensnow](https://blocklist.greensnow.co/greensnow.txt) -- [HP Feeds](https://github.com/rep/hpfeeds) (not a feed, but a feed format) -- [IBM X-Force Exchange](https://exchange.xforce.ibmcloud.com/) -- [ISC SANS](https://isc.sans.edu/ipsascii.html) -- [ISightPartners](http://www.isightpartners.com/) -- [Malshare](https://malshare.com/) -- [Malware Config](http://malwareconfig.com) -- [Malware DB (cert.pl)](https://mwdb.cert.pl/) (private) -- [MalwareDomainList](http://www.malwaredomainlist.com/zeuscsv.php) -- [MalwareDomains](http://www.malwaredomainlist.com/hostslist/yesterday_urls.php) -- [MalwareIntelligence](https://malwareint.com/threatintelligence.php) -- [Manity Spam IP addresses](http://www.dnsbl.manitu.net/download/nixspam-ip.dump.gz) -- [Marc Blanchard DGA Domains](http://www.marc-blanchard.com/BotInvaders/index.php) -- [MaxMind Proxies](https://www.maxmind.com/en/anonymous_proxies) -- [mIRC Servers](http://www.mirc.com/servers.ini) -- [Monzymerza](https://github.com/monzymerza/parthenon) -- [Multiproxy](http://multiproxy.org/txt_all/proxy.txt) -- [OpenBugBounty](https://www.openbugbounty.org/) -- [Payload Security](http://payload-security.com) -- [Project Honeypot](http://www.projecthoneypot.org/list_of_ips.php?rss=1) ([#284](https://github.com/certtools/intelmq/issues/284)) -- [ShadowServer Sandbox API](http://www.shadowserver.org/wiki/pmwiki.php/Services/Sandboxapi) as expert bot (private) -- [Shodan search 
API](https://shodan.readthedocs.io/en/latest/tutorial.html#searching-shodan) -- [Snort IP Blacklist feed](http://talosintel.com/feeds/ip-filter.blf) (see also [this blogpost](https://blog.snort.org/2015/09/ip-blacklist-feed-has-moved-locations.html) -- [Spamhaus Botnet Controller List (BCL)](https://www.spamhaus.org/bgpf/) (private) -- [SteveBlack Hosts File](https://github.com/StevenBlack/hosts) -- [The Haleys](http://charles.the-haleys.org/ssh_dico_attack_hdeny_format.php/hostsdeny.txt) -- [Threat Crowd](https://www.threatcrowd.org/feeds/hashes.txt) -- [Threatstream](https://ui.threatstream.com/) (private) -- [TOR Project Exit addresses](https://check.torproject.org/exit-addresses) -- [TotalHash](http://totalhash.com) as expert bot -- [UCE Protect](http://wget-mirrors.uceprotect.net/) -- [URI BL](http://rss.uribl.com/index.shtml) -- [Virustotal Search](https://www.virustotal.com/gui/home/search) as expert bot -- [virustream](https://github.com/ntddk/virustream) (unmaintained?) -- [VoIP Blacklist](http://www.voipbl.org/update/) -- [YourCMC](http://vmx.yourcmc.ru/BAD_HOSTS.IP4) diff --git a/docs/guides/INSTALL.md b/docs/guides/INSTALL.md deleted file mode 100644 index 679ff66f9..000000000 --- a/docs/guides/INSTALL.md +++ /dev/null @@ -1,128 +0,0 @@ -# Installation -**Table of Contents:** - -- [Requirements](#requirements) -- [Install Dependencies](#install-dependencies) - - [Ubuntu / Debian](#ubuntu--debian) - - [CentOS 7 / RHEL 7](#centos-7--rhel-7) - - [openSUSE Leap 15.1](#opensuse-leap-151) -- [Installation](#installation) - - [Native Packages](#native-packages) - - [PyPi](#pypi) -- [Additional Information](#additional-information) -- [Afterwards](#afterwards) - - -Please report any errors you encounter at https://github.com/certtools/intelmq/issues - -For upgrade instructions, see [UPGRADING](UPGRADING.md). -For setting up a development environment see the [Developer's Guide](Developers-Guide.html#development-environment) section *Development Environment*. -For testing pre-releases see also the [Developer's Guide](Developers-Guide.html#testing-pre-releases) section *Testing Pre-releases*. - -## Requirements - -The following instructions assume the following requirements. Python versions >= 3.5 are supported. - -Supported and recommended operating systems are: -* CentOS 7 -* Debian 9 and 10 -* OpenSUSE Leap 15.1, 15.2 -* Ubuntu: 16.04, 18.04, 20.04 - -Other distributions which are (most probably) supported include CentOS 8, RHEL, Fedora and openSUSE Tumbleweed. - -## Install Dependencies - -If you are using native packages, you can simply skip this section as all dependencies are installed automatically. 
- -### Ubuntu / Debian - -```bash -apt install python3-pip python3-dnspython python3-psutil python3-redis python3-requests python3-termstyle python3-tz python3-dateutil -apt install redis-server -``` - -Optional dependencies: -```bash -apt install bash-completion jq -apt install python3-sleekxmpp python3-pymongo python3-psycopg2 -``` - -### CentOS 7 / RHEL 7 - -```bash -yum install epel-release -yum install python36 python36-devel python36-requests -yum install gcc gcc-c++ -yum install redis -``` - -### openSUSE 15.1 - -```bash -zypper install python3-dateutil python3-dnspython python3-psutil python3-pytz python3-redis python3-requests python3-python-termstyle -zypper install redis -``` - -Optional dependencies: -```bash -zypper in bash-completion jq -zypper in python3-psycopg2 python3-pymongo python3-sleekxmpp -``` - -## Installation - -Installation methods available: - -* native packages (`.deb`, `.rpm`) -* PyPi (latest releases as python package) - -**Note:** installation for development purposes must follow the instructions available on [Developers Guide](Developers-Guide.html#development-environment). - -### Native Packages - -Supported Operating Systems: - -* **CentOS 7** (requires `epel-release`) -* **Debian 8** (requires `python3-typing`) -* **Debian 9** -* **Debian 10** -* **Fedora 29** -* **Fedora 30** -* **RHEL 7** (requires `epel-release`) -* **openSUSE Leap 15.0** -* **openSUSE Leap 15.1** -* **openSUSE Tumbleweed** -* **Ubuntu 16.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ xenial main`) -* **Ubuntu 18.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ bionic main`) -* **Ubuntu 19.10** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ eoan main`) -* **Ubuntu 20.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ focal main`) - -Get the installation instructions for your operating system here: [Installation Native Packages](https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq&package=intelmq). -To import the key on Debian and Ubuntu, use: -```bash -curl https://build.opensuse.org/projects/home:sebix:intelmq/public_key | sudo apt-key add - -``` - -Please report any errors or improvements at [IntelMQ Issues](https://github.com/certtools/intelmq/issues). Thanks! - -### PyPi - -```bash -sudo -i - -pip3 install intelmq - -useradd -d /opt/intelmq -U -s /bin/bash intelmq -sudo intelmqsetup -``` -`intelmqsetup` will create all necessary directories, provides a default configuration for new setups. See [the user-guide section on paths](User-Guide.html#opt-and-lsb-paths) for more information on them and how to influence them. - -### Additional Information - -Following any one of the installation methods mentioned before, will setup the IntelMQ base. However, some bots may have additional dependencies which are mentioned in their own documentation available on in the [Bots documentation](Bots.md). - - -## Afterwards - -Now continue with the [User Guide](User-Guide.md). diff --git a/docs/guides/README.md b/docs/guides/README.md deleted file mode 100644 index 8fd2d956c..000000000 --- a/docs/guides/README.md +++ /dev/null @@ -1,126 +0,0 @@ -Welcome to IntelMQ! 
-=================== - -![IntelMQ](/_static/Logo_Intel_MQ.png) - -[![Build Status](https://travis-ci.org/certtools/intelmq.svg?branch=master)](https://travis-ci.org/certtools/intelmq) -[![codecov.io](https://codecov.io/github/certtools/intelmq/coverage.svg?branch=master)](https://codecov.io/github/certtools/intelmq?branch=master) - -**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs abuse -departments, etc.) for collecting and processing security feeds (such as -log files) using a message queuing protocol. It's a community driven -initiative called **IHAP** (Incident Handling Automation Project) which -was conceptually designed by European CERTs/CSIRTs during several -InfoSec events. Its main goal is to give to incident responders an easy -way to collect & process threat intelligence thus improving the incident -handling processes of CERTs. - -Several pieces of software are evolved around IntelMQ. For an overview, -look at the [Ecosystem document](Ecosystem.md). - -IntelMQ can be used for -- automated incident handling -- situational awareness -- automated notifications -- as data collector for other tools -- etc. - -IntelMQ's design was influenced by -[AbuseHelper](https://github.com/abusesa/abusehelper) -however it was re-written from scratch and aims at: - -- Reducing the complexity of system administration -- Reducing the complexity of writing new bots for new data feeds -- Reducing the probability of events lost in all process with - persistence functionality (even system crash) -- Use and improve the existing Data Harmonization Ontology -- Use JSON format for all messages -- Provide easy way to store data into Log Collectors like - ElasticSearch, Splunk, databases (such as PostgreSQL) -- Provide easy way to create your own black-lists -- Provide easy communication with other systems via HTTP RESTful API - -It follows the following basic meta-guidelines: - -- Don't break simplicity - KISS -- Keep it open source - forever -- Strive for perfection while keeping a deadline -- Reduce complexity/avoid feature bloat -- Embrace unit testing -- Code readability: test with unexperienced programmers -- Communicate clearly - -Table of Contents ------------------ - -1. [How to Install](#how-to-install) -2. [Developers Guide](#developers-guide) -3. [User Guide](#user-guide) -4. [IntelMQ Manager](#intelmq-manager) -5. [Incident Handling Automation Project](#incident-handling-automation-project) -6. [Data Harmonization](#data-harmonization) -7. [How to Participate](#how-to-participate) -8. [Licence](#licence) -9. [Funded by](#funded-by) - -How to Install --------------- - -See [INSTALL](INSTALL.md). - -Developers Guide ----------------- - -See [Developers Guide](Developers-Guide.md). - -User Guide ----------------- - -See [User Guide](User-Guide.md). -Which Feeds are supported? Have a look at the [Feeds documentation](Feeds.md) and the [Bots documentation](Bots.md). -If you know additional feeds and how to parse them, please contribute your code or your configuration (by issues or the mailing lists). - -For support questions please use the intelmq-users mailing list: - -IntelMQ Manager ---------------- - -Check out this graphical -[tool](https://github.com/certtools/intelmq-manager) and easily manage -an IntelMQ system. - -Incident Handling Automation Project ------------------------------------- - -- **URL:** - -- **Mailing-list:** - -Data Harmonization ------------------- - -IntelMQ use the Data Harmonization. Please read [this document](Data-Harmonization.md) for more details. 
- -How to participate ------------------- - -- Subscribe to the Intelmq-dev Mailing list: - (for - developers) -- Watch out for our regular developers conf call -- IRC: server: irc.freenode.net, channel: \#intelmq -- Via github issues -- Via Pull requests (please do read help.github.com first) - -Licence -------- - -This software is licensed under GNU Affero General Public License -version 3 - -Funded by ---------- - -This project was partially funded by the CEF framework - -![Co-financed by the Connecting Europe Facility of the European Union](images/cef_logo.png) diff --git a/docs/guides/Release.md b/docs/guides/Release.md deleted file mode 100644 index df2769b9e..000000000 --- a/docs/guides/Release.md +++ /dev/null @@ -1,117 +0,0 @@ -# Release procedure - -**Table of Contents:** -- [Documentation](#documentation) -- [Commit, push, review and merge](#commit-push-review-and-merge) -- [Tag and release](#tag-and-release) -- [Tarballs and PyPI](#tarballs-and-pypi) -- [Packages](#packages) -- [Announcements](#announcements) -- [Prepare new version](#prepare-new-version) - - -General assumption: You are working on branch maintenance, the next version is a bug fix release. For feature releases it is slightly different. - -## Check before - - * Make sure the current state is really final ;) - You can test most of the steps described here locally before doing it real. - * Check the upgrade functions in `intelmq/lib/upgrades.py`. - * Close the milestone on GitHub and move any open issues to the next one. - * `docs/INSTALL.md`: Update supported operating systems. - -## Documentation - - * CHANGELOG.MD and - * NEWS.MD: Update the latest header, fix the order, remove empty sections and (re)group the entries if necessary. - * `intelmq/version.py`: Update the version. - * `debian/changelog`: Insert a new section for the new version with the tool `dch`. - -Eventually adapt the default log levels if necessary. Should be INFO for stable releases. See older releases. - -## Commit, push, review and merge - -Commit your changes in a separate branch, the final commit's message should start with `REL: `. Push and create a pull request to maintenance and after that from maintenance to master. Someone else should review the changes. Eventually fix them, make sure the `REL: ` is the last commit, you can also push that one at last, after the reviews. - -Why a separate branch? Because if problems show up, you can still force-push to that one, keeping the release commit the latest one. - -## Tag and release - -Tag the commit with `git tag -s version HEAD`, merge it into master, push the branches *and* the tag. The tag is just `a.b.c`, not prefixed with `v` (that was necessary only with SVN a long time ago...). - -Go to https://github.com/certtools/intelmq/tags and enter the release notes (from the CHANGELOG) for the new tag, then it's considered a *release* by GitHub. - -## Tarballs and PyPI - - * Build the source and binary (wheel) distribution: `python3 setup.py sdist bdist_wheel` - * Upload the files including signatures to PyPI with e.g. twine: `twine upload -s dist/intelmq...` - -## Packages -We are currently using the public Open Build Service instance of openSUSE: http://build.opensuse.org/project/show/home:sebix:intelmq - -First, test all the steps first with the [unstable-repository](http://build.opensuse.org/project/show/home:sebix:intelmq:unstable) and check that at least installations succeed. - - * Create the tarballs with the script `create-archives.sh`. 
- * Update the dsc and spec files for new filenames and versions. - * Update the .changes file - * Build locally for all distributions. - * Commit. - -## Announcements - -Announce the new version at the mailinglists intelmq-users, intelmq-dev. -For bigger releases, probably also at IHAP, Twitter, etc. Ask your favorite social media consultant. - -## Prepare new version - -Increase the version in `intelmq/version.py` and declare it as alpha version. -Add the new version in `intelmq/lib/upgrades.py`. -Add a new entry in `debian/changelog` with `dch -v [version] -c debian/changelog`. - -Add new entries to `CHANGELOG.md` and `NEWS.md`. For `CHANGELOG.md`: - -``` -### Configuration - -### Core - -### Development - -### Harmonization - -### Bots -#### Collectors - -#### Parsers - -#### Experts - -#### Outputs - -### Documentation - -### Packaging - -### Tests - -### Tools - -### Contrib - -### Known issues -``` -And for `NEWS.md`: - -``` -### Requirements - -### Tools - -### Harmonization - -### Configuration - -### Libraries - -### Postgres databases -``` diff --git a/docs/guides/UPGRADING.md b/docs/guides/UPGRADING.md deleted file mode 100644 index ca9d825c9..000000000 --- a/docs/guides/UPGRADING.md +++ /dev/null @@ -1,80 +0,0 @@ -# Upgrade instructions - -For installation instructions, see [INSTALL](INSTALL.md). - -**Table of Contents:** -- [Stop IntelMQ and Backup](#stop-intelmq-and-backup) -- [Upgrade IntelMQ](#upgrade-intelmq) - - [Packages](#packages) - - [PyPi](#pypi) - - [Local repository](#local-repository) -- [Check the installation](#check-the-installation) -- [Redefine/Check permissions](#redefinecheck-permissions) -- [Start IntelMQ](#start-intelmq) - - -## Read NEWS - -Read the [NEWS](https://github.com/certtools/intelmq/blob/develop/NEWS.md) file to look for things you need to have a look at. - -## Stop IntelMQ and Backup - -* Make sure that your IntelMQ system is completely stopped: `intelmqctl stop` -* Create a backup of IntelMQ Home directory, which includes all configurations. They are not overwritten, but backups are always nice to have! - -```bash -> sudo cp -R /opt/intelmq /opt/intelmq-backup -``` - -## Upgrade IntelMQ - -Before upgrading, check that your setup is clean and there are no events in the queues: -```bash -intelmqctl check -intelmqctl list queues -q -``` - -The upgrade depends on how you installed IntelMQ. - -### Packages - -Use your systems package management. - -### PyPi - -``` -pip install -U --no-deps intelmq -sudo intelmqsetup -``` -Using `--no-deps` will not upgrade dependencies, which would probably overwrite the system's libraries. -Remove this option to also upgrade dependencies. - -### Local repository - -If you have an editable installation, refer to the instructions in the [Developers Guide](Developers-Guide.html#development-environment). - -Update the repository depending on your setup (e.g. `git pull origin master`). - -And run the installation again: -```bash -pip install . -sudo intelmqsetup -``` -For editable installations (development only), run `pip install -e .` instead. - -## Upgrade configuration and check the installation - -Go through [NEWS](../NEWS.md) and apply necessary adaptions to your setup. -If you have adapted IntelMQ's code, also read the [CHANGELOG](../CHANGELOG.md). 
-
-Check your installation and configuration to detect any problems:
-```bash
-intelmqctl upgrade-config
-intelmqctl check
-```
-
-## Start IntelMQ
-
-```
-> intelmqctl start
-```
diff --git a/docs/guides/images/Logo_Intel_MQ.svg b/docs/guides/images/Logo_Intel_MQ.svg
deleted file mode 100644
index ecc7b7c81..000000000
--- a/docs/guides/images/Logo_Intel_MQ.svg
+++ /dev/null
@@ -1,504 +0,0 @@
diff --git a/docs/guides/intelmqctl.md b/docs/guides/intelmqctl.md
deleted file mode 100644
index 8cd36d7d7..000000000
--- a/docs/guides/intelmqctl.md
+++ /dev/null
@@ -1,456 +0,0 @@
-# intelmqctl documentation
-
-**Table of Contents:**
-- [Introduction](#introduction)
-- [Output type](#output-type)
-- [Manage individual bots](#manage-individual-bots)
-  - [start](#start)
-  - [stop](#stop)
-  - [status](#status)
-  - [restart](#restart)
-  - [reload](#reload)
-  - [run](#run)
-  - [console](#console)
-  - [message](#message)
-  - [process](#process)
-  - [disable](#disable)
-  - [enable](#enable)
-- [Manage the botnet](#manage-the-botnet)
-  - [start](#start)
-  - [stop](#stop)
-  - [status](#status)
-  - [restart](#restart)
-  - [reload](#reload)
-  - [enable / disable](#enable-disable)
-- [List bots](#list-bots)
-- [List queues](#list-queues)
-- [Log](#log)
-- [Check](#check)
-  - [Orphaned queues](#orphaned-queues)
-- [Configuration upgrade](#configuration-upgrade)
-- [Exit code](#exit-code)
-- [Known issues](#known-issues)
-
-## Introduction
-
-intelmqctl is the main tool to handle an IntelMQ installation.
-It handles the bots themselves and provides some tools for handling the installation.
-
-## Output type
-
-intelmqctl can be used as a command line tool, as a library and as a tool by other programs.
-If called directly, it will print all output to the console (stderr).
-If used as a python library, the python types themselves are returned.
-The third option is to use machine-readable JSON as output (used by other managing tools).
-
-## Manage individual bots
-
-Like all init systems, intelmqctl has the methods start, stop, restart, reload and status.
-
-### start
-
-This will start the bot with the ID `file-output`. A file with its PID will be created in `/opt/intelmq/var/run/[bot-id].pid`.
-
-```bash
-> intelmqctl start file-output
-Starting file-output...
-file-output is running.
-```
-
-If the bot is already running, it won't be started again:
-```bash
-> intelmqctl start file-output
-file-output is running.
-```
-
-### stop
-
-If the PID file does exist, a SIGINT will be sent to the process. After 0.25s we check if the process is running. If not, the PID file will be removed.
-
-```bash
-> intelmqctl stop file-output
-Stopping file-output...
-file-output is stopped.
-```
-
-If there's no running bot, there's nothing to do.
-```bash
-> intelmqctl stop file-output
-file-output was NOT RUNNING.
-```
-
-If the bot did not stop in 0.25s, intelmqctl will say it's still running:
-
-```bash
-> intelmqctl stop file-output
-file-output is still running
-```
-
-### status
-
-Checks for the PID file and if the process with the given PID is alive. If the PID file exists, but the process does not exist, it will be removed.
-
-```bash
-> intelmqctl status file-output
-file-output is stopped.
-> intelmqctl start file-output
-Starting file-output...
-file-output is running.
-> intelmqctl status file-output
-file-output is running.
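-# a stale PID file (file present, but no process with that PID alive) would
-# have been removed by this check and the bot reported as stopped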
-```
-
-### restart
-
-The same as stop and start consecutively.
-
-```bash
-> intelmqctl restart file-output
-Stopping file-output...
-file-output is stopped.
-Starting file-output...
-file-output is running.
-```
-
-### reload
-
-Sends a SIGHUP to the bot, which will then reload the configuration.
-
-```bash
-> intelmqctl reload file-output
-Reloading file-output ...
-file-output is running.
-```
-If the bot is not running, we can't reload it:
-```bash
-> intelmqctl reload file-output
-file-output was NOT RUNNING.
-```
-
-### run
-
-Run a bot directly for debugging purposes.
-
-If launched with no arguments, the bot will call its init method and start processing messages as usual – but you see everything that happens.
-
-```bash
-> intelmqctl run file-output
-file-output: RestAPIOutputBot initialized with id file-output and version 3.5.2 as process 12345.
-file-output: Bot is starting.
-file-output: Loading source pipeline and queue 'file-output-queue'.
-file-output: Connected to source queue.
-file-output: No destination queues to load.
-file-output: Bot initialization completed.
-file-output: Waiting for incoming message.
-```
-
-Should you get lost at any time, just use **--help** after any argument for further explanation.
-
-```bash
-> intelmqctl run file-output --help
-```
-
-Note that if another instance of the bot is running, only a warning will be displayed.
-
-```bash
-> intelmqctl run file-output
-Main instance of the bot is running in the background. You may want to launch: intelmqctl stop file-output
-```
-
-You can set the log level with the `-l` flag, e.g. `-l DEBUG`. For the 'console' subcommand, 'DEBUG' is the default.
-
-#### console
-
-If launched with the **console** argument, you get a ```pdb``` live console, or an ```ipdb``` or ```pudb``` console if one was previously installed (e.g. ```pip3 install ipdb --user```).
-
-```bash
-> intelmqctl run file-output console
-*** Using console ipdb. Please use 'self' to access to the bot instance properties. ***
-ipdb> self. ...
-```
-
-You may specify the desired console in the next argument.
-
-```bash
-> intelmqctl run file-output console pudb
-```
-
-#### message
-
-Operate directly with the input / output pipelines.
-
-If **get** is the parameter, you see the message that waits in the input (source or internal) queue. If the argument is **pop**, the message gets popped as well.
-
-```bash
-> intelmqctl run file-output message get
-file-output: Waiting for a message to get...
-{
-    "classification.type": "c&c",
-    "feed.url": "https://example.com",
-    "raw": "1233",
-    "source.ip": "1.2.3.4",
-    "time.observation": "2017-05-17T22:00:33+00:00",
-    "time.source": "2017-05-17T22:00:32+00:00"
-}
-```
-
-To send directly to the bot's output queue, just as it was sent by ```self.send_message()``` in the bot's ```process()``` method, use the **send** argument.
-In our case of ```file-output```, it has no destination queue, so nothing happens.
-
-```bash
-> intelmqctl run file-output message send '{"time.observation": "2017-05-17T22:00:33+00:00", "time.source": "2017-05-17T22:00:32+00:00"}'
-file-output: Bot has no destination queues.
-```
-
-Note: if you would like to know the possible parameters of the message, put a wrong one – you will be prompted if you want to list all the current bot harmonization.
-
-#### process
-
-With no other arguments, the bot's ```process()``` method will be run one time.
-
-```bash
-> intelmqctl run file-output process
-file-output: Bot is starting.
-file-output: Bot initialization completed.
-file-output: Processing...
-file-output: Waiting for incoming message.
-file-output: Received message {'raw': '1234'}.
-```
-
-If run with the **--dryrun|-d** flag, the message never really gets popped out from the source or internal pipeline, nor sent to the output pipeline.
-Plus, you receive a note about the exact moment the message would get sent, or acknowledged. If the message would be sent to a non-default path, the name of this path is printed on the console.
-
-```bash
-> intelmqctl run file-output process -d
-file-output: * Dryrun only, no message will be really sent through.
-...
-file-output: DRYRUN: Message would be acknowledged now!
-```
-
-You may trick the bot to process a JSON instead of the Message in its pipeline with the **--msg|-m** flag.
-
-```bash
-> intelmqctl run file-output process -m '{"source.ip":"1.2.3.4"}'
-file-output: * Message from cli will be used when processing.
-...
-```
-
-If you wish to display the processed message as well, use the **--show-sent|-s** flag. Then, if sent through (either with `--dryrun` or without), the message gets displayed as well.
-
-### disable
-
-Sets the `enabled` flag in the runtime configuration of the bot to `false`.
-By default, all bots are enabled.
-
-Example output:
-
-```bash
-> intelmqctl status file-output
-file-output is stopped.
-> intelmqctl disable file-output
-> intelmqctl status file-output
-file-output is disabled.
-```
-
-### enable
-
-Sets the `enabled` flag in the runtime configuration of the bot to `true`.
-
-Example output:
-
-```bash
-> intelmqctl status file-output
-file-output is disabled.
-> intelmqctl enable file-output
-> intelmqctl status file-output
-file-output is stopped.
-```
-
-## Manage the botnet
-
-In IntelMQ, the botnet is the set of all currently configured and enabled bots.
-All configured bots have their configuration in runtime.conf and their queues in pipeline.conf.
-By default, all bots are enabled. To disable a bot set `enabled` to `false`.
-Also see [Bots](Bots) and [User Guide: Runtime Configuration](User-Guide.html#runtime-configuration).
-
-If no bot id is given, the command applies to all bots / the botnet.
-All commands except the start action are applied to all bots.
-But only enabled bots are started.
-
-In the examples below, a very minimal botnet is used.
-
-### start
-
-The start action applies to all bots which are enabled.
-
-```bash
-> intelmqctl start
-Starting abusech-domain-parser...
-abusech-domain-parser is running.
-Starting abusech-feodo-domains-collector...
-abusech-feodo-domains-collector is running.
-Starting deduplicator-expert...
-deduplicator-expert is running.
-file-output is disabled.
-Botnet is running.
-```
-
-As we can see, file-output is disabled and thus has not been started. You can always explicitly start disabled bots.
-
-### stop
-The stop action applies to all bots. Assume that all bots have been running:
-
-```bash
-> intelmqctl stop
-Stopping Botnet...
-Stopping abusech-domain-parser...
-abusech-domain-parser is stopped.
-Stopping abusech-feodo-domains-collector...
-abusech-feodo-domains-collector is stopped.
-Stopping deduplicator-expert...
-deduplicator-expert is stopped.
-Stopping file-output...
-file-output is stopped.
-Botnet is stopped.
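-# note that the stop action also covered file-output, even though it is disabled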
-```
-
-### status
-
-With this command we can see the status of all configured bots. Here, the botnet was started beforehand:
-```bash
-> intelmqctl status
-abusech-domain-parser is running.
-abusech-feodo-domains-collector is running.
-deduplicator-expert is running.
-file-output is disabled.
-```
-And if the disabled bot has also been started:
-```bash
-> intelmqctl status
-abusech-domain-parser is running.
-abusech-feodo-domains-collector is running.
-deduplicator-expert is running.
-file-output is running.
-```
-
-If the botnet is stopped, the output looks like this:
-```bash
-> intelmqctl status
-abusech-domain-parser is stopped.
-abusech-feodo-domains-collector is stopped.
-deduplicator-expert is stopped.
-file-output is disabled.
-```
-
-### restart
-The same as stop and start consecutively.
-
-### reload
-The same as reloading every bot individually.
-
-### enable / disable
-The sub commands `enable` and `disable` set the corresponding flags in runtime.conf.
-
-```bash
-> intelmqctl status
-file-output is stopped.
-malware-domain-list-collector is stopped.
-malware-domain-list-parser is stopped.
-> intelmqctl disable file-output
-> intelmqctl status
-file-output is disabled.
-malware-domain-list-collector is stopped.
-malware-domain-list-parser is stopped.
-> intelmqctl enable file-output
-> intelmqctl status
-file-output is stopped.
-malware-domain-list-collector is stopped.
-malware-domain-list-parser is stopped.
-```
-
-## List bots
-`intelmqctl list bots` lists all configured bots and their descriptions.
-
-## List queues
-`intelmqctl list queues` shows all queues which are currently in use according to the configuration, and how many events are in them:
-
-```bash
-> intelmqctl list queues
-abusech-domain-parser-queue - 0
-abusech-domain-parser-queue-internal - 0
-deduplicator-expert-queue - 0
-deduplicator-expert-queue-internal - 0
-file-output-queue - 234
-file-output-queue-internal - 0
-```
-
-Use the `-q` or `--quiet` flag to only show non-empty queues:
-
-```bash
-> intelmqctl list queues -q
-file-output-queue - 234
-```
-
-The `--sum` or `--count` flag will show the sum of events on all queues:
-```bash
-> intelmqctl list queues --sum
-42
-```
-
-
-## Log
-
-intelmqctl can show the last log lines for a bot, filtered by the log level.
-
-See the help page for more information.
-
-## Check
-This command will do various sanity checks on the installation and especially the configuration.
-
-### Orphaned Queues
-
-The `intelmqctl check` tool can search for orphaned queues. "Orphaned queues" are queues that have been used in the past and are no longer in use. For example, you had a bot which you removed or renamed afterwards, but there were still messages in its source queue. The source queue won't be renamed automatically and is now disconnected. As this queue is no longer configured, it won't show up in the list of IntelMQ's queues either. In case you are using Redis as the message broker, you can use the `redis-cli` tool to examine or remove these queues:
-
-```bash
-redis-cli -n 2
-keys * # lists all existing non-empty queues
-llen [queue-name] # shows the length of the queue [queue-name]
-lindex [queue-name] [index] # shows the [index]'th message of the queue [queue-name]
-del [queue-name] # remove the queue [queue-name]
-```
-
-To ignore certain queues in this check, you can set the parameter `intelmqctl_check_orphaned_queues_ignore` in the *defaults* configuration file.
For example:
-
-```json
-    "intelmqctl_check_orphaned_queues_ignore": ["Taichung-Parser"],
-```
-
-## Configuration upgrade
-The `intelmqctl upgrade-config` command upgrades the configuration from previous versions to the current one.
-It keeps track of previously installed versions and the result of all "upgrade functions" in the "state file", located at `$var_state_path/state.json` (`/opt/intelmq/var/lib/state.json` or `/var/lib/intelmq/state.json`).
-
-This function has been introduced in version 2.0.1.
-
-It creates backups of all changed files before every run. Backups are overwritten if they already exist, so make sure to always have a backup of your configuration, just in case.
-
-## Exit code
-In case of errors or unsuccessful operations, the exit code is higher than 0.
-For example, when running `intelmqctl start` and one enabled bot is not running, the exit code is 1.
-The same is valid for e.g. `intelmqctl status`, which can be used for monitoring, and all other operations.
-
-## Known issues
-
-The currently implemented process management using PID files is error-prone.
diff --git a/docs/index.rst b/docs/index.rst
index e1a56c670..cfaf2073d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,10 +1,27 @@
-Welcome to intelmq's documentation!
-===================================
+IntelMQ
+-------
 
-.. toctree::
-   :maxdepth: 1
-
-   guides/Ecosystem.md
+.. figure:: _static/Logo_Intel_MQ.png
+   :alt: IntelMQ
+
+|Build Status| |codecov.io| |CII Badge|
+
+.. |Build Status| image:: https://travis-ci.org/certtools/intelmq.svg?branch=develop
+   :target: https://travis-ci.org/certtools/intelmq
+.. |codecov.io| image:: https://codecov.io/github/certtools/intelmq/coverage.svg?branch=develop
+   :target: https://codecov.io/github/certtools/intelmq?branch=master
+.. |CII Badge| image:: https://bestpractices.coreinfrastructure.org/projects/4186/badge
+   :target: https://bestpractices.coreinfrastructure.org/projects/4186/
+
+**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs, abuse
+departments, etc.) for collecting and processing security feeds (such as
+log files) using a message queuing protocol. It's a community-driven
+initiative called **IHAP** (Incident Handling Automation Project) which
+was conceptually designed by European CERTs/CSIRTs during several
+InfoSec events. Its main goal is to give incident responders an easy
+way to collect & process threat intelligence, thus improving the incident
+handling processes of CERTs.
 
 User guide
 ==========
@@ -13,17 +30,18 @@ User guide
    :glob:
    :maxdepth: 1
 
-   guides/README.md
-   guides/INSTALL.md
-   guides/UPGRADING.md
-   guides/User-Guide.md
-   guides/intelmqctl.md
-   guides/Bots.md
-   guides/Feeds.md
-   guides/ELK-Stack.md
-   guides/FAQ.md
-   guides/MISP-Integrations.md
-   guides/n6-integrations.md
+   user/introduction
+   user/installation
+   user/upgrade
+   user/configuration-management
+   user/bots
+   user/intelmqctl
+   user/feeds
+   user/ecosystem
+   user/ELK-Stack
+   user/FAQ
+   user/MISP-Integrations
+   user/n6-integrations
 
 
 Getting involved
 ================
 
 .. toctree::
    :maxdepth: 1
 
-   guides/Developers-Guide.md
-   guides/Data-Harmonization.md
-   guides/Harmonization-fields.md
-   guides/Release.md
-   guides/IntelMQ-3.0-Architecture.md
-   guides/Feeds-whishlist.md
+   dev/guide
+   dev/data-harmonization
+   dev/harmonization-fields.md
+   dev/release-procedure
+   dev/feeds-wishlist
+   dev/IntelMQ-3.0-Architecture.md
+
+Licence
+=======
+
+This software is licensed under the GNU Affero General Public License version 3.
+
+Funded by
+=========
+
+This project was partially funded by the CEF framework.
+
+.. figure:: _static/cef_logo.png
+   :alt: Co-financed by the Connecting Europe Facility of the European Union
 
 Indices and tables
 ==================
diff --git a/docs/guides/ELK-Stack.md b/docs/user/ELK-Stack.rst
similarity index 70%
rename from docs/guides/ELK-Stack.md
rename to docs/user/ELK-Stack.rst
index a594f14b3..32fead2b3 100644
--- a/docs/guides/ELK-Stack.md
+++ b/docs/user/ELK-Stack.rst
@@ -1,100 +1,112 @@
-# ELK Stack
+ELK Stack
+=========
 
If you wish to run IntelMQ with ELK (Elasticsearch, Logstash, Kibana) it is entirely possible. This guide assumes the reader is familiar with basic configuration of ELK and does not aim to cover using ELK in general. It is based on version 6.8.0 (ELK is a fast-moving train, therefore things might change). Assuming you have an IntelMQ (and Redis) installation in place, let's dive in.
 
-## Configuring IntelMQ for Logstash
+Configuring IntelMQ for Logstash
+--------------------------------
 
In order to pass IntelMQ events to Logstash we will utilize the already installed Redis. Add a new Redis Output Bot to your pipeline. At minimum, fill in the following parameters: `bot-id`, `redis_server_ip` (can be a hostname), `redis_server_port`, `redis_password` (if required, else set to empty!), `redis_queue` (name for the queue). Redis IP, port and password can be taken from `defaults.conf`. It is recommended to use a different `redis_db` parameter than those used by IntelMQ (specified in `defaults.conf` as `source_pipeline_db`, `destination_pipeline_db` and `statistics_database`).
 
Example values:
 
-```json
-bot-id: logstash-output
-redis_server_ip: 10.10.10.10
-redis_server_port: 6379
-redis_db: 4
-redis_queue: logstash-queue
-```
-#### Notes
+.. code-block::
+
+   bot-id: logstash-output
+   redis_server_ip: 10.10.10.10
+   redis_server_port: 6379
+   redis_db: 4
+   redis_queue: logstash-queue
+
+**Notes**
 
* Unfortunately you will not be able to monitor this redis queue via IntelMQ Manager.
 
-## Configuring Logstash
+Configuring Logstash
+--------------------
 
Logstash defines pipelines as well. In the pipeline configuration of Logstash you need to specify where it should look for IntelMQ events, what to do with them and where to pass them.
 
-### Input
+Input
+^^^^^
 
This part describes how to receive data from the Redis queue. See the example configuration and comments below:
 
-```
-input {
-    redis {
-        host => "10.10.10.10"
-        port => 6379
-        db => 4
-        data_type => "list"
-        key => "logstash-queue"
-    }
-}
-```
+.. code-block::
+
+   input {
+      redis {
+         host => "10.10.10.10"
+         port => 6379
+         db => 4
+         data_type => "list"
+         key => "logstash-queue"
+      }
+   }
+
* `host` - same as redis_server_ip from the Redis Output Bot
* `port` - the redis_server_port from the Redis Output Bot
* `db` - the redis_db parameter from the Redis Output Bot
* `data_type` - set to `list`
* `key` - same as redis_queue from the Redis Output Bot
 
-#### Notes
+**Notes**
 
* You can also use syntax like this: `host => "${REDIS_HOST:10.10.10.10}"`. The value will be taken from the environment variable `$REDIS_HOST`. If the environment variable is not defined, then the default value of `10.10.10.10` will be used instead.
 
-### Filter (optional)
+Filter (optional)
+^^^^^^^^^^^^^^^^^
 
Before passing the data to the database you can apply certain changes. This is done with filters. See an example:
 
-```
-filter {
-    mutate {
-        lowercase => ["source.geolocation.city", "classification.identifier"]
-        remove_field => ["__type", "@version"]
-    }
-    date {
-        match => ["time.observation", "ISO8601"]
-    }
-}
-```
-
-#### Notes
+.. code-block::
+
+   filter {
+      mutate {
+         lowercase => ["source.geolocation.city", "classification.identifier"]
+         remove_field => ["__type", "@version"]
+      }
+      date {
+         match => ["time.observation", "ISO8601"]
+      }
+   }
+
+**Notes**
 
* It is not recommended to apply any modifications to the data (within the `mutate` key) outside of IntelMQ. All necessary modifications should be done only by the appropriate IntelMQ bots. This example only demonstrates the possibility.
* It is recommended to use the `date` filter: generally we have two timestamp fields - `time.source` (provided by the feed source; this can be understood as when the event happened, however it is not always present) and `time.observation` (when IntelMQ collected this event). Logstash also adds another field `@timestamp` with the time of processing by Logstash. While it can be useful for debugging, I recommend setting `@timestamp` to the same value as `time.observation`.
 
-### Output
+Output
+^^^^^^
 
The pipeline also needs an output, where we define our database (Elasticsearch). The simplest way of doing so is defining an output like this:
 
-```
-output {
-    elasticsearch {
-        hosts => ["http://10.10.10.11:9200", "http://10.10.10.12:9200"]
-        index => "intelmq-%{+YYYY.MM}"
-    }
-}
-```
+
+.. code-block::
+
+   output {
+      elasticsearch {
+         hosts => ["http://10.10.10.11:9200", "http://10.10.10.12:9200"]
+         index => "intelmq-%{+YYYY.MM}"
+      }
+   }
+
* `hosts` - Elasticsearch host (or more) with the correct port (9200 by default)
* `index` - name of the index where to insert data
 
-#### Notes
+**Notes**
+
* The author's experience, hardware equipment and the amount of events collected led to having a separate index for each month. This might not necessarily suit your needs, but it is a suggested option.
* By default the ELK stack uses insecure HTTP. It is possible to set up Security for secure connections and basic user management. This is possible with the Basic (free) licence since versions 6.8.0 and 7.1.0.
 
-## Configuring Elasticsearch
+Configuring Elasticsearch
+-------------------------
 
-Configuring Elasticsearch is entirely up to you and should be consulted with the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/index.html). What you will most likely need is something called [index template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) mappings. IntelMQ provides a tool for generating such mappings. See [ElasticMapper Tool](../contrib/elasticsearch/README.md).
+Configuring Elasticsearch is entirely up to you and should be consulted with the `official documentation <https://www.elastic.co/guide/en/elasticsearch/reference/index.html>`_. What you will most likely need is something called `index template <https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html>`_ mappings. IntelMQ provides a tool for generating such mappings. See the `ElasticMapper Tool <https://github.com/certtools/intelmq/tree/master/contrib/elasticsearch>`_.
 
-#### Notes
+**Notes**
 
* The default installation of the Elasticsearch database allows anyone with cURL and connection capability administrative access to the database. Make sure you secure your toys!
diff --git a/docs/guides/FAQ.md b/docs/user/FAQ.rst
similarity index 78%
rename from docs/guides/FAQ.md
rename to docs/user/FAQ.rst
index eeee1c35a..308f81d3c 100644
--- a/docs/guides/FAQ.md
+++ b/docs/user/FAQ.rst
@@ -1,21 +1,16 @@
-# Frequently asked questions
+Frequently asked questions
+==========================
 
-**Table of Contents:**
+.. contents::
 
-- [Send IntelMQ events to Splunk](#send-intelmq-events-to-splunk)
-- [Permission denied when using redis unix socket](#permission-denied-when-using-redis-unix-socket)
-- [Why is the time invalid?](#why-is-the-time-invalid)
-- [How can I improve the speed?](#how-can-i-improve-the-speed)
-- [My bot(s) died on startup with no errors logged](#my-bots-died-on-startup-with-no-errors-logged)
-- [Orphaned Queues](#orphaned-queues)
-- [Multithreading is not available for this bot](#multithreading-is-not-available-for-this-bot)
-
-## Send IntelMQ events to Splunk
+Send IntelMQ events to Splunk
+-------------------------------------------------------------------
 
1. Configure Splunk so that it can receive logs (IntelMQ events) on a TCP port.
2. Use the TCP output bot and configure it according to the Splunk configuration you applied.
 
-## Permission denied when using redis unix socket
+Permission denied when using redis unix socket
+-------------------------------------------------------------------
 
If you get an error like this:
 
@@ -27,43 +22,51 @@ make sure the permissions for the socket are set accordingly in `/etc/redis/redi
 
 unixsocketperm 777
 
-## Why is the time invalid?
+Why is the time invalid?
+-------------------------------------------------------------------
 
If you wonder why you are getting errors like this:
 
-```python
-intelmq.lib.exceptions.InvalidValue: invalid value '2017-03-06T07:36:29' () for key 'time.source'
-```
+
+.. code-block:: python
+
+   intelmq.lib.exceptions.InvalidValue: invalid value '2017-03-06T07:36:29' () for key 'time.source'
+
IntelMQ requires time zone information for all timestamps. Without a time zone, the time is ambiguous and therefore rejected.
 
-## How can I improve the speed?
+How can I improve the speed?
+-------------------------------------------------------------------
 
In most cases the bottlenecks are look-up experts. In these cases you can easily use the integrated load balancing features.
 
-### Multithreading
+Multithreading
+^^^^^^^^^^^^^^
 
When using the AMQP broker, you can make use of Multi-threading. See the [User-Guide, section Multithreading](User-Guide.html#multithreading-beta).
 
-### "Classic" load-balancing (Multiprocessing)
+"Classic" load-balancing (Multiprocessing)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Before Multithreading was available in IntelMQ, and in case you use Redis as the broker, the only way to do load balancing involves more work. Create multiple instances of the same bot and connect them all to the same source and destination bots. Then set the parameter `load_balance` to `true` for the bot which sends the messages to the duplicated bot.
Then, the bot sends messages to only one of the destination queues and not to all of them.
True Multi*processing* is not available in IntelMQ.
See also this discussion on a possible enhanced load balancing: https://github.com/certtools/intelmq/issues/186
 
-### Other options
+Other options
+^^^^^^^^^^^^^
 
For any bottleneck based on (online) lookups, optimize the lookup itself and, if possible, use local databases.
 
It is also possible to use multiple servers to spread the workload. To get the messages from one system to the other you can either directly connect to the other's pipeline or use a fast exchange mechanism such as the TCP Collector/Output (make sure to secure the network by other means).
 
-### Removing raw data for higher performance and less space usage
+Removing raw data for higher performance and less space usage
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
If you do not need the raw data, you can safely remove it. For events (after parsers), it keeps the original data, e.g. a line of a CSV file. In reports it keeps the actual data to be parsed, so don't delete the raw field in reports - between collectors and parsers.
 
The raw data consumes about 30% - 50% of the messages' size. The size of course depends on how much additional data you add to it and how much data the report includes. Dropping it will improve the speed, as less data needs to be transferred and processed at each step.
 
-#### In a bot
+**In a bot**
 
You can do this for example by using the *Field Reducer Expert*. The configuration could be:
 
@@ -76,7 +79,7 @@ Other solutions are the *Modify* bot and the *Sieve* bot. The last one is a good
 
 remove raw
 ```
 
-#### In the database
+**In the database**
 
In case you store data in the database and you want to keep its size small, you can (periodically) delete the raw data there.
 
@@ -88,15 +91,18 @@ UPDATE events SET raw = NULL WHERE "time.source" < '2018-07-01';
 
If the database is big, make sure to only update small parts of the database by using an appropriate `WHERE` clause. If you do not see any negative performance impact, you can increase the size of the chunks; otherwise the events in the output bot may queue up. The `id` column can also be used instead of the source's time.
 
-## My bot(s) died on startup with no errors logged
+My bot(s) died on startup with no errors logged
+-------------------------------------------------------------------
 
Rather than starting your bot(s) with `intelmqctl start`, try `intelmqctl run [bot]`. This will provide valuable debug output you might not otherwise see, pointing to issues like system configuration errors.
 
-## Orphaned Queues
+Orphaned Queues
+-------------------------------------------------------------------
 
This section has been moved to the [intelmqctl documentation](intelmqctl.html#orphaned-queues).
 
-## Multithreading is not available for this bot
+Multithreading is not available for this bot
+-------------------------------------------------------------------
 
Multithreading is not available for some bots and the AMQP broker is necessary.
Possible reasons why a certain bot or a setup does not support Multithreading include: diff --git a/docs/guides/MISP-Integrations.md b/docs/user/MISP-Integrations.rst similarity index 67% rename from docs/guides/MISP-Integrations.md rename to docs/user/MISP-Integrations.rst index e265f4507..78675868c 100644 --- a/docs/guides/MISP-Integrations.md +++ b/docs/user/MISP-Integrations.rst @@ -1,26 +1,31 @@ -# MISP integrations in IntelMQ +MISP integrations in IntelMQ +============================ -## MISP API Collector +MISP API Collector +------------------------------- The MISP API Collector fetches data from MISP via the MISP API. Look at the Bots' documentation for more information. -## Coming soon: MISP Expert +Coming soon: MISP Expert +------------------------------- The MISP Expert will search MISP by API for attributes/events matching data of the event. Look at the Bots' documentation for more information. -## MISP Feed Output +MISP Feed Output +------------------------------- This bot creates a complete "MISP feed" ready to be configured in MISP as incoming data source. Look at the Bots' documentation for more information. -## MISP API Output +MISP API Output +------------------------------- Can be used to directly create MISP events in a MISP instance. diff --git a/docs/guides/Bots.md b/docs/user/bots.rst similarity index 75% rename from docs/guides/Bots.md rename to docs/user/bots.rst index b8a508072..31871d68e 100644 --- a/docs/guides/Bots.md +++ b/docs/user/bots.rst @@ -1,110 +1,10 @@ -# Bots Documentation - -**Table of Contents:** -- [Bots Documentation](#bots-documentation) -- [General remarks](#general-remarks) -- [Initialization parameters](#initialization-parameters) -- [Common parameters](#common-parameters) -- [Collectors](#collectors) - - [API](#api) - - [Generic URL Fetcher](#generic-url-fetcher) - - [Generic URL Stream Fetcher](#generic-url-stream-fetcher) - - [Generic Mail URL Fetcher](#generic-mail-url-fetcher) - - [Generic Mail Attachment Fetcher](#generic-mail-attachment-fetcher) - - [Generic Mail Body Fetcher](#generic-mail-body-fetcher) - - [GitHub API](#github-api) - - [Fileinput](#fileinput) - - [MISP Generic](#misp-generic) - - [Request Tracker](#request-tracker) - - [Rsync](#rsync) - - [Shodan Stream](#shodan-stream) - - [TCP](#tcp) - - [XMPP collector](#xmpp-collector) - - [Alien Vault OTX](#alien-vault-otx) - - [Blueliv Crimeserver](#blueliv-crimeserver) - - [Calidog Certstream](#calidog-certstream) - - [McAfee openDXL](#mcafee-opendxl) - - [Microsoft Azure](#microsoft-azure) - - [Microsoft Interflow](#microsoft-interflow) - - [Additional functionalities](#additional-functionalities) - - [Stomp](#stomp) - - [Twitter](#twitter) -- [Parsers](#parsers) - - [Not complete](#not-complete) - - [Generic CSV Parser](#generic-csv-parser) - - [Calidog Certstream](#calidog-certstream) - - [Cymru CAP Program](#cymru-cap-program) - - [Cymru Full Bogons](#cymru-full-bogons) - - [HTML Table Parser](#html-table-parser) - - [Key-Value Parser](#key-value-parser) - - [Twitter](#twitter) - - [Shadowserver](#shadowserver) - - [Shodan](#shodan) -- [Experts](#experts) - - [Abusix](#abusix) - - [ASN Lookup](#asn-lookup) - - [CSV Converter](#csv-converter) - - [Copy Extra](#copy-extra) - - [Cymru Whois](#cymru-whois) - - [Deduplicator](#deduplicator) - - [Domain Suffix](#domain-suffix) - - [Rule processing](#rule-processing) - - [DO-Portal](#do-portal) - - [Field Reducer Bot](#field-reducer-bot) - - [Whitelist](#whitelist) - - [Blacklist](#blacklist) - - [Filter](#filter) - - [Format 
Field](#format-field) - - [Generic DB Lookup](#generic-db-lookup) - - [Gethostbyname](#gethostbyname) - - [IDEA](#idea) - - [MaxMind GeoIP](#maxmind-geoip) - - [MISP](#misp) - - [Modify](#modify) - - [Configuration File](#configuration-file) - - [Actions](#actions) - - [Examples](#examples) - - [Types](#types) - - [McAfee Active Response Hash lookup](#mcafee-active-response-hash-lookup) - - [McAfee Active Response IP lookup](#mcafee-active-response-ip-lookup) - - [McAfee Active Response URL lookup](#mcafee-active-response-url-lookup) - - [National CERT contact lookup by CERT.AT](#national-cert-contact-lookup-by-certat) - - [Recorded Future IP Risk](#recorded-future-ip-risk) - - [Reverse DNS](#reverse-dns) - - [RFC1918](#rfc1918) - - [RipeNCC Abuse Contact](#ripencc-abuse-contact) - - [Sieve](#sieve) - - [Taxonomy](#taxonomy) - - [Threshold](#threshold) - - [Tor Nodes](#tor-nodes) - - [Url2FQDN](#url2fqdn) - - [Wait](#wait) -- [Outputs](#outputs) - - [AMQP Topic](#amqp-topic) - - [Blackhole](#blackhole) - - [Elasticsearch](#elasticsearch) - - [File](#file) - - [Filename formatting](#filename-formatting) - - [Files](#files) - - [McAfee Enterprise Security Manager](#mcafee-enterprise-security-manager) - - [MISP Feed](#misp-feed) - - [MISP API](#misp-api) - - [MongoDB](#mongodb) - - [Installation Requirements](#installation-requirements) - - [Redis](#redis) - - [Request Tracker](#request-tracker) - - [REST API](#rest-api) - - [SMTP Output Bot](#smtp-output-bot) - - [SQL](#sql) - - [PostgreSQL](#postgresql) - - [SQLite](#sqlite) - - [TCP](#tcp) - - [Touch](#touch) - - [UDP](#tcp) - - [XMPP](#xmpp) - - -## General remarks +Bots +==== + +.. contents:: + +General remarks +--------------- By default all of the bots are started when you start the whole botnet, however there is a possibility to *disable* a bot. This means that the bot will not start every time you start the botnet, but you can start @@ -117,38 +17,41 @@ There are two different types of parameters: The initialization parameters are n The initialization parameters are in the first level, the runtime parameters live in the `parameters` sub-dictionary: -```json -{ - "bot-id": { - "parameters": { - runtime parameters... - }, - initialization parameters... - } -} -``` +.. code-block:: json + + { + "bot-id": { + "parameters": { + runtime parameters... + }, + initialization parameters... + } + } + For example: -```json -{ - "abusech-feodo-domains-collector": { - "parameters": { - "provider": "Abuse.ch", - "name": "Abuse.ch Feodo Domains", - "http_url": "http://example.org/feodo-domains.txt" - }, - "name": "Generic URL Fetcher", - "group": "Collector", - "module": "intelmq.bots.collectors.http.collector_http", - "description": "collect report messages from remote hosts using http protocol", - "enabled": true, - "run_mode": "scheduled" - } -} -``` + +.. code-block:: json + + { + "abusech-feodo-domains-collector": { + "parameters": { + "provider": "Abuse.ch", + "name": "Abuse.ch Feodo Domains", + "http_url": "http://example.org/feodo-domains.txt" + }, + "name": "Generic URL Fetcher", + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "description": "collect report messages from remote hosts using http protocol", + "enabled": true, + "run_mode": "scheduled" + } + } This configuration resides in the file `runtime.conf` in your IntelMQ's configuration directory for each configured bot. 
-## Initialization parameters
+Initialization parameters
+-------------------------
 
* `name` and `description`: The name and description of the bot as can be found in the BOTS file, not used by the bot itself.
* `group`: Can be `"Collector"`, `"Parser"`, `"Expert"` or `"Output"`. Only used for visualization by other tools.
@@ -156,7 +59,10 @@ This configuration resides in the file `runtime.conf` in your IntelMQ's configur
* `enabled`: If the parameter is set to `true` (which is NOT the default value if it is missing, as a protection) the bot will start when the botnet is started (`intelmqctl start`). If the parameter was set to `false`, the bot will not be started by `intelmqctl start`, however you can run the bot independently using `intelmqctl start `. Check the [User-Guide](./User-Guide.md) for more details.
* `run_mode`: There are two run modes, "continuous" (default run mode) or "scheduled". In the first case, the bot will be running forever until stopped or exits because of errors (depending on configuration). In the latter case, the bot will stop after one successful run. This is especially useful when scheduling bots via cron or systemd. Default is `continuous`. Check the [User-Guide](./User-Guide.md) for more details.
 
-## Common parameters
+.. _common-parameters:
+
+Common parameters
+-----------------
 
**Feed parameters**: Common configuration options for all collectors.
 
@@ -189,22 +95,25 @@ This configuration resides in the file `runtime.conf` in your IntelMQ's configur
* `redis_cache_ttl`: TTL used for caching.
* `redis_cache_password`: Optional password for the Redis database (default: none).
 
-## Collectors
+Collectors
+----------
 
Multithreading is disabled for all Collectors, as this would lead to duplicated data.
 
-### AMQP
+AMQP
+^^^^
 
-Requires the [`pika` python library](https://pypi.org/project/pika/), minimum version 1.0.0.
+Requires the `pika python library <https://pypi.org/project/pika/>`_, minimum version 1.0.0.
+
+**Information**
 
-#### Information:
* `name`: intelmq.bots.collectors.amqp.collector_amqp
* `lookup`: yes
* `public`: yes
* `cache (redis db)`: none
* `description`: collect data from (remote) AMQP servers, for both IntelMQ as well as external data
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `connection_attempts`: The number of connection attempts to the defined server, defaults to 3
@@ -220,18 +129,20 @@ Requires the [`pika` python library](https://pypi.org/project/pika/), minimum ve
 
Currently only fetching from a queue is supported; this can be extended in the future. Messages will be acknowledged at AMQP after they are sent to the pipeline.
 
-* * *
 
-### API
-
-#### Information:
+API
+^^^
+
+**Information**
+
* `name:` intelmq.bots.collectors.api.collector
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect report messages from an HTTP REST API
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `port`: Optional, integer. Default: 5000. The local port the API will be available at.
@@ -239,27 +150,27 @@ Currently only fetching from a queue is supported can be extended in the future.
 
The API is available at `/intelmq/push`. The `tornado` library is required.
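To make the above concrete, a `runtime.conf` entry for this collector might look like the following sketch; the bot id `api-collector`, provider and feed name are placeholders, while module, description and the default port are taken from the information above:
 
.. code-block:: json
 
   {
       "api-collector": {
           "parameters": {
               "provider": "Example Provider",
               "name": "Example API Feed",
               "port": 5000
           },
           "group": "Collector",
           "module": "intelmq.bots.collectors.api.collector",
           "description": "collect report messages from an HTTP REST API",
           "enabled": true,
           "run_mode": "continuous"
       }
   }
 
Reports can then be pushed to the `/intelmq/push` endpoint on that port, for example with `curl` as shown for the API collector bot further below.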
-* * *
 
-### Generic URL Fetcher
+Generic URL Fetcher
+^^^^^^^^^^^^^^^^^^^
+
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.http.collector_http
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect report messages from remote hosts using HTTP protocol
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* **HTTP parameters** (see above)
* `extract_files`: Optional, boolean or list of strings. If it is true, the retrieved (compressed) file or archive will be uncompressed/unpacked and the files are extracted. If the parameter is a list of strings, only the files matching the filenames are extracted. Extraction handles gzipped files and both compressed and uncompressed tar-archives as well as zip archives.
* `http_url`: location of information resource (e.g. https://feodotracker.abuse.ch/blocklist/?download=domainblocklist)
-* `http_url_formatting`: (`bool|JSON`, default: `false`) If `true`, `{time[format]}` will be replaced by the current time in local timezone formatted by the given format. E.g. if the URL is `http://localhost/{time[%Y]}`, then the resulting URL is `http://localhost/2019` for the year 2019. (Python's [Format Specification Mini-Language](https://docs.python.org/3/library/string.html#formatspec) is used for this.)
-You may use a `JSON` specifying [time-delta](https://docs.python.org/3/library/datetime.html#datetime.timedelta) parameters to shift the current time accordingly. For example use `{"days": -1}` for the yesterday's date; the URL `http://localhost/{time[%Y-%m-%d]}` will get translated to "http://localhost/2018-12-31" for the 1st Jan of 2019.
+* `http_url_formatting`: (`bool|JSON`, default: `false`) If `true`, `{time[format]}` will be replaced by the current time in local timezone formatted by the given format. E.g. if the URL is `http://localhost/{time[%Y]}`, then the resulting URL is `http://localhost/2019` for the year 2019. (Python's `Format Specification Mini-Language <https://docs.python.org/3/library/string.html#formatspec>`_ is used for this.) You may use a `JSON` specifying `time-delta <https://docs.python.org/3/library/datetime.html#datetime.timedelta>`_ parameters to shift the current time accordingly. For example use `{"days": -1}` for yesterday's date; the URL `http://localhost/{time[%Y-%m-%d]}` will get translated to "http://localhost/2018-12-31" for the 1st Jan of 2019. See the configuration sketch at the end of this section.
* `verify_pgp_signatures`: `bool`, defaults to `false`. If `true`, the signature file is downloaded and the report file is checked. On error (missing signature, mismatch, ...), the error is logged and the report is not processed. The public key has to be imported into the local keyring. This requires the `python-gnupg` library.
* `signature_url`: Location of the signature file for the downloaded content. For the path `http://localhost/data/latest.json` this may be for example `http://localhost/data/latest.asc`.
* `signature_url_formatting`: (`bool|JSON`, default: `false`) The same as `http_url_formatting`, only for the signature file.
@@ -269,25 +180,24 @@ Zipped files are automatically extracted if detected.
 
For extracted files, every extracted file is sent in its own report. Every report has a field named `extra.file_name` with the file name in the archive the content was extracted from.
 
-#### HTTP Response status code checks
+**HTTP Response status code checks**
 
If the HTTP response's status code is not 2xx, this is treated as an error.
In Debug logging level, the request's and response's headers and body are logged for further inspection.
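As a sketch of how the URL parameters above combine in `runtime.conf`, consider a hypothetical feed that publishes one file per year; the bot id, provider and URL are invented for illustration, and `http_url_formatting` is simply enabled so that `{time[%Y]}` is replaced by the current year as described above:
 
.. code-block:: json
 
   {
       "example-http-collector": {
           "parameters": {
               "provider": "Example Provider",
               "name": "Example Yearly Feed",
               "http_url": "http://localhost/{time[%Y]}.txt",
               "http_url_formatting": true
           },
           "name": "Generic URL Fetcher",
           "group": "Collector",
           "module": "intelmq.bots.collectors.http.collector_http",
           "description": "collect report messages from remote hosts using http protocol",
           "enabled": true,
           "run_mode": "scheduled"
       }
   }
 
For a daily feed you could instead use the time-delta variant described above, e.g. `{"days": -1}` together with `{time[%Y-%m-%d]}` in the URL.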
-* * *
 
+Generic URL Stream Fetcher
+^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-### Generic URL Stream Fetcher
+**Information**
 
-
-#### Information:
* `name:` intelmq.bots.collectors.http.collector_http_stream
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` Opens a streaming connection to the URL and sends the received lines.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* **HTTP parameters** (see above)
@@ -297,19 +207,18 @@ If the stream is interrupted, the connection will be aborted using the timeout p
 
The parameter `http_timeout_max_tries` is of no use in this collector.
 
-* * *
-
-### Generic Mail URL Fetcher
+Generic Mail URL Fetcher
+^^^^^^^^^^^^^^^^^^^^^^^^
 
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.mail.collector_mail_url
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect messages from mailboxes, extract URLs from those messages and download the report messages from the URLs.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* **HTTP parameters** (see above)
@@ -326,13 +235,14 @@ The parameter `http_timeout_max_tries` is of no use in this collector.
* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used.
 
The resulting reports contain the following special fields:
-    * `feed.url`: The URL the data was downloaded from
-    * `extra.email_subject`: The subject of the email
-    * `extra.email_from`: The email's from address
-    * `extra.email_message_id`: The email's message ID
-    * `extra.file_name`: The file name of the downloaded file (extracted from the HTTP Response Headers if possible).
 
-##### Chunking
+* `feed.url`: The URL the data was downloaded from
+* `extra.email_subject`: The subject of the email
+* `extra.email_from`: The email's from address
+* `extra.email_message_id`: The email's message ID
+* `extra.file_name`: The file name of the downloaded file (extracted from the HTTP Response Headers if possible).
+
+**Chunking**
 
For line-based inputs the bot can split up large reports into smaller chunks.
 
@@ -346,19 +256,19 @@ for each chunk that is passed on to a parser bot.
 
Specifically, to configure a large file input to work around Redis' size
limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB.
 
-* * *
 
-### Generic Mail Attachment Fetcher
+Generic Mail Attachment Fetcher
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.mail.collector_mail_attach
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect messages from mailboxes, download the report messages from the attachments.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `extract_files`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher for more details.
@@ -376,23 +286,25 @@ limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB.
* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used.
The resulting reports contain the following special fields:
-    * `extra.email_subject`: The subject of the email
-    * `extra.email_from`: The email's from address
-    * `extra.email_message_id`: The email's message ID
-    * `extra.file_name`: The file name of the attachment or the file name in the attached archive if attachment is to uncompress.
 
-* * *
 
-### Generic Mail Body Fetcher
+* `extra.email_subject`: The subject of the email
+* `extra.email_from`: The email's from address
+* `extra.email_message_id`: The email's message ID
+* `extra.file_name`: The file name of the attachment or the file name in the attached archive if the attachment is uncompressed.
+
+Generic Mail Body Fetcher
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.mail.collector_mail_body
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect messages from mailboxes, forwards the bodies as reports. Each non-empty body with the matching content type is sent as an individual report.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `mail_host`: FQDN or IP of mail server
@@ -411,16 +323,16 @@ The resulting reports contains the following special fields:
  - `string`, e.g. `'plain'`
 
The resulting reports contain the following special fields:
-    * `extra.email_subject`: The subject of the email
-    * `extra.email_from`: The email's from address
-    * `extra.email_message_id`: The email's message ID
 
-* * *
+* `extra.email_subject`: The subject of the email
+* `extra.email_from`: The email's from address
+* `extra.email_message_id`: The email's message ID
 
-### Github API
+Github API
+^^^^^^^^^^
 
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.github_api.collector_github_contents_api
* `lookup:` yes
* `public:` yes
@@ -428,7 +340,7 @@ The resulting reports contains the following special fields:
* `description:` Collects files matched by regex from a GitHub repository via the GitHub API. Optionally with GitHub credentials, which are used as the Basic HTTP authentication.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `basic_auth_username:` GitHub account username (optional)
@@ -437,7 +349,7 @@ The resulting reports contains the following special fields:
* `regex:` Valid regex of target files within the repository (defaults to `.*.json`)
* `extra_fields:` Comma-separated list of extra fields from [GitHub contents API](https://developer.github.com/v3/repos/contents/)
 
-#### Workflow
+**Workflow**
 
The optional authentication parameters provide a higher limit of GitHub API requests. With GitHub user authentication, the requests are rate limited to 5000 per hour, otherwise to 60 requests per hour.
 
@@ -447,11 +359,11 @@ Additionally it adds extra file metadata defined by the `extra_fields`.
 
The bot always sets the URL from which the file was downloaded as `feed.url`.
 
-* * *
+Fileinput
+^^^^^^^^^
 
-### Fileinput
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.file.collector_file
* `lookup:` yes
* `public:` yes
@@ -460,7 +372,7 @@ The bot always sets the url, from which downloaded the file, as `feed.url`.
 
This is handy for testing purposes, or when you need to react to spontaneous
events. In combination with the Generic CSV Parser this should work great.
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `path`: path to file
@@ -468,10 +380,11 @@ The bot always sets the url, from which downloaded the file, as `feed.url`.
* `delete_file`: whether to delete the file after reading (default: `false`)
 
The resulting reports contain the following special fields:
-    * `feed.url`: The URI using the `file://` scheme and localhost, with the full path to the processed file.
-    * `extra.file_name`: The file name (without path) of the processed file.
 
-#### Chunking
+* `feed.url`: The URI using the `file://` scheme and localhost, with the full path to the processed file.
+* `extra.file_name`: The file name (without path) of the processed file.
+
+**Chunking**
 
Additionally, for line-based inputs the bot can split up large reports into
smaller chunks.
 
@@ -486,7 +399,7 @@ for each chunk that is passed on to a parser bot.
 
Specifically, to configure a large file input to work around Redis' size
limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB.
 
-#### Workflow
+**Workflow**
 
The bot loops over all files in `path` and tests if their file name matches
*postfix, e.g. `*.csv`. If yes, the file will be read and inserted into the
@@ -500,39 +413,38 @@ To prevent data loss, the bot also stops when no `postfix` is set and
 
The bot always sets the file name as feed.url
 
-* * *
-
-### Rsync
+Rsync
+^^^^^
 
Requires the rsync executable
 
-#### Information:
+**Information**
+
* `name:` intelmq.bots.collectors.rsync.collector_rsync
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` The bot downloads a file via rsync and then loads data from the downloaded file. The downloaded file is located in `var/lib/bots/rsync_collector.`
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `file`: Name of the downloaded file.
* `rsync_path`: Path to the file. It can be "/home/username/directory" or "username@remote_host:/home/username/directory"
* `temp_directory`: Path of a temporary state directory to use for rsync'd files. Optional. Default: `/opt/intelmq/var/run/rsync_collector/`.
 
-* * *
+MISP Generic
+^^^^^^^^^^^^
 
-### MISP Generic
+**Information**
 
-
-#### Information:
* `name:` intelmq.bots.collectors.misp.collector
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect messages from [MISP](https://github.com/MISP), a malware information sharing platform server.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `misp_url`: URL of MISP server (with trailing '/')
@@ -543,7 +455,7 @@ Requires the rsync executable
 
Generic parameters used in this bot:
 
* `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`)
 
-#### Workflow
+**Workflow**
 
This collector will search for events on a MISP server that have a
`to_process` tag attached to them (see the `misp_tag_to_process` parameter)
and collect them for processing by IntelMQ. Once the MISP event has been
@@ -553,25 +465,24 @@ processed the `to_process` tag is removed from the MISP event and a
 
**NB.** The MISP tags must be configured to be 'exportable', otherwise they will
not be retrieved by the collector.
 
-* * *
-
-### Request Tracker
+Request Tracker
+^^^^^^^^^^^^^^^
 
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.rt.collector_rt
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` Request Tracker Collector fetches attachments from an RTIR instance.
-You need the rt-library >= 1.9 from nic.cz, available via [pypi](https://pypi.org/project/rt/): `pip3 install rt`
+You need the rt-library >= 1.9 from nic.cz, available via `pypi <https://pypi.org/project/rt/>`_: `pip3 install rt`
 
This rt bot will connect to RT and inspect the given `search_queue` for tickets matching all criteria in `search_*`. Any matches will be inspected. For each match, all (RT-) attachments of the matching RT tickets are iterated over and within this loop, the first matching filename in the attachment is processed.
If none of the filename matches apply, the contents of the first (RT-) "history" item is matched against the URL-regex.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* **HTTP parameters** (see above)
@@ -594,45 +505,46 @@ If none of the filename matches apply, the contents of the first (RT-) "history"
 
The parameter `http_timeout_max_tries` is of no use in this collector.
 
The resulting reports contain the following special fields:
-    * `rtir_id`: The ticket ID
-    * `extra.email_subject` and `extra.ticket_subject`: The subject of the ticket
-    * `extra.email_from` and `extra.ticket_requestors`: Comma separated list of requestor's email addresses.
-    * `extra.ticket_owner`: The ticket's owner name
-    * `extra.ticket_status`: The ticket's status
-    * `extra.ticket_queue`: The ticket's queue
-    * `extra.file_name`: The name of the extracted file, the name of the downloaded file or the attachments' filename without `.gz` postfix.
-    * `time.observation`: The creation time of the ticket or attachment.
 
-##### Search
+* `rtir_id`: The ticket ID
+* `extra.email_subject` and `extra.ticket_subject`: The subject of the ticket
+* `extra.email_from` and `extra.ticket_requestors`: Comma separated list of requestor's email addresses.
+* `extra.ticket_owner`: The ticket's owner name
+* `extra.ticket_status`: The ticket's status
+* `extra.ticket_queue`: The ticket's queue
+* `extra.file_name`: The name of the extracted file, the name of the downloaded file or the attachments' filename without `.gz` postfix.
+* `time.observation`: The creation time of the ticket or attachment.
+
+**Search**
 
The parameters prefixed with `search_` allow configuring the ticket search. Empty strings and `null` as value for search parameters are ignored.
 
-##### File downloads
+**File downloads**
 
Attachments can optionally be unzipped; remote files are downloaded with the `http_*` settings applied (see `defaults.conf`).
 
If `url_regex` or `attachment_regex` are empty strings, false or null, they are ignored.
 
-##### Ticket processing
+**Ticket processing**
 
Optionally, the RT bot can "take" RT tickets (i.e. the `user` is assigned this ticket now) and/or the status can be changed (leave `set_status` empty in case you don't want to change the status). Please note however that you **MUST** do one of the following: either "take" the ticket or set the status (`set_status`). Otherwise, the search will find the ticket every time and we will have generated an endless loop.
 
In case a resource needs to be fetched and this resource is permanently not available (status code is 4xx), the ticket status will be set according to the configuration to avoid processing the ticket over and over. For temporary failures the status is not modified; instead the ticket will be skipped in this run.
 
-##### Time search
+**Time search**
 
To find only tickets newer than a given absolute or relative time, you can use the `search_not_older_than` parameter.
Absolute time specification can be anything parseable by dateutil; best use an ISO format.
Relative time specifications must be in this format: `[number] [timespan]s`, e.g. `3 days`. `timespan` can be hour, day, week, month or year. A trailing 's' is supported for all timespans. Relative times are subtracted from the current time directly before the search is performed.
 
-* * *
+Rsync
+^^^^^
 
-### Rsync
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.rsync.collector_rsync
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` Syncs a file via rsync and reads the file.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `file`: The filename to process, combined with `rsync_path`.
* `temp_directory`: The temporary directory for rsync, by default `$VAR_STATE_PATH/rsync_collector`. `$VAR_STATE_PATH` is `/var/run/intelmq/` or `/opt/intelmq/var/run/`.
* `rsync_path`: The path of the file to process
 
-* * *
-
-### Shodan Stream
+Shodan Stream
+^^^^^^^^^^^^^
 
Requires the shodan library to be installed:
 
* https://github.com/achillean/shodan-python/
* https://pypi.org/project/shodan/
 
-#### Information:
+**Information**
+
* `name:` intelmq.bots.collectors.shodan.collector_stream
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` Queries the Shodan Streaming API
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* **HTTP parameters** (see above). Only the proxy is used (requires `shodan-python > 1.8.1`). The certificate is always verified.
* `countries`: A list of countries to query for. If it is a string, it will be split by `,`.
 
-* * *
+TCP
+^^^
 
-### TCP
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.tcp.collector
* `lookup:` no
* `public:` yes
* `cache (redis db):` none
* `description:` TCP is the bot responsible for receiving events on a TCP port (e.g. from the TCP Output of another IntelMQ instance). Might not be working on Python 3.4.6.
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* `ip`: IP of destination server
* `port`: port of destination server
 
-#### Response
+**Response**
 
The TCP collector just sends an "Ok" message after every received message; this should not pose a problem for an arbitrary input.
If you intend to link two IntelMQ instances via TCP, have a look at the TCP output bot documentation.
 
-* * *
-
-
-### XMPP collector
-
+XMPP collector
+^^^^^^^^^^^^^^
 
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.xmpp.collector
* `lookup:` yes
* `public:` yes
@@ -704,13 +613,15 @@ If you intend to link two IntelMQ instance via TCP, have a look at the TCP outpu
 
**Warning:** This bot is currently *unmaintained* and needs to be adapted. The used XMPP library *sleekxmpp* is deprecated, therefore the bot needs to be adapted to the successor library *slixmpp*. For more information see [Issue #1614](https://github.com/certtools/intelmq/issues/1614).
 
-#### Requirements
+**Requirements**
+
The sleekxmpp library needs to be installed on your system:
 
-```bash
-pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt
-```
-#### Configuration Parameters:
+.. code-block:: bash
+
+   pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt
+
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `xmpp_server`: The domain name of the server of the XMPP-Account (part after the @ sign)
@@ -719,47 +630,50 @@ pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt
* `xmpp_room`: The room which has to be joined by the XMPP-Collector (full address room@conference.server.tld)
* `xmpp_room_nick`: The username / nickname the collector shall use within the room
* `xmpp_room_password`: The password which might be required to join a room
+
  - `use_muc` : If this parameter is `true`, the bot will join the room `xmpp_room`.
  - `xmpp_userlist`: An array of usernames whose messages will (not) be processed.
  - `xmpp_whitelist_mode`: If `true` the list provided in `xmpp_userlist` is a whitelist. Else it is a blacklist. In case of a whitelist, only messages from the configured users will be processed, else their messages are not processed. Default is `false` / blacklist.
+
* `ca_certs`: A path to a file containing the CAs which should be used (default: `/etc/ssl/certs/ca-certificates.crt`)
* `strip_message`: If `true` trailing white space will be removed from the message. Does not happen if `pass_full_xml` is set to `true` (default: `true`)
* `pass_full_xml`: If this parameter is set to `true` the collector will read the full XMPP-XML message and add it to the pipeline. This is useful if other systems like AbuseHelper should be processed. (default: `false`)
 
-* * *
+Alien Vault OTX
+^^^^^^^^^^^^^^^
 
+**Information**
 
-### Alien Vault OTX
-
-#### Information:
* `name:` intelmq.bots.collectors.alienvault_otx.collector
* `lookup:` yes
* `public:` yes
* `cache (redis db):` none
* `description:` collect report messages from Alien Vault OTX API
 
-#### Requirements
+**Requirements**
+
Install the library from GitHub, as there is no package in PyPi:
 
-```bash
-pip3 install -r intelmq/bots/collectors/alienvault_otx/REQUIREMENTS.txt
-```
+.. code-block:: bash
+
+   pip3 install -r intelmq/bots/collectors/alienvault_otx/REQUIREMENTS.txt
+
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `api_key`: API Key
* `modified_pulses_only`: get only modified pulses instead of all; set it to true or false, default false
* `interval`: if "modified_pulses_only" is set, define the time in hours (integer value) to get pulses modified since then, default 24 hours
 
-* * *
+Blueliv Crimeserver
+^^^^^^^^^^^^^^^^^^^
 
-### Blueliv Crimeserver
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.blueliv.collector_crimeserver
* `lookup:` yes
* `public:` no
@@ -768,42 +682,44 @@ pip3 install -r intelmq/bots/collectors/alienvault_otx/REQUIREMENTS.txt
 
For more information visit https://github.com/Blueliv/api-python-sdk
 
-#### Requirements
+**Requirements**
+
Install the required library:
 
-```bash
-pip3 install -r intelmq/bots/collectors/blueliv/REQUIREMENTS.txt
-```
+.. code-block:: bash
+
+   pip3 install -r intelmq/bots/collectors/blueliv/REQUIREMENTS.txt
+
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `api_key`: location of information resource, see https://map.blueliv.com/?redirect=get-started#signup
* `api_url`: The optional API endpoint, by default `https://freeapi.blueliv.com`.
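For orientation, a hypothetical `runtime.conf` entry combining the parameters above might look like the following sketch; the bot id and the API key are placeholders:
 
.. code-block:: json
 
   {
       "blueliv-crimeserver-collector": {
           "parameters": {
               "provider": "Blueliv",
               "name": "Blueliv Crimeserver",
               "api_key": "<your-api-key>",
               "api_url": "https://freeapi.blueliv.com"
           },
           "group": "Collector",
           "module": "intelmq.bots.collectors.blueliv.collector_crimeserver",
           "enabled": true,
           "run_mode": "continuous"
       }
   }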
-* * *
-
-### Calidog Certstream
+Calidog Certstream
+^^^^^^^^^^^^^^^^^^
 
A bot to collect data from the Certificate Transparency Log (CTL).
This bot works based on the certstream library (https://github.com/CaliDog/certstream-python).
 
-#### Information:
+**Information**
+
* `name:` intelmq.bots.collectors.calidog.collector_certstream
* `lookup:` yes
* `public:` no
* `cache (redis db):` none
* `description:` collect data from Certificate Transparency Log
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
 
-* * *
+ESET ETI
+^^^^^^^^
 
-### ESET ETI
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.eset.collector
* `lookup:` yes
* `public:` no
@@ -812,14 +728,16 @@ This bot works based on certstream library (https://github.com/CaliDog/certstrea
 
For more information visit https://www.eset.com/int/business/services/threat-intelligence/
 
-#### Requirements
+**Requirements**
+
Install the required `cabby` library:
 
-```bash
-pip3 install -r intelmq/bots/collectors/eset/REQUIREMENTS.txt
-```
+.. code-block:: bash
+
+   pip3 install -r intelmq/bots/collectors/eset/REQUIREMENTS.txt
+
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `username`: Your username
@@ -828,61 +746,61 @@ pip3 install -r intelmq/bots/collectors/eset/REQUIREMENTS.txt
* `time_delta`: The time span to look back, in seconds. Default `3600`.
* `collection`: The collection to fetch.
 
-* * *
+McAfee openDXL
+^^^^^^^^^^^^^^
 
-### McAfee openDXL
+**Information**
 
-#### Information:
* `name:` intelmq.bots.collectors.opendxl.collector
* `lookup:` yes
* `public:` no
* `cache (redis db):` none
* `description:` collect messages via openDXL
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Feed parameters** (see above)
* `dxl_config_file`: location of the configuration file containing the required information to connect
* `dxl_topic`: the name of the DXL topic to subscribe to
 
-* * *
-
-### Microsoft Azure
+Microsoft Azure
+^^^^^^^^^^^^^^^
 
Iterates over all blobs in all containers in an Azure storage.
The cache is required to remember which files have already been processed (the TTL needs to be high enough to cover the oldest files available!).
 
This bot significantly changed in a backwards-incompatible way in IntelMQ version 2.2.0 to support current versions of the Microsoft Azure Python libraries.
 
-#### Information:
+**Information**
+
* `name`: intelmq.bots.collectors.microsoft.collector_azure
* `lookup`: yes
* `public`: no
* `cache (redis db)`: 5
* `description`: collect blobs from Microsoft Azure using their library
 
-#### Configuration Parameters:
+**Configuration Parameters**
 
* **Cache parameters** (see above)
* **Feed parameters** (see above)
* `connection_string`: connection string as given by Microsoft
* `container_name`: name of the container to connect to
 
-* * *
-
-### Microsoft Interflow
+Microsoft Interflow
+^^^^^^^^^^^^^^^^^^^
 
Iterates over all files available via this API. Make sure to limit the files to be downloaded with the parameters, otherwise you will get a lot of data!
 
The cache is used to remember which files have already been downloaded. Make sure the TTL is high enough, higher than `not_older_than`.
-#### Information: +**Information** + * `name:` intelmq.bots.collectors.microsoft.collector_interflow * `lookup:` yes * `public:` no * `cache (redis db):` 5 * `description:` collect files from Microsoft Interflow using their API -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `api_key`: API key generated in their portal @@ -890,29 +808,31 @@ The cache is used to remember which files have already been downloaded. Make sur * `not_older_than`: an optional relative (minutes) or absolute time (UTC is assumed) expression to determine the oldest time of a file to be downloaded * `redis_cache_*` and especially `redis_cache_ttl`: Settings for the cache where file names of downloaded files are saved. The cache's TTL must always be bigger than `not_older_than`. -#### Additional functionalities +**Additional functionalities** * Files are automatically ungzipped if the filename ends with `.gz`. -* * * +Stomp +^^^^^ -### Stomp +**Information** -#### Information: * `name:` intelmq.bots.collectors.stomp.collector * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` collect messages from a STOMP server -#### Requirements +**Requirements** + Install the `stomp.py` library from PyPI: -```bash -pip3 install -r intelmq/bots/collectors/stomp/REQUIREMENTS.txt -``` -#### Configuration Parameters: +.. code-block:: bash + + pip3 install -r intelmq/bots/collectors/stomp/REQUIREMENTS.txt + +**Configuration Parameters** * **Feed parameters** (see above) * `exchange`: exchange point @@ -922,19 +842,20 @@ pip3 install -r intelmq/bots/collectors/stomp/REQUIREMENTS.txt * `ssl_client_certificate`: path to client cert file * `ssl_client_certificate_key`: path to client cert key file -* * * - -### Twitter +Twitter +^^^^^^^ Collects tweets from `target_timelines`: up to `tweet_count` tweets from each user and up to `timelimit` back in time. The tweet text is sent separately and, if allowed, links to pastebin are followed and the text is sent in a separate report -#### Information: +**Information** + * `name:` intelmq.bots.collectors.twitter.collector_twitter * `lookup:` yes * `public:` yes * `cache (redis db):` none * `description:` Collects tweets -#### Configuration Parameters: + +**Configuration Parameters** * **Feed parameters** (see above) * `target_timelines`: screen_names of twitter accounts to be followed @@ -948,9 +869,11 @@ Collects tweets from target_timelines. Up to tweet_count tweets from each user a * `access_token_key`: Twitter API login data * `access_token_secret`: Twitter API login data -### API collector bot +API collector bot +^^^^^^^^^^^^^^^^^ + +**Information** -#### Information: * `name:` intelmq.bots.collectors.api.collector_api * `lookup:` no * `public:` no @@ -958,91 +881,107 @@ Collects tweets from target_timelines. Up to tweet_count tweets from each user a * `description:` Bot for collecting data via an API; you need to POST JSON to the /intelmq/push endpoint, example usage: -``` -curl -X POST http://localhost:5000/intelmq/push -H 'Content-Type: application/json' --data '{"source.ip": "127.0.0.101", "classification.type": "backdoor"}' -``` -#### Configuration Parameters: +.. code-block:: bash + + curl -X POST http://localhost:5000/intelmq/push -H 'Content-Type: application/json' --data '{"source.ip": "127.0.0.101", "classification.type": "backdoor"}' + +**Configuration Parameters** * **Feed parameters** (see above) * `port`: 5000 -## Parsers +Parsers +------- -### Not complete +Not complete +^^^^^^^^^^^^ This list is not complete.
Look at `intelmq/bots/BOTS` or the list of parsers shown in the manager. But most parsers do not need configuration parameters. TODO -### AnubisNetworks Cyberfeed Stream +AnubisNetworks Cyberfeed Stream +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** -#### Information * `name`: `intelmq.bots.parsers.anubisnetworks.parser` * `lookup`: no * `public`: yes * `cache (redis db)`: none * `description`: parses data from AnubisNetworks Cyberfeed Stream -#### Description +**Description** The feed format changes over time. The parser supports at least data from 2016 and 2020. Events with the Malware "TestSinkholingLoss" are ignored, as they are for the feed provider's internal purpose only and should not be processed at all. -#### Configuration parameters +**Configuration parameters** * `use_malware_familiy_as_classification_identifier`: default: `true`. Use the `malw.family` field as `classification.identifier`. If `false`, check if the same as `malw.variant`. If it is the same, it is ignored. Otherwise saved as `extra.malware.family`. -* * * - -### Generic CSV Parser +Generic CSV Parser +^^^^^^^^^^^^^^^^^^ Lines starting with `'#'` will be ignored. Headers won't be interpreted. -#### Configuration parameters +**Configuration parameters** * `"columns"`: A list of strings or a string of comma-separated values with field names. The names must match the harmonization's field names. Empty column specifications and columns named `"__IGNORE__"` are ignored. E.g. - ```json - "columns": [ - "", - "source.fqdn", - "extra.http_host_header", - "__IGNORE__" - ], - ``` + + .. code-block:: json + + "columns": [ + "", + "source.fqdn", + "extra.http_host_header", + "__IGNORE__" + ], + is equivalent to: - ```json - "columns": ",source.fqdn,extra.http_host_header," - ``` + + .. code-block:: json + + "columns": ",source.fqdn,extra.http_host_header," + The first and the last column are not used in this example. - It is possible to specify multiple columns using the `|` character. E.g. - ``` - "columns": "source.url|source.fqdn|source.ip" - ``` - First, bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will try to parse it as IP, if that fails, an error will be raised. - Some use cases - + + It is possible to specify multiple columns using the `|` character. E.g. + + .. code-block:: - - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` + "columns": "source.url|source.fqdn|source.ip" - - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` + First, the bot will try to parse the value as a URL; if it fails, it will try to parse it as an FQDN; if that fails, it will try to parse it as an IP address; if that also fails, an error will be raised. + Some use cases: + + - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` + - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` * `"column_regex_search"`: Optional. A dictionary mapping field names (as given per the columns parameter) to regular expressions. The field is evaluated using `re.search`. E.g. to get the ASN out of `AS1234` use: `{"source.asn": "[0-9]*"}`. Make sure to properly escape any backslashes in your regular expression (see also `#1579 <https://github.com/certtools/intelmq/issues/1579>`__). * `"compose_fields"`: Optional, dictionary. Create fields from columns, e.g. with data like this: - ```csv - # Host,Path - example.com,/foo/ - example.net,/bar/ - ``` + + ..
code-block:: csv + + # Host,Path + example.com,/foo/ + example.net,/bar/ + using this compose_fields parameter: - ```json - {"source.url": "http://{0}{1}"} - ``` + + .. code-block:: json + + {"source.url": "http://{0}{1}"} + You get: - ``` - http://example.com/foo/ - http://example.net/bar/ - ``` + + .. code-block:: + + http://example.com/foo/ + http://example.net/bar/ + in the respective `source.url` fields. The value in the dictionary mapping is a format string in which the columns are available by their index. * `"default_url_protocol"`: For URLs you can give a default protocol which will be prepended to the data. * `"delimiter"`: separation character of the CSV, e.g. `","` @@ -1051,80 +990,84 @@ Lines starting with `'#'` will be ignored. Headers won't be interpreted. * `"type"`: set the `classification.type` statically, optional * `"data_type"`: treats the data of the given columns as the specified type; currently `"json"` is the only supported value. An example: - ```{ - "columns": [ "source.ip", "source.url", "extra.tags"], - "data_type": "{\"extra.tags\":\"json\"}" - }``` + .. code-block:: json + + { + "columns": [ "source.ip", "source.url", "extra.tags"], + "data_type": "{\"extra.tags\":\"json\"}" + } - It will ensure `extra.tags` is treated as `json`. + It will ensure `extra.tags` is treated as `json`. * `"filter_text"`: only process the lines containing or not containing specified text, to be used in conjunction with `filter_type` * `"filter_type"`: value can be whitelist or blacklist. If `whitelist`, only lines containing the text in `filter_text` will be processed, if `blacklist`, only lines NOT containing the text will be processed. - To process ipset format files use - ``` - { - "filter_text": "ipset add ", - "filter_type": "whitelist", - "columns": [ "__IGNORE__", "__IGNORE__", "__IGNORE__", "source.ip"] - } - ``` + To process ipset format files use + + .. code-block:: json + + { + "filter_text": "ipset add ", + "filter_type": "whitelist", + "columns": [ "__IGNORE__", "__IGNORE__", "__IGNORE__", "source.ip"] + } + * `"type_translation"`: If the source does have a field with information for `classification.type`, but it does not correspond to IntelMQ's types, -you can map them to the correct ones. The `type_translation` field can hold a dictionary, or a string with a JSON dictionary which maps the feed's values to IntelMQ's. - Example: - ```json - {"malware_download": "malware-distribution"} - ``` + you can map them to the correct ones. The `type_translation` field can hold a dictionary, or a string with a JSON dictionary which maps the feed's values to IntelMQ's. + Example: + + .. code-block:: json + + {"malware_download": "malware-distribution"} + * `"columns_required"`: A list of true/false for each column. By default, it is true for every column. -* * * -### Calidog Certstream +Calidog Certstream +^^^^^^^^^^^^^^^^^^ +**Information** -#### Information: * `name:` intelmq.bots.parsers.calidog.parser_certstream * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` parses data from the Certificate Transparency Log -#### Description +**Description** For each domain in the `leaf_cert.all_domains` object one event with the domain in `source.fqdn` (and `source.ip` as fallback) is produced. The seen-date is saved in `time.source` and the classification type is `other`.
* **Feed parameters** (see above) -* * * +ESET +^^^^ -### ESET +**Information** - -#### Information: * `name:` intelmq.bots.parsers.eset.parser * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` Parses data from ESET ETI TAXII server -#### Description +**Description** Supported collections: * "ei.urls (json)" * "ei.domains v2 (json)" +Cymru CAP Program +^^^^^^^^^^^^^^^^^ -* * * - -### Cymru CAP Program +**Information** -#### Information: * `name:` intelmq.bots.parsers.cymru.parser_cap_program * `public:` no * `cache (redis db):` none * `description:` Parses data from Cymru's CAP program feed. -#### Description +**Description** There are two different feeds available: * `infected_$date.txt` ("old") @@ -1132,227 +1075,246 @@ There are two different feeds available: The new one will replace the old one at some point in time; currently you need to fetch both. The parser handles both formats. -##### Old feed +**Old feed** As little information on the format is available, the mappings might not be correct in all cases. Some reports are not implemented at all as there is no data available to verify the parsing. If you do get errors like `Report ... not implement` or similar, please open an issue and report the (anonymized) example data. Thanks. The information about the event could be better in many cases but as Cymru does not want to be associated with the report, we can't add comments to the events in the parser, because then the source would be easily identifiable for the recipient. -### Cymru Full Bogons +Cymru Full Bogons +^^^^^^^^^^^^^^^^^ http://www.team-cymru.com/bogon-reference.html -#### Information: +**Information** + * `name:` intelmq.bots.parsers.cymru.parser_full_bogons * `public:` no * `cache (redis db):` none * `description:` Parses data from full bogons feed. -* * * +Github Feed +^^^^^^^^^^^ -### Github Feed +**Information** -#### Information * `name:` intelmq.bots.parsers.github_feed.parser * `description:` Parses feeds available publicly on GitHub (should receive from `github_api` collector) -* * * +Have I Been Pwned Callback Parser +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -### Have I Been Pwned Callback Parser +**Information** -#### Information: * `name:` intelmq.bots.parsers.hibp.parser_callback * `public:` no * `cache (redis db):` none * `description:` Parses data from Have I Been Pwned feed. -#### Description +**Description** Parses the data from a callback of a Have I Been Pwned Enterprise Subscription. Parses breaches and pastes and creates one event per e-mail address. The e-mail address is stored in `source.account`. `classification.type` is `leak` and `classification.identifier` is `breach` or `paste`. -* * * -### HTML Table Parser +HTML Table Parser +^^^^^^^^^^^^^^^^^ -#### Configuration parameters +**Configuration parameters** * `"columns"`: A list of strings or a string of comma-separated values with field names. The names must match the harmonization's field names. Empty column specifications and columns named `"__IGNORE__"` are ignored. E.g. - ```json - "columns": [ - "", - "source.fqdn", - "extra.http_host_header", - "__IGNORE__" - ], - ``` + + .. code-block:: json + + "columns": [ + "", + "source.fqdn", + "extra.http_host_header", + "__IGNORE__" + ], + is equivalent to: - ```json - "columns": ",source.fqdn,extra.http_host_header," - ``` + + .. code-block:: json + + "columns": ",source.fqdn,extra.http_host_header," + The first and the last column are not used in this example.
- It is possible to specify multiple columns using the `|` character. E.g. - ``` - "columns": "source.url|source.fqdn|source.ip" - ``` - First, bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will try to parse it as IP, if that fails, an error will be raised. - Some use cases - + It is possible to specify multiple columns using the `|` character. E.g. + + .. code-block:: json + + "columns": "source.url|source.fqdn|source.ip" - - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` + First, the bot will try to parse the value as a URL; if it fails, it will try to parse it as an FQDN; if that fails, it will try to parse it as an IP address; if that also fails, an error will be raised. + Some use cases: - - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` + - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` + - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` * `"ignore_values"`: A list of strings or a string of comma-separated values which will not be considered while assigning to the corresponding fields given in `columns`. E.g. + + .. code-block:: json + + "ignore_values": [ + "", + "unknown", + "Not listed", + ], + is equivalent to: + + .. code-block:: json + + "ignore_values": ",unknown,Not listed," + The following configuration will lead to assigning all values to malware.name and extra.SBL except `unknown` and `Not listed` respectively. + + .. code-block:: json + + "columns": [ + "source.url", + "malware.name", + "extra.SBL", + ], + "ignore_values": [ + "", + "unknown", + "Not listed", + ], + Parameters **columns and ignore_values must have the same length** * `"attribute_name"`: Filtering tables by table attributes, to be used in conjunction with `attribute_value`, optional. E.g. `class`, `id`, `style`. * `"attribute_value"`: String. - To filter all tables with attribute `class='details'` use - ```json - "attribute_name": "class", - "attribute_value": "details" - ``` + To filter all tables with attribute `class='details'` use + + .. code-block:: json + + "attribute_name": "class", + "attribute_value": "details" + * `"table_index"`: Index of the table if multiple tables present. If `attribute_name` and `attribute_value` given, index according to tables remaining after filtering with table attribute. Default: `0`. * `"split_column"`: Padded column to be split to get values, to be used in conjunction with `split_separator` and `split_index`, optional. * `"split_separator"`: Delimiter string for padded column. * `"split_index"`: Index of unpadded string in returned list from splitting `split_column` with `split_separator` as delimiter string. Default: `0`. E.g. - ```json - "split_column": "source.fqdn", - "split_separator": " ", - "split_index": 1, - ``` - With above configuration, column corresponding to `source.fqdn` with value `[D] lingvaworld.ru` will be assigned as `"source.fqdn": "lingvaworld.ru"`. + + ..
code-block:: json + + "split_column": "source.fqdn", + "split_separator": " ", + "split_index": 1, + + With the above configuration, the column corresponding to `source.fqdn` with value `[D] lingvaworld.ru` will be assigned as `"source.fqdn": "lingvaworld.ru"`. * `"skip_table_head"`: Boolean, skip the first row of the table, optional. Default: `true`. * `"default_url_protocol"`: For URLs you can give a default protocol which will be prepended to the data. Default: `"http://"`. * `"time_format"`: Optional. If `"timestamp"`, `"windows_nt"` or `"epoch_millis"` the time will be converted first. With the default `null` fuzzy time parsing will be used. * `"type"`: set the `classification.type` statically, optional * `"html_parser"`: The HTML parser to use, by default "html.parser", can also be e.g. "lxml", have a look at https://www.crummy.com/software/BeautifulSoup/bs4/doc/ -* * * +Key-Value Parser +^^^^^^^^^^^^^^^^ -### Key-Value Parser +**Information** -#### Information: * `name:` intelmq.bots.parsers.key_value.parser * `lookup:` no * `public:` no * `cache (redis db):` none * `description:` Parses text lines in key=value format, for example FortiGate firewall logs. -#### Configuration Parameters: +**Configuration Parameters** -* `pair_separator`: String separating key=value pairs, default "` `" (space). +* `pair_separator`: String separating key=value pairs, default `" "` (space). * `kv_separator`: String separating key and value, default `=`. * `keys`: Array of string->string, names of keys to propagate mapped to IntelMQ event fields. Example: - ```json - "keys": { - "srcip": "source.ip", - "dstip": "destination.ip" - } - ``` - The value mapped to `time.source` is parsed. If the value is numeric, it is interpreted. Otherwise, or if it fails, it is parsed fuzzy with dateutil. - If the value cannot be parsed, a warning is logged per line. + + .. code-block:: json + + "keys": { + "srcip": "source.ip", + "dstip": "destination.ip" + } + + The value mapped to `time.source` is parsed. If the value is numeric, it is interpreted. Otherwise, or if it fails, it is parsed fuzzily with dateutil. + If the value cannot be parsed, a warning is logged per line. * `strip_quotes`: Boolean, remove opening and closing quotes from values, default true. -#### Parsing limitations +**Parsing limitations** The input must not have (quoted) occurrences of the separator in the values. For example, this is not parsable (with space as separator): -``` -key="long value" key2="other value" -``` +.. code-block:: + + key="long value" key2="other value" In firewall logs like FortiGate, this does not occur. These logs usually look like: -``` -srcip=192.0.2.1 srcmac="00:00:5e:00:17:17" -``` -* * * +..
code-block:: + + srcip=192.0.2.1 srcmac="00:00:5e:00:17:17" + +McAfee Advanced Threat Defense File +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -### McAfee Advanced Threat Defense File +**Information** -#### Information: * `name:` intelmq.bots.parsers.mcafee.parser_atd_file * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` parses file hash information off ATD reports -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `verdict_severity`: min report severity to parse -* * * +McAfee Advanced Threat Defense IP +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -### McAfee Advanced Threat Defense IP +**Information** -#### Information: * `name:` intelmq.bots.parsers.mcafee.parser_atd_file * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` parses IP addresses off ATD reports -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `verdict_severity`: min report severity to parse -* * * +McAfee Advanced Threat Defense URL +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -### McAfee Advanced Threat Defense URL +**Information** -#### Information: * `name:` intelmq.bots.parsers.mcafee.parser_atd_file * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` parses URLs off ATD reports -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `verdict_severity`: min report severity to parse -* * * - -### Microsoft CTIP Parser +Microsoft CTIP Parser +^^^^^^^^^^^^^^^^^^^^^ * `name`: `intelmq.bots.parsers.microsoft.parser_ctip` * `public`: no * `cache (redis db)`: none * `description`: Parses data from the Microsoft CTIP Feed -#### Description +**Description** Can parse the JSON format provided by the Interflow interface (lists of dictionaries) as well as the format provided by the Azure interface (one dictionary per line). The provided data differs between the two formats/providers. @@ -1365,36 +1327,34 @@ The feeds only differ by a few fields, not in the format. The feeds contain a field called `Payload` which is nearly always a base64 encoded JSON structure. If decoding works, the contained fields are saved as `extra.payload.*`, otherwise the field is saved as `extra.payload.text`. -* * * - -### MISP - +MISP +^^^^ * `name:` intelmq.bots.parsers.misp.parser * `public:` no * `cache (redis db):` none * `description:` Parses MISP events -#### Description +**Description** MISP events collected by the MISPCollectorBot are passed to this parser for processing. Supported MISP event categories and attribute types are defined in the `SUPPORTED_MISP_CATEGORIES` and `MISP_TYPE_MAPPING` class constants. -* * * +n6 +^^ -### n6 +**Information** -#### Information: * `name`: `intelmq.bots.parsers.n6.parser_n6stomp` * `public`: no * `cache (redis db)`: none * `description`: Convert n6 data into IntelMQ format. -#### Configuration Parameters: +**Configuration Parameters** None -#### Description +**Description** Test messages are ignored, this is logged with debug logging level. Also contains a mapping for the classification (results in taxonomy, type and identifier). @@ -1402,48 +1362,49 @@ The `name` field is normally used as `malware.name`, if that fails due to disall If an n6 message contains multiple IP addresses, multiple events are generated, resulting in events only differing in the address information. 
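+To illustrate the multi-address case, a hypothetical and heavily simplified n6 message like the following would produce two events that differ only in the address information (the field names here are assumptions for illustration, not a reference for the n6 format):
+
+.. code-block:: json
+
+   {
+       "category": "bots",
+       "time": "2020-01-01T00:00:00Z",
+       "address": [
+           {"ip": "192.0.2.1"},
+           {"ip": "192.0.2.2"}
+       ]
+   }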
-* * * +Twitter +^^^^^^^ -### Twitter +**Information** -#### Information: * `name:` intelmq.bots.parsers.twitter.parser * `public:` no * `cache (redis db):` none * `description:` Extracts URLs from text, fuzzy, aimed at parsing tweets -#### Configuration Parameters: +**Configuration Parameters** * `domain_whitelist`: domains to be filtered out * `substitutions`: semicolon-delimited list (of even length) of substitution pairs (for example: '[.];.;,;.' substitutes '[.]' for '.' and ',' for '.') * `classification_type`: string with a valid classification type as defined in data harmonization * `default_scheme`: Default scheme for URLs if not given. See also the next section. -##### Default scheme +**Default scheme** The dependency `url-normalize` changed its behavior in version 1.4.0 from using `http://` as default scheme to `https://`. Version 1.4.1 added the possibility to specify it. Thus you can only use the `default_scheme` parameter with a current version of this library >= 1.4.1; with 1.4.0 you will always get `https://` as the default scheme, and for older versions < 1.4.0 `http://` is used. This does not affect URLs which already include the scheme. -* * * +Shadowserver +^^^^^^^^^^^^ -### Shadowserver +**Information** -#### Information * `name:` intelmq.bots.parsers.shadowserver.parser * `public:` yes * `description:` Parses different reports from Shadowserver. -#### Configuration Parameters +**Configuration Parameters** * `feedname`: Optional, the name of the feed, see list below for possible values. * `overwrite`: If an existing `feed.name` should be overwritten. -#### How this bot works? +**How does this bot work?** There are two possibilities for the bot to determine which feed the data belongs to, and thus the correct mapping of the columns: -#### Automatic feed detection +**Automatic feed detection** + Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. When processing a report, this bot takes `extra.file_name` from the report and @@ -1455,132 +1416,139 @@ The field `extra.file_name` has the following structure: `%Y-%m-%d-${report_name}[-suffix].csv` where suffix can be something like `country-geo`. For example, some possible filenames are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is `${report_name}`, between the date and the suffix. Since version 2.1.2 the date in the filename is optional, so filenames like `scan_tftp.csv` are also detected. -#### Fixed feed name +**Fixed feed name** + If the method above is not possible and for upgraded instances, the feed can be set with the `feedname` parameter. Feed-names are derived from the subjects of the Shadowserver E-Mails. A list of possible feeds can be found in the table below in the column "feed name".
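+As a sketch, such a fixed-name configuration could look like this; `Accessible-FTP` is just an example value, any feed name from the table below works:
+
+.. code-block::
+
+   "parameters": {
+       "feedname": "Accessible-FTP",
+       "overwrite": true
+   }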
-#### Supported reports: +**Supported reports** These are the supported feed name and their corresponding file name for automatic detection: -| feed name | file name | -|----------------------| ----------| -| Accessible-ADB | `scan_adb` | -| Accessible-AFP | `scan_afp` | -| Accessible-ARD | `scan_ard` | -| Accessible-Cisco-Smart-Install | `cisco_smart_install` | -| Accessible-CoAP | `scan_coap` | -| Accessible-CWMP | `scan_cwmp` | -| Accessible-FTP | `scan_ftp` | -| Accessible-Hadoop | `scan_hadoop` | -| Accessible-HTTP | `scan_http` | -| Accessible-Radmin | `scan_radmin` | -| Accessible-RDP | `scan_rdp` | -| Accessible-Rsync | `scan_rsync` | -| Accessible-SMB | `scan_smb` | -| Accessible-Telnet | `scan_telnet` | -| Accessible-Ubiquiti-Discovery-Service | `scan_ubiquiti` | -| Accessible-VNC | `scan_vnc` | -| Amplification-DDoS-Victim | `ddos_amplification` | -| Blacklisted-IP (deprecated) | `blacklist` | -| Blocklist | `blocklist` | -| CAIDA-IP-Spoofer | `caida_ip_spoofer` | -| Compromised-Website | `compromised_website` | -| Darknet | `darknet` | -| DNS-Open-Resolvers | `scan_dns` | -| Drone | `botnet_drone` | -| Drone-Brute-Force | `drone_brute_force` | -| HTTP-Scanners | `hp_http_scan` | -| ICS-Scanners | `hp_ics_scan` | -| IPv6-Sinkhole-HTTP-Drone | `sinkhole6_http` | -| Microsoft-Sinkhole | `microsoft_sinkhole` | -| NTP-Monitor | `scan_ntpmonitor` | -| NTP-Version | `scan_ntp` | -| Open-Chargen | `scan_chargen` | -| Open-DB2-Discovery-Service | `scan_db2` | -| Open-Elasticsearch | `scan_elasticsearch` | -| Open-IPMI | `scan_ipmi` | -| Open-IPP | `scan_ipp` | -| Open-LDAP | `scan_ldap ` | -| Open-LDAP-TCP | `scan_ldap_tcp` | -| Open-mDNS | `scan_mdns` | -| Open-Memcached | `scan_memcached` | -| Open-MongoDB | `scan_mongodb` | -| Open-MQTT | `scan_mqtt` | -| Open-MSSQL | `scan_mssql` | -| Open-NATPMP | `scan_nat_pmp` | -| Open-NetBIOS-Nameservice | `scan_netbios` | -| Open-Netis | ? 
| -| Open-Portmapper | `scan_portmapper` | -| Open-QOTD | `scan_qotd` | -| Open-Redis | `scan_redis` | -| Open-SNMP | `scan_snmp` | -| Open-SSDP | `scan_ssdp` | -| Open-TFTP | `scan_tftp` | -| Open-XDMCP | `scan_xdmcp` | -| Outdated-DNSSEC-Key | `outdated_dnssec_key` | -| Outdated-DNSSEC-Key-IPv6 | `outdated_dnssec_key_v6` | -| Sandbox-URL | `cwsandbox_url` | -| Sinkhole-HTTP-Drone | `sinkhole_http_drone` | -| Spam-URL | `spam_url` | -| SSL-FREAK-Vulnerable-Servers | `scan_ssl_freak` | -| SSL-POODLE-Vulnerable-Servers | `scan_ssl_poodle` | -| Vulnerable-ISAKMP | `scan_isakmp` | - -#### Development - -##### Structure of this Parser Bot: + ======================================= ========================= + feed name file name + ======================================= ========================= + Accessible-ADB `scan_adb` + Accessible-AFP `scan_afp` + Accessible-ARD `scan_ard` + Accessible-Cisco-Smart-Install `cisco_smart_install` + Accessible-CoAP `scan_coap` + Accessible-CWMP `scan_cwmp` + Accessible-FTP `scan_ftp` + Accessible-Hadoop `scan_hadoop` + Accessible-HTTP `scan_http` + Accessible-Radmin `scan_radmin` + Accessible-RDP `scan_rdp` + Accessible-Rsync `scan_rsync` + Accessible-SMB `scan_smb` + Accessible-Telnet `scan_telnet` + Accessible-Ubiquiti-Discovery-Service `scan_ubiquiti` + Accessible-VNC `scan_vnc` + Amplification-DDoS-Victim `ddos_amplification` + Blacklisted-IP (deprecated) `blacklist` + Blocklist `blocklist` + CAIDA-IP-Spoofer `caida_ip_spoofer` + Compromised-Website `compromised_website` + Darknet `darknet` + DNS-Open-Resolvers `scan_dns` + Drone `botnet_drone` + Drone-Brute-Force `drone_brute_force` + HTTP-Scanners `hp_http_scan` + ICS-Scanners `hp_ics_scan` + IPv6-Sinkhole-HTTP-Drone `sinkhole6_http` + Microsoft-Sinkhole `microsoft_sinkhole` + NTP-Monitor `scan_ntpmonitor` + NTP-Version `scan_ntp` + Open-Chargen `scan_chargen` + Open-DB2-Discovery-Service `scan_db2` + Open-Elasticsearch `scan_elasticsearch` + Open-IPMI `scan_ipmi` + Open-IPP `scan_ipp` + Open-LDAP `scan_ldap` + Open-LDAP-TCP `scan_ldap_tcp` + Open-mDNS `scan_mdns` + Open-Memcached `scan_memcached` + Open-MongoDB `scan_mongodb` + Open-MQTT `scan_mqtt` + Open-MSSQL `scan_mssql` + Open-NATPMP `scan_nat_pmp` + Open-NetBIOS-Nameservice `scan_netbios` + Open-Netis ? + Open-Portmapper `scan_portmapper` + Open-QOTD `scan_qotd` + Open-Redis `scan_redis` + Open-SNMP `scan_snmp` + Open-SSDP `scan_ssdp` + Open-TFTP `scan_tftp` + Open-XDMCP `scan_xdmcp` + Outdated-DNSSEC-Key `outdated_dnssec_key` + Outdated-DNSSEC-Key-IPv6 `outdated_dnssec_key_v6` + Sandbox-URL `cwsandbox_url` + Sinkhole-HTTP-Drone `sinkhole_http_drone` + Spam-URL `spam_url` + SSL-FREAK-Vulnerable-Servers `scan_ssl_freak` + SSL-POODLE-Vulnerable-Servers `scan_ssl_poodle` + Vulnerable-ISAKMP `scan_isakmp` + ======================================= ========================= + +**Development** + +**Structure of this Parser Bot** + The parser consists of two files: * `config.py` * `parser.py` Both files are required for the parser to work properly. -##### Add new Feedformats: +**Add new Feedformats** + Add a new feed format and conversions if required to the file `config.py`. Don't forget to update the `feed_idx` dict. It is required to look up the correct configuration. Look at the documentation in the bots's `config.py` file for more information. -* * * - +Shodan +^^^^^^ -### Shodan +**Information** -#### Information * `name:` intelmq.bots.parsers.shodan.parser * `public:` yes * `description:` Parses data from Shodan (search, stream etc). 
The parser is far from complete, as there are a lot of fields in a big nested structure. There is a minimal mode available which only parses the important/most useful fields and also saves everything in `extra.shodan` keeping the original structure. When not using the minimal mode, it may be useful to ignore errors, as many parsing errors can happen with the incomplete mapping. -#### Configuration Parameters: +**Configuration Parameters** * `ignore_errors`: Boolean (default true) * `minimal_mode`: Boolean (default false) -* * * +ZoneH +^^^^^ -### ZoneH +**Information** -#### Information * `name:` intelmq.bots.parsers.zoneh.parser * `public:` yes * `description:` Parses data from ZoneH. -#### Description +**Description** This bot is designed to consume defacement reports from zone-h.org. It expects fields normally present in CSV files distributed by email. -* * * -## Experts -### Abusix +Experts +------- + +Abusix +^^^^^^ + +**Information** -#### Information: * `name:` abusix * `lookup:` dns * `public:` yes @@ -1588,74 +1556,82 @@ fields normally present in CSV files distributed by email. * `description:` RIPE abuse contacts resolving through DNS TXT queries * `notes`: https://abusix.com/contactdb.html -#### Configuration Parameters: +**Configuration Parameters** -* **Cache parameters** (see in section [common parameters](#common-parameters)) +* **Cache parameters** (see in section :ref:`common-parameters`) + +**Requirements** -#### Requirements This bot can optionally use the Python module *querycontacts* by Abusix itself: https://pypi.org/project/querycontacts/ -```bash -pip3 install querycontacts -``` -If the package is not installed, our own routines are used. +.. code-block:: bash + + pip3 install querycontacts + +If the package is not installed, our own routines are used. -* * * -### ASN Lookup +ASN Lookup +^^^^^^^^^^ +**Information** -#### Information: * `name:` ASN lookup * `lookup:` local database * `public:` yes * `cache (redis db):` none * `description:` IP to ASN -#### Configuration Parameters: +**Configuration Parameters** * `database`: Path to the downloaded database. -#### Requirements +**Requirements** + Install the `pyasn` module -```bash -pip3 install pyasn -``` -#### Database +.. code-block:: bash + + pip3 install pyasn + +**Database** + Use this command to create/update the database and reload the bot: `intelmq.bots.experts.asn_lookup.expert --update-database` -* * * -### CSV Converter +CSV Converter +^^^^^^^^^^^^^ + +**Information** -#### Information: -* `name`: `intelmq.bots.experts.csv_converter.expert +* `name`: `intelmq.bots.experts.csv_converter.expert` * `lookup`: no * `public`: yes * `cache (redis db)`: none * `description`: Converts an event to CSV format, saved in the `output` field. -#### Configuration Parameters: +**Configuration Parameters** - * `delimiter`: String, default `","` - * `fieldnames`: Comma-separated list of field names, e.g. `"time.source,classification.type,source.ip"` +* `delimiter`: String, default `","` +* `fieldnames`: Comma-separated list of field names, e.g. `"time.source,classification.type,source.ip"` -#### Usage +**Usage** To use the CSV-converted data in an output bot (for example a file output), use the configuration parameter `single_key` of the output bot and set it to `output`.
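+A sketch of a matching file output configuration; the `file` path is illustrative, so check your output bot's documentation for its exact parameters:
+
+.. code-block::
+
+   "parameters": {
+       "file": "/opt/intelmq/var/lib/bots/file-output/events.txt",
+       "single_key": "output"
+   }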
-* * * -### Cymru Whois -#### Information: +Cymru Whois +^^^^^^^^^^^ + +**Information** + * `name:` cymru-whois * `lookup:` Cymru DNS * `public:` yes @@ -1664,14 +1640,14 @@ Public documentation: https://www.team-cymru.com/IP-ASN-mapping.html#dns -#### Configuration Parameters: +**Configuration Parameters** -* **Cache parameters** (see in section [common parameters](#common-parameters)) +* **Cache parameters** (see in section :ref:`common-parameters`) * `overwrite`: Overwrite existing fields. Default: `True` if not given (for backwards compatibility, will change in version 3.0.0) -* * * -### Domain Suffix +Domain Suffix +^^^^^^^^^^^^^ This bot adds the public suffix to the event, derived from a domain. For more information on the public suffix list, see: https://publicsuffix.org/list/ @@ -1683,301 +1659,329 @@ Note that the public suffix is not the same as the top level domain (TLD). E.g. Privately registered suffixes (such as `blogspot.co.at`), which are part of the public suffix list too, are ignored. -#### Information: +**Information** + * `name:` domain suffix * `lookup:` no * `public:` yes * `cache (redis db):` - * `description:` extracts the domain suffix from the FQDN -#### Configuration Parameters: +**Configuration Parameters** * `field`: either `"fqdn"` or `"reverse_dns"` * `suffix_file`: path to the suffix file -#### Rule processing +**Rule processing** A short summary of how the rules are processed: The simple ones: -``` -com -at -gv.at -``` + +.. code-block:: + + com + at + gv.at + `example.com` leads to `com`, `example.gv.at` leads to `gv.at`. Wildcards: -``` -*.example.com -``` + +.. code-block:: + + *.example.com + `www.example.com` leads to `www.example.com`. And additionally the exceptions, together with the above wildcard rule: -``` -!www.example.com -``` + +.. code-block:: + + !www.example.com + `www.example.com` now does not lead to `www.example.com`, but to `example.com`. -* * * -### Deduplicator +Deduplicator +^^^^^^^^^^^^ + +**Information** -#### Information: * `name:` deduplicator * `lookup:` redis cache * `public:` yes * `cache (redis db):` 6 * `description:` Bot responsible for ignoring duplicated messages. The bot can be configured to perform deduplication by looking only at specific fields in the message. -#### Configuration Parameters: +**Configuration Parameters** -* **Cache parameters** (see in section [common parameters](#common-parameters)) +* **Cache parameters** (see in section :ref:`common-parameters`) * `bypass` - true or false value to bypass the deduplicator. When set to true, messages will not be deduplicated. Default: false -##### Parameters for "fine-grained" deduplication +**Parameters for "fine-grained" deduplication** * `filter_type`: type of the filtering which can be "blacklist" or "whitelist". The filter type will be used to define how Deduplicator bot will interpret the parameter `filter_keys` in order to decide whether an event has already been seen or not, i.e., duplicated event or a completely new event. * "whitelist" configuration: only the keys listed in `filter_keys` will be considered to verify if an event is duplicated or not. * "blacklist" configuration: all keys except those in `filter_keys` will be considered to verify if an event is duplicated or not. * `filter_keys`: string with multiple keys separated by commas. Please note that the `time.observation` key will not be considered even if defined, because the system always ignores that key.
-##### Parameters Configuration Example +**Parameters Configuration Example** -###### Example 1 +*Example 1* The bot with this configuration will detect duplication only based on `source.ip` and `destination.ip` keys. -``` -"parameters": { - "redis_cache_db": 6, - "redis_cache_host": "127.0.0.1", - "redis_cache_password": null, - "redis_cache_port": 6379, - "redis_cache_ttl": 86400, - "filter_type": "whitelist", - "filter_keys": "source.ip,destination.ip", -} -``` +.. code-block:: + "parameters": { + "redis_cache_db": 6, + "redis_cache_host": "127.0.0.1", + "redis_cache_password": null, + "redis_cache_port": 6379, + "redis_cache_ttl": 86400, + "filter_type": "whitelist", + "filter_keys": "source.ip,destination.ip", + } + -###### Example 2 +*Example 2* The bot with this configuration will detect duplication based on all keys, except `source.ip` and `destination.ip` keys. -``` -"parameters": { - "redis_cache_db": 6, - "redis_cache_host": "127.0.0.1", - "redis_cache_password": null, - "redis_cache_port": 6379, - "redis_cache_ttl": 86400, - "filter_type": "blacklist", - "filter_keys": "source.ip,destination.ip", -} -``` +.. code-block:: + + "parameters": { + "redis_cache_db": 6, + "redis_cache_host": "127.0.0.1", + "redis_cache_password": null, + "redis_cache_port": 6379, + "redis_cache_ttl": 86400, + "filter_type": "blacklist", + "filter_keys": "source.ip,destination.ip", + } -#### Flushing the cache +**Flushing the cache** To flush the deduplicator's cache, you can use the `redis-cli` tool. Enter the database used by the bot and submit the `flushdb` command: -``` -redis-cli -n 6 -flushdb -``` +.. code-block:: bash + + redis-cli -n 6 + flushdb + + + +DO Portal Expert Bot +^^^^^^^^^^^^^^^^^^^^ -### DO Portal Expert Bot +**Information** -#### Information: * `name:` do_portal * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` The DO portal retrieves the contact information from a DO portal instance: http://github.com/certat/do-portal/ -#### Configuration Parameters: +**Configuration Parameters** * `mode` - Either `replace` or `append` the new abuse contacts in case there are existing ones. * `portal_url` - The URL to the portal, without the API-path. The used URL is `$portal_url + '/api/1.0/ripe/contact?cidr=%s'`. * `portal_api_key` - The API key of the user to be used. Must have sufficient privileges. -* * * +Field Reducer Bot +^^^^^^^^^^^^^^^^^ -### Field Reducer Bot +**Information** -#### Information: * `name:` reducer * `lookup:` none * `public:` yes * `cache (redis db):` none * `description:` The field reducer bot is capable of removing fields from events. -#### Configuration Parameters: +**Configuration Parameters** * `type` - either `"whitelist"` or `"blacklist"` * `keys` - Can be a JSON-list of field names (`["raw", "source.account"]`) or a string with a comma-separated list of field names (`"raw,source.account"`). -##### Whitelist +**Whitelist** Only the fields in `keys` will be passed along. -##### Blacklist +**Blacklist** The fields in `keys` will be removed from events. -* * * - -### Filter +Filter +^^^^^^ The filter bot is capable of filtering specific events.
-#### Information: +**Information** + * `name:` filter * `lookup:` none * `public:` yes * `cache (redis db):` none * `description:` filter messages (drop or pass messages) FIXME -#### Configuration Parameters: +**Configuration Parameters** + +*Parameters for filtering with key/value attributes* -##### Parameters for filtering with key/value attributes: * `filter_key` - key from data harmonization * `filter_value` - value for the key * `filter_action` - action when a message matches the criteria (possible actions: keep/drop) * `filter_regex` - attribute determines if the `filter_value` shall be treated as a regular expression or not. If this attribute is not empty, the bot uses python's "search" function to evaluate the filter. -##### Parameters for time based filtering: +*Parameters for time based filtering* + * `not_before` - events before this time will be dropped * `not_after` - events after this time will be dropped Both parameters accept string values describing absolute or relative time: + * absolute + * basically anything parseable by the datetime parser, e.g. "2015-09-12T06:22:11+00:00" * `time.source` taken from the event will be compared to this value to decide the filter behavior + * relative + * accepted string formatted like this "<integer> <epoch>", where <epoch> could be any of the following strings (optionally with a trailing 's'): hour, day, week, month, year * `time.source` taken from the event will be compared to the value (now - relative) to decide the filter behavior -Examples of time filter definition: +*Examples of time filter definition* + * ```"not_before" : "2015-09-12T06:22:11+00:00"``` events older than the specified time will be dropped * ```"not_after" : "6 months"``` only events older than 6 months will be passed through the pipeline -#### Possible paths +**Possible paths** * `_default`: default path, according to the configuration * `action_other`: Negation of the default path * `filter_match`: For all events the filter matched on * `filter_no_match`: For all events the filter does not match -| action | match | `_default` | `action_other` | `filter_match` | `filter_no_match` | -| ------ | ----- | ----------- | -------------- | -------------- | ----------------- | -| keep | ✓ | ✓ | ✗ | ✓ | ✗ | -| keep | ✗ | ✗ | ✓ | ✗ | ✓ | -| drop | ✓ | ✗ | ✓ | ✓ | ✗ | -| drop | ✗ | ✓ | ✗ | ✗ | ✓ | + ======= ====== ============ ============== ============== ================= + action match `_default` `action_other` `filter_match` `filter_no_match` + ======= ====== ============ ============== ============== ================= + keep ✓ ✓ ✗ ✓ ✗ + keep ✗ ✗ ✓ ✗ ✓ + drop ✓ ✗ ✓ ✓ ✗ + drop ✗ ✓ ✗ ✗ ✓ + ======= ====== ============ ============== ============== ================= In `DEBUG` logging level, one can see that the message is sent to both matching paths, even if one of the paths is not configured. Of course the message is only delivered to the configured paths. -* * * +Format Field +^^^^^^^^^^^^ -### Format Field +**Information** -#### Information: * `name:` Format Field * `lookup:` none * `cache (redis db):` none * `description:` String method operations on column values -#### Configuration Parameters: +**Configuration Parameters** + +*Parameters for stripping chars* -##### Parameters for stripping chars: * `strip_columns` - A list of strings or a string of comma-separated values with field names. The names must match the harmonization's field names. E.g. - ```json - "columns": [ - "malware.name", - "extra.tags" - ], - ``` + + ..
code-block:: json + + "columns": [ + "malware.name", + "extra.tags" + ], + is equivalent to: - ```json + + .. code-block:: json + "columns": "malware.name,extra.tags" - ``` + * `strip_chars` - a set of characters to remove as leading/trailing characters (default: ` ` or whitespace) -##### Parameters for replacing chars: +*Parameters for replacing chars* * `replace_column` - key from data harmonization * `old_value` - the string to search for * `new_value` - the string to replace the old value with * `replace_count` - number specifying how many occurrences of the old value you want to replace (default: `1`) -##### Parameters for splitting string to list of string: +*Parameters for splitting string to list of string* * `split_column` - key from data harmonization * `split_separator` - specifies the separator to use when splitting the string (default: `,`) Order of operation: `strip -> replace -> split`. These three methods can be combined, e.g. first strip and then split. -* * * - -### Generic DB Lookup +Generic DB Lookup +^^^^^^^^^^^^^^^^^ This bot is capable of enriching IntelMQ events by lookups to a database. Currently only PostgreSQL and SQLite are supported. If more than one result is returned, a ValueError is raised. -#### Information: +**Information** + * `name:` `intelmq.bots.experts.generic_db_lookup.expert` * `lookup:` database * `public:` yes * `cache (redis db):` none * `description:` This bot is capable of enriching IntelMQ events by lookups to a database. -#### Configuration Parameters: +**Configuration Parameters** -##### Connection +*Connection* * `engine`: `postgresql` or `sqlite` * `database`: string, defaults to "intelmq", database name or the SQLite filename * `table`: defaults to "contacts" -##### PostgreSQL specific +*PostgreSQL specific* + * `host`: string, defaults to "localhost" * `password`: string * `port`: integer, defaults to 5432 * `sslmode`: string, defaults to "require" * `user`: defaults to "intelmq" -##### Lookup +*Lookup* * `match_fields`: defaults to `{"source.asn": "asn"}` The value is a key-value mapping of an arbitrary number of **intelmq** field names **to table** column names. The values are compared with `=` only. -##### Replace fields. +*Replace fields* * `overwrite`: defaults to `false`. Is applied per field * `replace_fields`: defaults to `{"contact": "source.abuse_contact"}` `replace_fields` is again a key-value mapping of an arbitrary number of **table** column names **to intelmq** field names -* * * -### Gethostbyname -#### Information: +Gethostbyname +^^^^^^^^^^^^^ + +**Information** + * `name:` gethostbyname * `lookup:` DNS * `public:` yes * `cache (redis db):` none * `description:` DNS name (FQDN) to IP -#### Configuration Parameters: +**Configuration Parameters** - `fallback_to_url`: If true and no `source.fqdn` is present, use `source.url` instead while producing `source.ip` - `gaierrors_to_ignore`: Optional, list (comma-separated) of gaierror codes to ignore, e.g. `-3` for EAI_AGAIN (Temporary failure in name resolution). Only accepts the integer values, not the names. -#### Description +**Description** Resolves the `source/destination.fqdn` hostname using the `gethostbyname` syscall and saves the resulting IP address as `source/destination.ip`.
The following gaierror resolution errors are ignored and treated as if the hostname cannot be resolved: @@ -1989,37 +1993,41 @@ The following gaierror resolution errors are ignored and treated as if the hostn Other errors result in an exception if not ignored by the parameter `gaierrors_to_ignore` (see above). All gaierrors can be found here: http://www.castaglia.org/proftpd/doc/devel-guide/src/lib/glibc-gai_strerror.c.html -* * * -### IDEA Converter + +IDEA Converter +^^^^^^^^^^^^^^ Converts the event to IDEA format and saves it as JSON in the field `output`. All other fields are not modified. Documentation about IDEA: https://idea.cesnet.cz/en/index -#### Information: +**Information** + * `name:` intelmq.bots.experts.idea.expert * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` The bot does a best effort translation of events into the IDEA format. -#### Configuration Parameters: +**Configuration Parameters** * `test_mode`: add `Test` category to mark all outgoing IDEA events as informal (meant to simplify setting up and debugging new IDEA producers) (default: `true`) -* * * -### MaxMind GeoIP -#### Information: +MaxMind GeoIP +^^^^^^^^^^^^^ + +**Information** + * `name:` intelmq.bots.experts.maxmind_geoip.expert * `lookup:` local database * `public:` yes * `cache (redis db):` none * `description:` IP to geolocation -#### Setup +**Setup** The bot requires the MaxMind's `geoip2` Python library, version 2.2.0 has been tested. @@ -2027,32 +2035,35 @@ To download the database a free license key is required. More information can be You may want to use a shell script provided in the contrib directory to keep the database up to date: `contrib/cron-jobs/update-geoip-data` -#### Configuration Parameters: +**Configuration Parameters** * `database`: Path to the local database, e.g. `"/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb"` * `overwrite`: boolean * `use_registered`: boolean. MaxMind has two country ISO codes: One for the physical location of the address and one for the registered location. Default is `false` (backwards-compatibility). See also https://github.com/certtools/intelmq/pull/1344 for a short explanation. * `license_key`: License key is necessary for downloading the GeoLite2 database. -#### Database +**Database** + Use this command to create/update the database and reload the bot: `intelmq.bots.experts.maxmind_geoip.expert --update-database` -* * * -### MISP + +MISP +^^^^ Queries a MISP instance for the `source.ip` and adds the MISP Attribute UUID and MISP Event ID of the newest attribute found. 
-#### Information: +**Information** + * `name:` intelmq.bots.experts.misp.expert * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` IP address to MISP attribute and event -#### Configuration Parameters: +**Configuration Parameters** * `misp_key`: MISP Authkey * `misp_url`: URL of MISP server (with trailing '/') @@ -2060,18 +2071,20 @@ Queries a MISP instance for the `source.ip` and adds the MISP Attribute UUID and Generic parameters used in this bot: * `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`) -* * * -### McAfee Active Response Hash lookup -#### Information: +McAfee Active Response Hash lookup +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** + * `name:` intelmq.bots.experts.mcafee.expert_mar * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` Queries occurrences of hashes within local environment -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `dxl_config_file`: location of file containing required information to connect to DXL bus @@ -2081,111 +2094,117 @@ Generic parameters used in this bot: - `DestIP`: looks up `destination.ip` - `DestFQDN`: looks up in `destination.fqdn` -* * * -### McAfee Active Response IP lookup -#### Information: +McAfee Active Response IP lookup +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** + * `name:` intelmq.bots.experts.mcafee.expert_mar_ip * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` Queries occurrences of connection attempts to destination ip/port within local environment -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `dxl_config_file`: location of file containing required information to connect to DXL bus -* * * -### McAfee Active Response URL lookup -#### Information: +McAfee Active Response URL lookup +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** + * `name:` intelmq.bots.experts.mcafee.expert_mar_url * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` Queries occurrences of FQDN lookups within local environment -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `dxl_config_file`: location of file containing required information to connect to DXL bus -* * * -### Modify -#### Information: +Modify +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** + * `name:` modify * `lookup:` local config * `public:` yes * `cache (redis db):` none * `description:` modify expert bot allows you to change arbitrary field values of events just using a configuration file -#### Configuration Parameters: +**Configuration Parameters** * `configuration_path`: filename * `case_sensitive`: boolean, default: true * `maximum_matches`: Maximum number of matches. Processing stops after the limit is reached. Default: no limit (`null`, `0`). * `overwrite`: Overwrite any existing fields by matching rules. Default if the parameter is given: `true`, for backwards compatibility. Default will change to `false` in version 3.0.0. -#### Configuration File +**Configuration File** The modify expert bot allows you to change arbitrary field values of events just using a configuration file. Thus it is possible to adapt certain values or adding new ones only by changing JSON-files without touching the code of many other bots. 
The configuration is called `modify.conf` and looks like this: -```json -[ - { - "rulename": "Standard Protocols http", - "if": { - "source.port": "^(80|443)$" - }, - "then": { - "protocol.application": "http" - } - }, - { - "rulename": "Spamhaus Cert conficker", - "if": { - "malware.name": "^conficker(ab)?$" - }, - "then": { - "classification.identifier": "conficker" - } - }, - { - "rulename": "bitdefender", - "if": { - "malware.name": "bitdefender-(.*)$" - }, - "then": { - "malware.name": "{matches[malware.name][1]}" - } - }, - { - "rulename": "urlzone", - "if": { - "malware.name": "^urlzone2?$" - }, - "then": { - "classification.identifier": "urlzone" - } - }, - { - "rulename": "default", - "if": { - "feed.name": "^Spamhaus Cert$" - }, - "then": { - "classification.identifier": "{msg[malware.name]}" - } - } -] -``` +.. code-block:: json + + [ + { + "rulename": "Standard Protocols http", + "if": { + "source.port": "^(80|443)$" + }, + "then": { + "protocol.application": "http" + } + }, + { + "rulename": "Spamhaus Cert conficker", + "if": { + "malware.name": "^conficker(ab)?$" + }, + "then": { + "classification.identifier": "conficker" + } + }, + { + "rulename": "bitdefender", + "if": { + "malware.name": "bitdefender-(.*)$" + }, + "then": { + "malware.name": "{matches[malware.name][1]}" + } + }, + { + "rulename": "urlzone", + "if": { + "malware.name": "^urlzone2?$" + }, + "then": { + "classification.identifier": "urlzone" + } + }, + { + "rulename": "default", + "if": { + "feed.name": "^Spamhaus Cert$" + }, + "then": { + "classification.identifier": "{msg[malware.name]}" + } + } + ] In our example above we have five groups labeled `Standard Protocols http`, `Spamhaus Cert conficker`, `bitdefender`, `urlzone` and `default`. @@ -2201,7 +2220,7 @@ If the value for a condition is an empty string, the bot checks if the field doe This is useful to apply default values for empty fields. -##### Actions +**Actions** You can set the value of the field to a string literal or number. @@ -2213,54 +2232,58 @@ Group 0 (`[0]`) contains the full matching string. See also the documentation on Note that `matches` will also contain the match groups from the default conditions if there were any. -##### Examples +**Examples** We have an event with `feed.name = Spamhaus Cert` and `malware.name = confickerab`. The expert loops over all sections in the file and eventually enters section `Spamhaus Cert`. First, the default condition is checked, it matches! OK, going on. Otherwise the expert would have selected a different section that has not yet been considered. Now, go through the rules, until we hit the rule `conficker`. We combine the conditions of this rule with the default conditions, and both rules match! So we can apply the action: `classification.identifier` is set to `conficker`, the trivial name. Assume we have an event with `feed.name = Spamhaus Cert` and `malware.name = feodo`. The default condition matches, but no others. So the default action is applied. The value for `classification.identifier` will be set to `feodo` by `{msg[malware.name]}`. -##### Types +**Types** If the rule is a string, a regex-search is performed, also for numeric values (`str()` is called on them). If the rule is numeric for numeric values, a simple comparison is done. If other types are mixed, a warning will be thrown. For boolean values, the comparison value needs to be `true` or `false` as in JSON they are written all-lowercase. 
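+As a sketch of these type rules, a hypothetical rule combining a numeric comparison and a regex match could look like this (rule name and values are invented for illustration):
+
+.. code-block:: json
+
+   [
+       {
+           "rulename": "example numeric and string match",
+           "if": {
+               "source.port": 23,
+               "malware.name": "^mirai"
+           },
+           "then": {
+               "classification.identifier": "telnet-bruteforce"
+           }
+       }
+   ]
+
+Here `source.port` is compared numerically, while `malware.name` is evaluated as a regular expression via a regex search.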
-* * *
-### National CERT contact lookup by CERT.AT
-#### Information:
+National CERT contact lookup by CERT.AT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**Information**
+
* `name:` `national_cert_contact_certat`
* `lookup:` https
* `public:` yes
* `cache (redis db):` none
* `description:` https://contacts.cert.at offers an IP address to national CERT contact (and cc) mapping. See https://contacts.cert.at for more info.
-#### Configuration Parameters:
+**Configuration Parameters**
* `filter`: (true/false) act as a filter for AT.
* `overwrite_cc`: set to true if you want to overwrite any potentially existing cc fields in the event.
-* * *
-### RecordedFuture IP risk
+
+RecordedFuture IP risk
+^^^^^^^^^^^^^^^^^^^^^^
This bot tags events with the risk score found in Recorded Future's large IP risk list.
-#### Information:
+**Information**
+
* `name:` recordedfuture_iprisk
* `lookup:` local database
* `public:` no
* `cache (redis db):` none
* `description:` Records the risk score associated with the source and destination IP if they are present. Assigns 0 to IP addresses not in the RF list.
-#### Configuration Parameters:
+**Configuration Parameters**
* `database`: Location of the CSV file obtained from the Recorded Future API (a script is provided to download the large IP set)
* `overwrite`: set to true if you want to overwrite any potentially existing risk score fields in the event.
* `api_token`: This needs to contain a valid API token to download the latest database data.
-#### Description
+**Description**
For both `source.ip` and `destination.ip` the corresponding risk score is fetched from a local database created from Recorded Future's API. The score is recorded in `extra.rf_iprisk.source` and `extra.rf_iprisk.destination`. If a lookup for an IP fails, a score of 0 is recorded.
@@ -2273,33 +2296,37 @@ If IP's are not present in the database a risk score of 0 is given
A script is supplied that may be run as intelmq to update the database.
-#### Database
+**Database**
+
Use this command to create/update the database and reload the bot:
`intelmq.bots.experts.recordedfuture_iprisk.expert --update-database`
-* * *
-### Reverse DNS
+
+Reverse DNS
+^^^^^^^^^^^
For both `source.ip` and `destination.ip` the PTR record is fetched and the first valid result is used for `source.reverse_dns`/`destination.reverse_dns`.
-#### Information:
+**Information**
+
* `name:` reverse-dns
* `lookup:` DNS
* `public:` yes
* `cache (redis db):` 8
* `description:` IP to domain
-#### Configuration Parameters:
+**Configuration Parameters**
-* **Cache parameters** (see in section [common parameters](#common-parameters))
+* **Cache parameters** (see in section :ref:`common-parameters`)
* `cache_ttl_invalid_response`: The TTL for cached invalid responses.
* `overwrite`: Overwrite existing fields. Default: `True` if not given (for backwards compatibility, will change in version 3.0.0)
-* * *
-### RFC1918
+
+RFC1918
+^^^^^^^
Several RFCs define ASNs, IP addresses and hostnames (and TLDs) reserved for *documentation*. Events or fields of events can be dropped if they match the criteria of either being reserved for documentation (e.g. AS 64496, domain `example.com`)
@@ -2308,22 +2335,24 @@ or belonging to a local area network (e.g. `192.168.0.0/24`). These checks can a
It is configurable if the whole event should be dropped ("policies") or just the field removed, as well as which fields should be checked.
Sources: -* https://tools.ietf.org/html/rfc1918 -* https://tools.ietf.org/html/rfc2606 -* https://tools.ietf.org/html/rfc3849 -* https://tools.ietf.org/html/rfc4291 -* https://tools.ietf.org/html/rfc5737 + +* :rfc:`1918` +* :rfc:`2606` +* :rfc:`3849` +* :rfc:`4291` +* :rfc:`5737` * https://en.wikipedia.org/wiki/IPv4 * https://en.wikipedia.org/wiki/Autonomous\_system\_(Internet) -#### Information: +**Information** + * `name:` rfc1918 * `lookup:` none * `public:` yes * `cache (redis db):` none * `description:` removes events or single fields with invalid data -#### Configuration Parameters: +**Configuration Parameters** * `fields`: string, comma-separated list of fields e.g. `destination.ip,source.asn,source.url`. Supported fields are: * `destination.asn` & `source.asn` @@ -2337,20 +2366,22 @@ With the example parameter values given above, this means that: * If a `source.asn` value is in the range of reserved AS numbers, the event will be removed altogether (policy "drop). * If a `source.url` value contains a host with either an IP address part of a reserved network block, or a reserved domain name (or with a reserved TLD), the event will be dropped (policy "drop") -* * * -### Ripe + +Ripe +^^^^ Online RIPE Abuse Contact and Geolocation Finder for IP addresses and Autonomous Systems. -#### Information: +**Information** + * `name:` ripencc-abuse-contact * `lookup:` HTTPS API * `public:` yes * `cache (redis db):` 10 * `description:` IP to abuse contact -#### Configuration Parameters: +**Configuration Parameters** * **Cache parameters** (see in section [common parameters](#common-parameters)) * `mode`: either `append` (default) or `replace` @@ -2360,27 +2391,29 @@ Online RIPE Abuse Contact and Geolocation Finder for IP addresses and Autonomous * `query_ripe_stat_ip`: Query for IPs at `https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=%s`, default `true` * `query_ripe_stat_geolocation`: Query for IPs at `https://stat.ripe.net/data/maxmind-geo-lite/data.json?resource=%s`, default `true` -* * * -### Sieve -#### Information: +Sieve +^^^^^ + +**Information** + * `name:` sieve * `lookup:` none * `public:` yes * `cache (redis db):` none * `description:` Filtering with a sieve-based configuration language -#### Configuration Parameters: +**Configuration Parameters** * `file`: Path to sieve file. Syntax can be validated with `intelmq_sieve_expert_validator`. -#### Description +**Description** The sieve bot is used to filter and/or modify events based on a set of rules. The rules are specified in an external configuration file and with a syntax similar -to the [Sieve language](http://sieve.info/) used for mail filtering. +to the `Sieve language `_ used for mail filtering. Each rule defines a set of matching conditions on received events. Events can be matched based on keys and values in the event. If the processed event matches a @@ -2388,63 +2421,64 @@ rule's conditions, the corresponding actions are performed. Actions can specify whether the event should be kept or dropped in the pipeline (filtering actions) or if keys and values should be changed (modification actions). -#### Requirements +**Requirements** To use this bot, you need to install the required dependencies: -``` -pip3 install -r intelmq/bots/experts/sieve/REQUIREMENTS.txt -``` - -#### Examples -The following excerpts illustrate some of the basic features of the sieve file -format: +.. code-block:: bash -``` -if :exists source.fqdn { - keep // aborts processing of subsequent rules and forwards the event. 
-} + pip3 install -r intelmq/bots/experts/sieve/REQUIREMENTS.txt +**Examples** -if :notexists source.abuse_contact || source.abuse_contact =~ '.*@example.com' { - drop // aborts processing of subsequent rules and drops the event. -} +The following excerpts illustrate some of the basic features of the sieve file +format: -if source.ip << '192.0.0.0/24' { - add! comment = 'bogon' // sets the field comment to this value and overwrites existing values - path 'other-path' // the message is sent to the given path -} +.. code-block:: -if classification.type == ['phishing', 'malware'] && source.fqdn =~ '.*\.(ch|li)$' { - add! comment = 'domainabuse' - keep -} elif classification.type == 'scanner' { - add! comment = 'ignore' - drop -} else { - remove comment -} -``` + if :exists source.fqdn { + keep // aborts processing of subsequent rules and forwards the event. + } + + + if :notexists source.abuse_contact || source.abuse_contact =~ '.*@example.com' { + drop // aborts processing of subsequent rules and drops the event. + } + + if source.ip << '192.0.0.0/24' { + add! comment = 'bogon' // sets the field comment to this value and overwrites existing values + path 'other-path' // the message is sent to the given path + } + + if classification.type == ['phishing', 'malware'] && source.fqdn =~ '.*\.(ch|li)$' { + add! comment = 'domainabuse' + keep + } elif classification.type == 'scanner' { + add! comment = 'ignore' + drop + } else { + remove comment + } -#### Reference +**Reference** -##### Sieve File Structure +*Sieve File Structure* The sieve file contains an arbitrary number of rules of the form: -``` -if EXPRESSION { - ACTIONS -} elif EXPRESSION { - ACTIONS -} else { - ACTIONS -} -``` +.. code-block:: + + if EXPRESSION { + ACTIONS + } elif EXPRESSION { + ACTIONS + } else { + ACTIONS + } -##### Expressions +*Expressions* Each rule specifies on or more expressions to match an event based on its keys and values. Event keys are specified as strings without quotes. String values @@ -2462,8 +2496,7 @@ specified with quotes. Following operators may be used to match events: * `:contains` matches on substrings. - * `=~` matches strings based on the given regex. `!~` is the inverse regex - match. + * `=~` matches strings based on the given regex. `!~` is the inverse regex match. * Numerical comparisons are evaluated with `<`, `<=`, `>`, `>=`. @@ -2471,8 +2504,7 @@ specified with quotes. Following operators may be used to match events: ```if source.ip << '10.0.0.0/8' { ... }``` - * Values to match against can also be specified as list, in which case any one - of the values will result in a match: + * Values to match against can also be specified as list, in which case any one of the values will result in a match: ```if source.ip == ['8.8.8.8', '8.8.4.4'] { ... }``` @@ -2487,34 +2519,28 @@ specified with quotes. Following operators may be used to match events: The result is *not* that the field must be unequal to all given values. -##### Actions +*Actions* If part of a rule matches the given conditions, the actions enclosed in `{` and `}` are applied. By default, all events that are matched or not matched by rules in the sieve file will be forwarded to the next bot in the pipeline, unless the `drop` action is applied. - * `add` adds a key value pair to the event. This action only applies if the key - is not yet defined in the event. If the key is already defined, the action is - ignored. Example: + * `add` adds a key value pair to the event. This action only applies if the key is not yet defined in the event. 
If the key is already defined, the action is ignored. Example: ```add comment = 'hello, world'``` * `add!` same as above, but will force overwrite the key in the event. - * `update` modifies an existing value for a key. Only applies if the key is -already defined. If the key is not defined in the event, this action is ignored. -Example: + * `update` modifies an existing value for a key. Only applies if the key is already defined. If the key is not defined in the event, this action is ignored. Example: ```update feed.accuracy = 50``` - * `remove` removes a key/value from the event. Action is ignored if the key is - not defined in the event. Example: + * `remove` removes a key/value from the event. Action is ignored if the key is not defined in the event. Example: ```remove extra.comments``` - * `keep` sends the message to the next bot in the pipeline - (same as the default behaviour), and stops sieve file processing. + * `keep` sends the message to the next bot in the pipeline (same as the default behaviour), and stops sieve file processing. ```keep``` @@ -2524,38 +2550,37 @@ Example: ```path 'named-queue'``` - * `drop` marks the event to be dropped. The event will not be forwarded to the - next bot in the pipeline. The sieve file processing is interrupted upon - reaching this action. No other actions may be specified besides the `drop` - action within `{` and `}`. + * `drop` marks the event to be dropped. The event will not be forwarded to the next bot in the pipeline. The sieve file processing is interrupted upon + reaching this action. No other actions may be specified besides the `drop` action within `{` and `}`. -##### Comments +*Comments* Comments may be used in the sieve file: all characters after `//` and until the end of the line will be ignored. -##### Validating a sieve file +*Validating a sieve file* Use the following command to validate your sieve files: -``` -$ intelmq.bots.experts.sieve.validator -usage: intelmq.bots.experts.sieve.validator [-h] sievefile -Validates the syntax of sievebot files. +.. code-block:: bash -positional arguments: - sievefile Sieve file + $ intelmq.bots.experts.sieve.validator + usage: intelmq.bots.experts.sieve.validator [-h] sievefile + + Validates the syntax of sievebot files. + + positional arguments: + sievefile Sieve file + + optional arguments: + -h, --help show this help message and exit -optional arguments: - -h, --help show this help message and exit -``` +Taxonomy +^^^^^^^^ -* * * +**Information** -### Taxonomy - -#### Information: * `name:` taxonomy * `lookup:` no * `public:` yes @@ -2564,11 +2589,11 @@ optional arguments: Please note that there is a [slight mismatch of IntelMQ's taxonomy to the upstream taxonomy](https://github.com/certtools/intelmq/issues/1409), but it should not matter here much. -#### Configuration Parameters: +**Configuration Parameters** None. -#### Description +**Description** Information on the "Reference Security Incident Taxonomy" can be found here: https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force @@ -2578,11 +2603,11 @@ For brevity, "type" means `classification.type` and "taxonomy" means `classifica - If neither taxonomy, not type is given, taxonomy is set to "other" and type to "unknown". - If taxonomy is given, but type is not, type is set to "unknown". 
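+
+For instance, assuming the RSIT mapping (where the type `phishing` belongs to the taxonomy `fraud`), an event entering this bot as
+
+.. code-block:: json
+
+   {"classification.type": "phishing"}
+
+leaves it with the taxonomy added:
+
+.. code-block:: json
+
+   {"classification.type": "phishing", "classification.taxonomy": "fraud"}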
-* * *
+Threshold
+^^^^^^^^^
-### Threshold
+**Information**
-#### Information:
* **Cache parameters** (see in section :ref:`common-parameters`)
* `name`: threshold
@@ -2591,21 +2616,22 @@ For brevity, "type" means `classification.type` and "taxonomy" means `classifica
* `cache (redis db)`: 11
* `description`: Check if the number of similar messages during a specified time interval exceeds a set value.
-#### Configuration Parameters:
+**Configuration Parameters**
* `filter_keys`: String, comma-separated list of field names to consider or ignore when determining which messages are similar.
* `filter_type`: String, `whitelist` (consider only the fields in `filter_keys`) or `blacklist` (consider everything but the fields in `filter_keys`).
* `timeout`: Integer, number of seconds before the threshold counter is reset.
* `threshold`: Integer, number of messages required before propagating one. In forwarded messages, the threshold is saved in the message as `extra.count`.
* `add_keys`: Array of string->string, optional, fields and values to add (or update) to propagated messages. Example:
-  ```json
-  "add_keys": {
-      "classification.type": "spam",
-      "comment": "Started more than 10 SMTP connections"
-  }
-  ```
-#### Limitations
+  .. code-block:: json
+
+     "add_keys": {
+         "classification.type": "spam",
+         "comment": "Started more than 10 SMTP connections"
+     }
+
+**Limitations**
This bot has certain limitations and is not a true threshold filter (yet). It works like this:
1. Every incoming message is hashed according to the `filter_*` parameters.
@@ -2614,55 +2640,58 @@ This bot has certain limitations and is not a true threshold filter (yet). It wo
Please note: Even if a message is sent, any further identical messages are dropped, if the time difference to the last message is less than the timeout! The counter is not reset if the threshold is reached.
-* * *
+Tor Nodes
+^^^^^^^^^
-### Tor Nodes
+**Information**
-#### Information:
* `name:` tor-nodes
* `lookup:` local database
* `public:` yes
* `cache (redis db):` none
* `description:` check if the IP is a Tor node
-#### Configuration Parameters:
+**Configuration Parameters**
* `database`: Path to the database
-#### Database
+**Database**
+
Use this command to create/update the database and reload the bot:
`intelmq.bots.experts.tor_nodes.expert --update-database`
-* * *
-
-### Url2FQDN
+Url2FQDN
+^^^^^^^^
This bot extracts the host from the `source.url` and `destination.url` fields and writes it to `source.fqdn` or `destination.fqdn` if it is a hostname, or `source.ip` or `destination.ip` if it is an IP address.
-#### Information:
+**Information**
+
* `name:` url2fqdn
* `lookup:` none
* `public:` yes
* `cache (redis db):` none
* `description:` writes domain name from URL to FQDN or IP address
-#### Configuration Parameters:
+**Configuration Parameters**
* `overwrite`: boolean, replace existing FQDN / IP address?
-### Wait
+Wait
+^^^^
+
+**Information**
-#### Information:
* `name:` wait
* `lookup:` none
* `public:` yes
* `cache (redis db):` none
* `description:` Waits for some time or until a queue size is lower than a given number.
-#### Configuration Parameters:
+**Configuration Parameters**
* `queue_db`: Database number of the database, default `2`. Converted to integer.
* `queue_host`: Host of the database, default `localhost`.
@@ -2679,25 +2708,26 @@ Otherwise the dummy mode is active, the events are just passed without an additi
Note that SIGHUPs and reloads interrupt the sleeping.
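+
+A sketch of the queue-watching mode; the parameter names follow the `queue_*` pattern above, but `queue_name` and `queue_size` are assumptions here, so check the BOTS file for the authoritative list:
+
+.. code-block:: json
+
+   {
+      "queue_db": 2,
+      "queue_host": "localhost",
+      "queue_name": "file-output-queue",
+      "queue_size": 10000
+   }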
-* * *
+Outputs
+-------
-## Outputs
-
-### AMQP Topic
+AMQP Topic
+^^^^^^^^^^
Sends data to an AMQP server. See https://www.rabbitmq.com/tutorials/amqp-concepts.html for more details on the AMQP topic exchange.
-Requires the [`pika` python library](https://pypi.org/project/pika/).
+Requires the `pika python library <https://pypi.org/project/pika/>`_.
+
+**Information**
-#### Information
* `name`: `intelmq.bots.outputs.amqptopic.output`
* `lookup`: to the amqp server
* `public`: yes
* `cache`: no
* `description`: Sends the event to a specified topic of an AMQP server
-#### Configuration parameters:
+**Configuration parameters**
* connection_attempts : The number of connection attempts to the defined server, defaults to 3
* connection_heartbeat : Heartbeat to server, in seconds, defaults to 3600
@@ -2722,43 +2752,43 @@ Requires the [`pika` python library](https://pypi.org/project/pika/).
If no authentication should be used, leave username or password empty or `null`.
-#### Examples of usage:
+**Examples of usage**
* Useful to send events to a RabbitMQ exchange topic to be further processed in other platforms.
-#### Confirmation
+**Confirmation**
If the routing key or exchange name is invalid or non-existent, the message is accepted by the server but we receive no confirmation. If the parameter require_confirmation is True and no confirmation is received, an error is raised.
-#### Common errors
+**Common errors**
-##### Unroutable messages / Undefined destination queue
+*Unroutable messages / Undefined destination queue*
The destination exchange and queue need to exist beforehand,
-with your preferred settings (e.g. durable, [lazy queue](https://www.rabbitmq.com/lazy-queues.html).
+with your preferred settings (e.g. durable, `lazy queue <https://www.rabbitmq.com/lazy-queues.html>`_).
If the error message says that the message is "unroutable", the queue doesn't exist.
-* * *
-
-### Blackhole
+Blackhole
+^^^^^^^^^
This output bot discards all incoming messages.
-#### Information
+**Information**
+
* `name`: blackhole
* `lookup`: no
* `public`: yes
* `cache`: no
* `description`: discards messages
-* * *
+Elasticsearch Output Bot
+^^^^^^^^^^^^^^^^^^^^^^^^
-### Elasticsearch Output Bot
+**Information**
-#### Information
* `name`: `intelmq.bots.outputs.elasticsearch.output`
* `lookup`: yes
* `public`: yes
@@ -2767,19 +2797,23 @@ This output bot discards all incoming messages.
Only Elasticsearch version 7 is supported.
-#### Configuration parameters:
+**Configuration parameters**
* `elastic_host`: Name/IP for the Elasticsearch server, defaults to 127.0.0.1
* `elastic_port`: Port for the Elasticsearch server, defaults to 9200
* `elastic_index`: Index for the Elasticsearch output, defaults to intelmq
* `rotate_index`: If set, will index events using the date information associated with the event.
-  Options: 'never', 'daily', 'weekly', 'monthly', 'yearly'. Using 'intelmq' as the elastic_index, the following are examples of the generated index names:
-  'never' --> intelmq
-  'daily' --> intelmq-2018-02-02
-  'weekly' --> intelmq-2018-42
-  'monthly' --> intelmq-2018-02
-  'yearly' --> intelmq-2018
+  Options: 'never', 'daily', 'weekly', 'monthly', 'yearly'. Using 'intelmq' as the elastic_index, the following are examples of the generated index names:
+
+  .. code-block::
+
+     'never' --> intelmq
+     'daily' --> intelmq-2018-02-02
+     'weekly' --> intelmq-2018-42
+     'monthly' --> intelmq-2018-02
+     'yearly' --> intelmq-2018
+
* `http_username`: HTTP basic authentication username
* `http_password`: HTTP basic authentication password
* `use_ssl`: Whether to use SSL/TLS when connecting to Elasticsearch. Default: False
@@ -2788,8 +2822,9 @@ Only ElasticSearch version 7 supported.
* `ssl_show_warnings`: Whether to show warnings if the server's certificate cannot be verified. Default: True
* `replacement_char`: If set, dots ('.') in field names will be replaced with this character prior to indexing. This is for backward compatibility with ES 2.X. Default: null. Recommended for ES2.X: '_'
* `flatten_fields`: In ES, some queries and aggregations work better if the fields are flat and not JSON. Here you can provide a list of fields to convert.
-  Can be a list of strings (fieldnames) or a string with field names separated by a comma (,). eg `extra,field2` or `['extra', 'field2']`
-  Default: ['extra']
+
+  Can be a list of strings (fieldnames) or a string with field names separated by a comma (,), e.g. `extra,field2` or `['extra', 'field2']`.
+  Default: ['extra']
See `contrib/elasticsearch/elasticmapper` for a utility for creating Elasticsearch mappings and templates.
@@ -2798,15 +2833,15 @@ To query all intelmq indices at once, use an alias (https://www.elastic.co/guide
The data in ES can be retrieved with the HTTP interface:
-```bash
-> curl -XGET 'http://localhost:9200/intelmq/events/_search?pretty=True'
-```
+.. code-block:: bash
+
+   > curl -XGET 'http://localhost:9200/intelmq/events/_search?pretty=True'
-* * *
+File
+^^^^
-### File
+**Information**
-#### Information:
* `name:` file
* `lookup:` no
* `public:` yes
@@ -2815,7 +2850,7 @@ The data in ES can be retrieved with the HTTP-Interface:
Multithreading is disabled for this bot, as this would lead to corrupted files.
-#### Configuration Parameters:
+**Configuration Parameters**
* `encoding_errors_mode`: By default `'strict'`; see https://docs.python.org/3/library/functions.html#open for more details and options. For example, with `'backslashreplace'` all characters which cannot be properly encoded will be written escaped with backslashes.
* `file`: file path of the output file. Missing directories will be created if possible with the mode 755.
* `hierarchical_output`: If true, the resulting dictionary will be hierarchical (field names split by dot).
* `single_key`: if `none`, the whole event is saved (default); otherwise the bot saves only the contents of the specified key. In case of `raw` the data is base64 decoded.
-##### Filename formatting
+**Filename formatting**
+
The filename can be formatted using Python's string formatting functions if `format_filename` is set. See https://docs.python.org/3/library/string.html#formatstrings
For example:
@@ -2832,19 +2868,18 @@ For example:
If the field used in the format string is not defined, `None` will be used as fallback.
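+
+As a sketch, a parameter block like the following writes one file per abuse contact; the `{event[...]}` placeholder form and the path are illustrative assumptions:
+
+.. code-block:: json
+
+   {
+      "file": "/opt/intelmq/var/lib/bots/file-output/{event[source.abuse_contact]}.txt",
+      "format_filename": true
+   }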
-* * * - +Files +^^^^^ -### Files +**Information** -#### Information: * `name:` files * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` saving of messages as separate files -#### Configuration Parameters: +**Configuration Parameters** * `dir`: output directory (default `/opt/intelmq/var/lib/bots/files-output/incoming`) * `tmp`: temporary directory (must reside on the same filesystem as `dir`) (default: `/opt/intelmq/var/lib/bots/files-output/tmp`) @@ -2852,19 +2887,18 @@ If the field used in the format string is not defined, `None` will be used as fa * `hierarchical_output`: if `true`, use nested dictionaries; if `false`, use flat structure with dot separated keys (default) * `single_key`: if `none`, the whole event is saved (default); otherwise the bot saves only contents of the specified key +McAfee Enterprise Security Manager +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* * * +**Information** -### McAfee Enterprise Security Manager - -#### Information: * `name:` intelmq.bots.outputs.mcafee.output_esm_ip * `lookup:` yes * `public:` no * `cache (redis db):` none * `description:` Writes information out to McAfee ESM watchlist -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `esm_ip`: IP address of ESM instance @@ -2873,21 +2907,20 @@ If the field used in the format string is not defined, `None` will be used as fa * `esm_watchlist`: name of the watchlist to write to * `field`: name of the IntelMQ field to be written to ESM -* * * +MISP Feed +^^^^^^^^^ -### MISP Feed +**Information** -#### Information: * `name:` `intelmq.bots.outputs.misp.output_feed` * `lookup:` no * `public:` no * `cache (redis db):` none * `description:` Create a directory layout in the MISP Feed format -The PyMISP library >= 2.4.119.1 is required, see -[REQUIREMENTS.txt](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/REQUIREMENTS.txt). +The PyMISP library >= 2.4.119.1 is required, see `REQUIREMENTS.txt `_. -#### Configuration Parameters: +**Configuration Parameters** * **Feed parameters** (see above) * `misp_org_name`: Org name which creates the event, string @@ -2895,15 +2928,15 @@ The PyMISP library >= 2.4.119.1 is required, see * `output_dir`: Output directory path, e.g. `/opt/intelmq/var/lib/bots/mispfeed-output`. Will be created if it does not exist and possible. * `interval_event`: The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 hour", string. -#### Usage in MISP +**Usage in MISP** -Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. See [the MISP documentation on Feeds](https://www.circl.lu/doc/misp/managing-feeds/) for more information +Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. See `the MISP documentation on Feeds `_ for more information -* * * +MISP API +^^^^^^^^ -### MISP API +**Information** -#### Information: * `name:` `intelmq.bots.outputs.misp.output_api` * `lookup:` no * `public:` no @@ -2911,22 +2944,20 @@ Configure the destination directory of this feed as feed in MISP, either as loca * `description:` Connect to a MISP instance and add event as MISPObject if not there already. The PyMISP library >= 2.4.120 is required, see -[REQUIREMENTS.txt](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/REQUIREMENTS.txt). +`REQUIREMENTS.txt `_. 
-#### Configuration Parameters:
+**Configuration Parameters**
* **Feed parameters** (see above)
* `add_feed_provider_as_tag`: boolean (use `true` when in doubt)
* `add_feed_name_as_tag`: boolean (use `true` when in doubt)
-* `misp_additional_correlation_fields`: list of fields for which
-  the correlation flags will be enabled (in addition to those which are
-  in significant_fields)
-* `misp_additional_tags`: list of tags to set not be searched for
-  when looking for duplicates
+* `misp_additional_correlation_fields`: list of fields for which the correlation flags will be enabled (in addition to those which are in significant_fields)
+* `misp_additional_tags`: list of tags to set; these tags are not searched for when looking for duplicates
* `misp_key`: string, API key for accessing MISP
* `misp_publish`: boolean, if a new MISP event should be set to "publish".
-  Expert setting as MISP may really make it "public"!
-  (Use `false` when in doubt.)
+
+  Expert setting as MISP may really make it "public"!
+  (Use `false` when in doubt.)
* `misp_tag_for_bot`: string, used to mark MISP events
* `misp_to_ids_fields`: list of fields for which the `to_ids` flags will be set
* `misp_url`: string, URL of the MISP server
@@ -2949,23 +2980,22 @@ than MISP (which is by design as MISP is for manual handling).
Also remove, with an expert bot, the fields of the IntelMQ events that you do not want to be inserted into MISP.
-(More details can be found in the docstring of
-[`output_api.py`](../intelmq/bots/outputs/misp/output_api.py)).
+(More details can be found in the docstring of `output_api.py <https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/output_api.py>`_.)
-* * *
-
-### MongoDB
+MongoDB
+^^^^^^^
Saves events in a MongoDB either as a hierarchical structure or flat with full key names. `time.observation` and `time.source` are saved as datetime objects, not as ISO formatted strings.
-#### Information:
+**Information**
+
* `name:` mongodb
* `lookup:` no
* `public:` yes
* `cache (redis db):` none
* `description:` MongoDB is the bot responsible to send events to a MongoDB database
-#### Configuration Parameters:
+**Configuration Parameters**
* `collection`: MongoDB collection
* `database`: MongoDB database
@@ -2976,26 +3006,26 @@ Saves events in a MongoDB either as hierarchical structure or flat with full key
* `hierarchical_output`: Boolean (default true); as MongoDB does not allow saving keys with dots, we split the dictionary in sub-dictionaries.
* `replacement_char`: String (default `'_'`) used as replacement character for the dots in key names if hierarchical output is not used.
-#### Installation Requirements
+**Installation Requirements**
-```
-pip3 install pymongo>=2.7.1
-```
+.. code-block:: bash
+
+   pip3 install 'pymongo>=2.7.1'
The bot has been tested with pymongo versions 2.7.1, 3.4 and 3.10.1 (server versions 2.6.10 and 3.6.8).
-* * *
+Redis
+^^^^^
-### Redis
+**Information**
-#### Information:
* `name:` `intelmq.bots.outputs.redis.output`
* `lookup:` to the Redis server
* `public:` yes
* `cache (redis db):` none
* `description:` Output Bot that sends events to a remote Redis server/queue.
-#### Configuration Parameters:
+**Configuration Parameters**
* `redis_db`: remote server database, e.g.: 2
* `redis_password`: remote server password
@@ -3006,41 +3036,46 @@ The bot has been tested with pymongo versions 2.7.1, 3.4 and 3.10.1 (server vers
* `hierarchical_output`: whether output should be sent in hierarchical JSON format (default: false)
* `with_type`: Send the `__type` field (default: true)
-#### Examples of usage:
+**Examples of usage**
* Can be used to send events to be processed in another system, e.g. send events to Logstash.
* In a multi-tenant installation it can be used to send events to an external/remote IntelMQ instance. Any expert bot queue can receive the events.
* In a complex configuration it can be used to create logical sets in IntelMQ-Manager.
-* * *
+Request Tracker
+^^^^^^^^^^^^^^^
-### Request Tracker
+**Information**
-#### Information:
* `name:` `intelmq.bots.outputs.rt.output`
* `lookup:` to the Request Tracker instance
* `public:` yes
* `cache (redis db):` none
* `description:` Output Bot that creates Request Tracker tickets from events.
-#### Description
+**Description**
The bot creates tickets in Request Tracker and uses event fields for the ticket body text. The bot follows the workflow of the RTIR:
+
- create a ticket in the Incidents queue (or any other queue)
+
  - all event fields are included in the ticket body,
  - event attributes are assigned to the tickets' CFs according to the attribute mapping,
  - the ticket taxonomy can be assigned according to the CF mapping. If you use a taxonomy different from `ENISA RSIT <https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force>`_, consider using some extra attribute field and do the value mapping with the modify or sieve bot,
+
- create a linked ticket in the Investigations queue, if these conditions are met:
+
  - the first ticket's destination was the Incidents queue,
  - source.abuse_contact is specified,
  - the description text is specified in the field appointed by the configuration,
+
- RT/RTIR is supposed to do the relevant notifications by a scrip working on the condition "On Create",
- the configuration option investigation_fields specifies which event fields have to be included in the investigation,
- resolve the Incident ticket, according to the configuration (the Investigation ticket status should depend on the RT scrip configuration),
Take extra caution not to flood your ticketing system with an enormous amount of tickets. Add extra filtering to pass only critical events to RT, and/or deduplicate events.
-#### Configuration Parameters:
+**Configuration Parameters**
- `rt_uri`, `rt_user`, `rt_password`, `verify_cert`: RT API endpoint connection details, string.
- `queue`: ticket destination queue. If set to 'Incidents', an 'Investigations' ticket will be created if create_investigation is set to true, string.
@@ -3050,18 +3085,20 @@ Take extra caution not to flood your ticketing system with enormous amount of ti
- `investigation_fields`: attributes to include into the investigation ticket, comma-separated string. E.g. `time.source,source.ip,source.port,source.fqdn,source.url,classification.taxonomy,classification.type,classification.identifier,event_description.url,event_description.text,malware.name,protocol.application,protocol.transport`.
- `description_attr`: which event attribute contains the text message being sent to the recipient, string. If it is not specified or not found in the event, the Investigation ticket is not going to be created. Example: `extra.message.text`.
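+
+A sketch of a corresponding parameter block; all values are illustrative placeholders:
+
+.. code-block:: json
+
+   {
+      "rt_uri": "https://rt.example.com/REST/1.0/",
+      "rt_user": "intelmq",
+      "rt_password": "password",
+      "verify_cert": true,
+      "queue": "Incidents",
+      "create_investigation": false,
+      "investigation_fields": "time.source,source.ip,classification.taxonomy",
+      "description_attr": "extra.message.text"
+   }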
-* * * -### REST API -#### Information: +REST API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Information** + * `name:` restapi * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` REST API is the bot responsible to send events to a REST API listener through POST -#### Configuration Parameters: +**Configuration Parameters** * `auth_token`: the user name / HTTP header key * `auth_token_name`: the password / HTTP header value @@ -3070,20 +3107,22 @@ Take extra caution not to flood your ticketing system with enormous amount of ti * `host`: destination URL * `use_json`: boolean -* * * -### SMTP Output Bot + +SMTP Output Bot +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Sends a MIME Multipart message containing the text and the event as CSV for every single event. -#### Information: +**Information** + * `name:` smtp * `lookup:` no * `public:` yes * `cache (redis db):` none * `description:` Sends events via SMTP -#### Configuration Parameters: +**Configuration Parameters** * `fieldnames`: a list of field names to be included in the email, comma separated string or list of strings * `mail_from`: string. Supports formatting, see below @@ -3097,8 +3136,7 @@ Sends a MIME Multipart message containing the text and the event as CSV for ever * `subject`: string. Supports formatting, see below * `text`: string or null. Supports formatting, see below -For several strings you can use values from the string using the -[standard Python string format syntax](https://docs.python.org/3/library/string.html#format-string-syntax). +For several strings you can use values from the string using the `standard Python string format syntax `_. Access the event's values with `{ev[source.ip]}` and similar. Any not existing fields will result in `None`. Authentication is optional. If both username and password are given, these @@ -3106,11 +3144,13 @@ mechanism are tried: CRAM-MD5, PLAIN, and LOGIN. Client certificates are not supported. If `http_verify_cert` is true, TLS certificates are checked. -* * * -### SQL -#### Information: +SQL +^^^ + +**Information** + * `name:` sql * `lookup:` no * `public:` yes @@ -3118,14 +3158,11 @@ Client certificates are not supported. If `http_verify_cert` is true, TLS certif * `description:` SQL is the bot responsible to send events to a PostgreSQL or SQLite Database * `notes`: When activating autocommit, transactions are not used: http://initd.org/psycopg/docs/connection.html#connection.autocommit -#### Configuration Parameters: +**Configuration Parameters** -The parameters marked with 'PostgreSQL' will be sent -to libpq via psycopg2. Check the -[libpq parameter documentation] (https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS) -for the versions you are using. +The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Check the `libpq parameter documentation `_ for the versions you are using. -* `autocommit`: [psycopg's autocommit mode](http://initd.org/psycopg/docs/connection.html?#connection.autocommit), optional, default True +* `autocommit`: `psycopg's autocommit mode `_, optional, default True * `connect_timeout`: Database connect_timeout, optional, default 5 seconds * `engine`: 'postgresql' or 'sqlite' * `database`: PostgreSQL database or SQLite file @@ -3137,7 +3174,7 @@ for the versions you are using. * `sslmode`: PostgreSQL sslmode, can be `'disable'`, `'allow'`, `'prefer'` (default), `'require'`, `'verify-ca'` or `'verify-full'`. 
See postgresql docs: https://www.postgresql.org/docs/current/static/libpq-connect.html#libpq-connect-sslmode
* `table`: name of the database table into which events are to be inserted
-#### PostgreSQL
+**PostgreSQL**
You have two basic choices to run PostgreSQL:
1. on the same machine as intelmq, then you could use Unix sockets if available on your platform
2. on a different machine, in which case you need a TCP connection and the right connection parameters for each client call
@@ -3146,16 +3183,16 @@ You have two basic choices to run PostgreSQL:
Make sure to consult your PostgreSQL documentation about how to allow network connections and authentication in case 2.
-##### PostgreSQL Version
-Any supported version of PostgreSQL should work
-(v>=9.2 as of Oct 2016)[[1](https://www.postgresql.org/support/versioning/)].
+**PostgreSQL Version**
+
+Any supported version of PostgreSQL should work (v>=9.2 as of Oct 2016) `[1] <https://www.postgresql.org/support/versioning/>`_.
If you use PostgreSQL server v >= 9.4, it gives you the possibility
-to use the time-zone [formatting string](https://www.postgresql.org/docs/9.4/static/functions-formatting.html) "OF" for date-times
-and the [GiST index for the CIDR type](https://www.postgresql.org/docs/9.4/static/release-9-4.html#AEN120769). This may be useful depending on how
+to use the time-zone `formatting string <https://www.postgresql.org/docs/9.4/static/functions-formatting.html>`_ "OF" for date-times
+and the `GiST index for the CIDR type <https://www.postgresql.org/docs/9.4/static/release-9-4.html#AEN120769>`_. This may be useful depending on how
you plan to use the events that this bot writes into the database.
-##### How to install:
+**How to install**
Use `intelmq_psql_initdb` to create initial SQL statements
from `harmonization.conf`. The script will create the required table layout
@@ -3169,14 +3206,16 @@ the expert/certbund_contact bot.)
Therefore if still necessary: create the database-user as postgresql superuser, which usually is done via the system user `postgres`:
-```
-createuser --no-superuser --no-createrole --no-createdb --encrypted --pwprompt intelmq
-```
+
+.. code-block:: bash
+
+   createuser --no-superuser --no-createrole --no-createdb --encrypted --pwprompt intelmq
Create the new database:
-```
-createdb --encoding='utf-8' --owner=intelmq intelmq-events
-```
+
+.. code-block:: bash
+
+   createdb --encoding='utf-8' --owner=intelmq intelmq-events
(The encoding parameter should ensure the right encoding on platforms where this is not the default.)
@@ -3184,43 +3223,46 @@ where this is not the default.)
Now initialize it as database-user `intelmq` (in this example a network connection to localhost is used, so you would get to test if the user `intelmq` can authenticate):
-```
-psql -h localhost intelmq-events intelmq </tmp/initdb.sql
-```
+.. code-block:: bash
+
+   psql -h localhost intelmq-events intelmq </tmp/initdb.sql
+
+For SQLite, initialize the database file instead:
+
+.. code-block:: bash
+
+   sqlite3 your-db.db
+   sqlite> .read /tmp/initdb.sql
Then, set the `database` parameter to the `your-db.db` file path.
-* * *
+STOMP
+^^^^^
-### STOMP
+**Information**
-#### Information:
* `name`: intelmq.bots.outputs.stomp.output
* `lookup`: yes
* `public`: yes
* `cache (redis db)`: none
* `description`: This output bot will push data to any STOMP stream. STOMP stands for Streaming Text Oriented Messaging Protocol. See: https://en.wikipedia.org/wiki/Streaming_Text_Oriented_Messaging_Protocol
-#### Requirements:
+**Requirements**
+
Install the stomp.py library, e.g. `apt install python3-stomp.py` or `pip install stomp.py`.
You need a CA certificate, client certificate and key file from the organization / server you are connecting to. Also you will need a so-called "exchange point".
-#### Configuration Parameters:
+**Configuration Parameters**
* `exchange`: The exchange to push at
* `heartbeat`: default: 60000
@@ -3234,11 +3276,11 @@ Also you will need a so called "exchange point".
* `ssl_client_certificate`: path to client cert file
* `ssl_client_certificate_key`: path to client cert key file
-* * *
+TCP
+^^^
-### TCP
+**Information**
-#### Information:
* `name:` intelmq.bots.outputs.tcp.output
* `lookup:` no
* `public:` yes
@@ -3247,40 +3289,40 @@ Also you will need a so called "exchange point".
Multithreading is disabled for this bot.
-#### Configuration Parameters:
+**Configuration Parameters**
* `counterpart_is_intelmq`: Boolean. If you are sending to an IntelMQ TCP collector, set this to True, otherwise e.g. with filebeat, set it to false.
* `ip`: IP of the destination server
* `hierarchical_output`: true for a nested JSON, false for a flat JSON (when sending to a TCP collector).
* `port`: port of the destination server
* `separator`: separator of messages, e.g. "\n", optional. When sending to a TCP collector, the parameter shouldn't be present.
-  In that case, the output waits every message is acknowledged by "Ok" message the TCP collector bot implements.
+  In that case, the output waits until every message is acknowledged by the "Ok" message which the TCP collector bot implements.
-#### Sending to an IntelMQ TCP collector
+**Sending to an IntelMQ TCP collector**
If you intend to link two IntelMQ instances via TCP, set the parameter `counterpart_is_intelmq` to true. The bot then awaits an "Ok" message to be received after each message is sent.
The TCP collector just sends "Ok" after every message it gets.
-* * *
+Touch
+^^^^^
-### Touch
+**Information**
-#### Information:
* `name:` intelmq.bots.outputs.touch.output
* `lookup:` no
* `public:` yes
* `cache (redis db):` none
* `description:` Touches a file for every event received.
-#### Configuration Parameters:
+**Configuration Parameters**
* `path`: Path to the file to touch.
-* * *
+UDP
+^^^
-### UDP
+**Information**
-#### Information:
* `name:` intelmq.bots.outputs.udp.output
* `lookup:` no
* `public:` yes
@@ -3289,7 +3331,7 @@ The TCP collector just sends "Ok" after every message it gets.
Multithreading is disabled for this bot.
-#### Configuration Parameters:
+**Configuration Parameters**
* `field_delimiter`: If the format is 'delimited' this will be added between fields. String, default: `"|"`
* `format`: Can be `'json'` or `'delimited'`. The JSON format outputs the event 'as-is'. Delimited will deconstruct the event and print each field:value pair separated by the field delimiter. See the examples below.
* `header`: Header text to be sent in the UDP datagram, string.
* `keep_raw_field`: boolean, if true the raw field will be sent. Default: false
* `udp_host`: Destination server's hostname or IP address
* `udp_port`: Destination port
-#### Examples of usage:
+**Examples of usage**
Consider the following event:
+
+.. 
code-block:: json + + {"raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} + With the following Parameters: * field_delimiter : | @@ -3315,9 +3359,10 @@ With the following Parameters: Resulting line in syslog: -``` -Apr 29 11:01:29 header example {"raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} -``` +.. code-block:: + + Apr 29 11:01:29 header example {"raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} + With the following Parameters: * field_delimiter : | @@ -3329,15 +3374,15 @@ With the following Parameters: Resulting line in syslog: -``` -Apr 29 11:17:47 localhost IntelMQ-event|source.ip: 85.25.160.114|time.source:2016-04-25T11:39:00+00:00|feed.url:http://www.malwaredomainlist.com/updatescsv.php|time.observation:2016-04-29T11:17:44+00:00|source.reverse_dns:static-ip-85-25-160-114.inaddr.ip-pool.com|feed.name:Malware Domain List|event_description.text:Angler EK|source.url:http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/|source.asn:8972|classification.type:malware|feed.accuracy:100.0 -``` +.. 
code-block:: -* * * + Apr 29 11:17:47 localhost IntelMQ-event|source.ip: 85.25.160.114|time.source:2016-04-25T11:39:00+00:00|feed.url:http://www.malwaredomainlist.com/updatescsv.php|time.observation:2016-04-29T11:17:44+00:00|source.reverse_dns:static-ip-85-25-160-114.inaddr.ip-pool.com|feed.name:Malware Domain List|event_description.text:Angler EK|source.url:http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/|source.asn:8972|classification.type:malware|feed.accuracy:100.0 -### XMPP +XMPP +^^^^ + +**Information** -#### Information: * `name:` intelmq.bots.outputs.xmpp.collector * `lookup:` yes * `public:` yes @@ -3346,13 +3391,15 @@ Apr 29 11:17:47 localhost IntelMQ-event|source.ip: 85.25.160.114|time.source:201 **Warning:** This bot is currently *unmaintained* and needs to be adapted. The used XMPP library *sleekxmpp* is deprecated, therefore the bots needs to be adapted to the successor library *slixmpp*. For more information see [Issue #1614](https://github.com/certtools/intelmq/issues/1614). -#### Requirements +**Requirements** + The Sleekxmpp - Library needs to be installed on your System -```bash -pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt -``` -#### Configuration Parameters: +.. code-block:: bash + + pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt + +**Configuration Parameters** - `xmpp_user` : The username of the XMPP-Account the output shall use (part before the @ sign) - `xmpp_server` : The domain name of the server of the XMPP-Account (part after the @ sign) diff --git a/docs/guides/User-Guide.md b/docs/user/configuration-management.rst similarity index 68% rename from docs/guides/User-Guide.md rename to docs/user/configuration-management.rst index 8db6facae..33836162a 100644 --- a/docs/guides/User-Guide.md +++ b/docs/user/configuration-management.rst @@ -1,54 +1,35 @@ -# User Guide - -For installation instructions, see [INSTALL](INSTALL.md). -For upgrade instructions, see [UPGRADING](UPGRADING.md). - -**Table of Contents:** -- [Where to get help?](#where-to-get-help) -- [Configure services](#configure-services) -- [Configuration](#configuration) -- [System Configuration (defaults)](#system-configuration-defaults) - - [Error Handling](#error-handling) - - [Miscellaneous](#miscellaneous) -- [Pipeline Configuration](#pipeline-configuration) -- [Runtime Configuration](#runtime-configuration) - - [Multithreading (Beta)](#multithreading-beta) -- [Harmonization Configuration](#harmonization-configuration) -- [Utilities](#utilities) -- [Management](#management) - - [Web interface: IntelMQ Manager](#web-interface-intelmq-manager) - - [Command-line interface: intelmqctl](#command-line-interface-intelmqctl) - - [Botnet Concept](#botnet-concept) - - [Scheduled Run Mode](#scheduled-run-mode) - - [Continuous Run Mode](#continuous-run-mode) - - [Reloading](#reloading) - - [Forcing reset pipeline and cache (be careful)](#forcing-reset-pipeline-and-cache-be-careful) -- [Error Handling](#error-handling) - - [Tool: intelmqdump](#tool-intelmqdump) -- [Monitoring Logs](#monitoring-logs) -- [Uninstall](#uninstall) -- [Integration with ticket systems, etc.](#integration-with-ticket-systems-etc) -- [Frequently Asked Questions](#frequently-asked-questions) -- [Additional Information](#additional-information) - - [Bash Completion](#bash-completion) - -## Where to get help? 
- -In case you are lost or something is not discussed in this guide, you might want to subscribe to the [intelmq users mailinglist](https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-users) and ask your questions there. +Configuration and Management +============================ + +.. contents:: + +For installation instructions, see :doc:`installation`. +For upgrade instructions, see :doc:`upgrade`. + +Where to get help? +------------------ + +In case you are lost or something is not discussed in this guide, you might want to subscribe to the |intelmq-users-list-link| and ask your questions there. With that clarified, let's dig into the details... -## Configure services +Configure services +------------------ You need to enable and start Redis if not already done. Using systemd it can be done with: -```bash -systemctl enable redis.service -systemctl start redis.service -``` -## Configuration +.. code-block:: bash -### /opt and LSB paths + systemctl enable redis.service + systemctl start redis.service + +.. _configuration: + +Configuration +------------- + +/opt and LSB paths +^^^^^^^^^^^^^^^^^^ If you installed the packages, standard Linux paths (LSB paths) are used: `/var/log/intelmq/`, `/etc/intelmq/`, `/var/lib/intelmq/`, `/var/run/intelmq/`. Otherwise, the configuration directory is `/opt/intelmq/etc/`. Using the environment variable `INTELMQ_ROOT_DIR` allows setting any arbitrary root directory. @@ -59,18 +40,18 @@ You can switch this by setting the environment variables `INTELMQ_PATHS_NO_OPT` The environment variable `ROOT_DIR` is meant to set an alternative root directory instead of `/`. This is primarily meant for package build environments an analogous to setuptools' `--root` parameter. Thus it is only used in LSB-mode. -### Overview +Overview +^^^^^^^^ All configuration files are in the JSON format. For new installations a default setup with some examples is provided by the `intelmqsetup` tool. If this is not the case, make sure the program was run (see installation instructions). * `defaults.conf`: default values for all bots and their behavior, e.g. -error handling, log options and pipeline configuration. Will be removed in the [future](https://github.com/certtools/intelmq/issues/267). +error handling, log options and pipeline configuration. Will be removed in the `future `_. * `runtime.conf`: Configuration for the individual bots. See [Bots](Bots.md) for more details. * `pipeline.conf`: Defines source and destination queues per bot (i.e. where does a bot get its data from, where does it send it to?). -* `BOTS`: Includes configuration hints for all bots. E.g. feed URLs or -database connection parameters. Use this as a template for `runtime.conf`. This is also read by the intelmq-manager. +* `BOTS`: Includes configuration hints for all bots. E.g. feed URLs or database connection parameters. Use this as a template for `runtime.conf`. This is also read by the intelmq-manager. To configure a new bot, you need to define and configure it in `runtime.conf` using the template from BOTS. Configure source and destination queues in `pipeline.conf`. @@ -78,7 +59,8 @@ Use the IntelMQ Manager mentioned above to generate the configuration files if u In the shipped examples 4 collectors and parsers, 6 common experts and one output are configured. The default collector and the parser handle data from malware domain list, the file output bot writes all data to `/opt/intelmq/var/lib/bots/file-output/events.txt`/`/var/lib/intelmq/bots/file-output/events.txt`. 
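+
+For orientation, a single bot entry in `runtime.conf` has the following shape (a sketch modelled on the shipped file output example; the available `parameters` keys depend on the bot and come from the BOTS template):
+
+.. code-block:: json
+
+   {
+      "file-output": {
+         "group": "Output",
+         "name": "File",
+         "module": "intelmq.bots.outputs.file.output",
+         "description": "Writes events to a file.",
+         "parameters": {
+            "file": "/opt/intelmq/var/lib/bots/file-output/events.txt",
+            "hierarchical_output": false
+         }
+      }
+   }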
-### System Configuration (defaults) +System Configuration (defaults) +------------------------------- All bots inherit this configuration parameters and they can overwrite them using the same parameters in their respective configuration in the ''runtime.conf'' file. @@ -93,7 +75,8 @@ We recommend `logging_level` `WARNING` for production environments and `INFO` if You can set these parameters per bot as well. The settings will take effect after the runtime configuration has been (re-)read (which is after loading the defaults configuration. See the intelmqctl documentation). -##### Error Handling +Error Handling +^^^^^^^^^^^^^^ * **`error_log_message`** - in case of an error, this option will allow the bot to write the message (report or event) to the log file. Use the following values: * **`true/false`** - write or not write message to the log file @@ -116,7 +99,8 @@ You can set these parameters per bot as well. The settings will take effect afte If the path `_on_error` exists for a bot, the message is also sent to this queue, instead of (only) dumping the file if configured to do so. -##### Miscellaneous +Miscellaneous +^^^^^^^^^^^^^ * **`load_balance`** - this option allows you to choose the behavior of the queue. Use the following values: * **`true`** - splits the messages into several queues without duplication @@ -146,7 +130,7 @@ If the path `_on_error` exists for a bot, the message is also sent to this queue * **`destination_pipeline_db`** - broker database that the bot will use to connect and send messages (requirement from redis broker). -* **`http_proxy`** - HTTP proxy the that bot will use when performing HTTP requests (e.g. bots/collectors/collector_http.py). The value must follow [RFC1738](https://www.ietf.org/rfc/rfc1738.txt). +* **`http_proxy`** - HTTP proxy the that bot will use when performing HTTP requests (e.g. bots/collectors/collector_http.py). The value must follow :rfc:`1738`. * **`https_proxy`** - HTTPS proxy that the bot will use when performing secure HTTPS requests (e.g. bots/collectors/collector_http.py). @@ -156,61 +140,72 @@ If the path `_on_error` exists for a bot, the message is also sent to this queue * **`true/false`** - verify or not verify SSL certificates -#### Using supervisor as process manager (Beta) +Using supervisor as process manager (Beta) +"""""""""""""""""""""""""""""""""""""""""" First of all: Do not use it in production environments yet! It has not been tested thoroughly yet. -[Supervisor](http://supervisord.org) is process manager written in Python. The main advantage is that it take care about processes, so if bot process exit with failure (exit code different than 0), supervisor try to run it again. Another advantage is that it not require writing PID files. +`Supervisor `_ is process manager written in Python. The main advantage is that it take care about processes, so if bot process exit with failure (exit code different than 0), supervisor try to run it again. Another advantage is that it not require writing PID files. This was tested on Ubuntu 18.04. Install supervisor. `supervisor_twiddler` is extension for supervisor, that makes possible to create process dynamically. (Ubuntu `supervisor` package is currently based on Python 2, so `supervisor_twiddler` must be installed with Python 2 `pip`.) -``` -apt install supervisor python-pip -pip install supervisor_twiddler -``` + +.. 
code-block:: bash
+
+   apt install supervisor python-pip
+   pip install supervisor_twiddler
 
Create the default config `/etc/supervisor/conf.d/intelmq.conf` and restart the `supervisor` service:
 
-```ini
-[rpcinterface:twiddler]
-supervisor.rpcinterface_factory=supervisor_twiddler.rpcinterface:make_twiddler_rpcinterface
+.. code-block:: ini
+
+   [rpcinterface:twiddler]
+   supervisor.rpcinterface_factory=supervisor_twiddler.rpcinterface:make_twiddler_rpcinterface
 
-[group:intelmq]
-```
+   [group:intelmq]
 
Change the IntelMQ process manager in the *defaults* configuration:
 
-```
-"process_manager": "supervisor",
-```
+.. code-block::
+
+   "process_manager": "supervisor",
 
After this it is possible to manage bots like before with the `intelmqctl` command.
 
-### Pipeline Configuration
+Pipeline Configuration
+----------------------
+
+The pipeline configuration defines how the data is exchanged between the bots. For each bot, it defines the source queue (there is always only one) and one or multiple destination queues. This section shows the possibilities and definition as well as examples. The configuration of the pipeline can be done by the |intelmq-manager-github-link| with no need to intervene manually. It is recommended to use this tool as it guarantees that the configuration is correct. The location of the file is `etc/pipeline.conf` in your IntelMQ directory, for example `/opt/intelmq/etc/pipeline.conf` or `/etc/intelmq/pipeline.conf`.
 
-The pipeline configuration defines how the data is exchanges between the bots. For each bot, it defines the source queue (there is always only one) and one or multiple destination queues. This section shows the possibilities and definition as well as examples. The configuration of the pipeline can be done by the [IntelMQ Manager](https://github.com/certtools/intelmq-manager) with no need to intervene manually. It is recommended to use this tool as it guarantees that the configuration is correct. The location of the file is `etc/pipeline.conf` in your IntelMQ directory, for example `/opt/intelmq/etc/pipeline.conf` or `/etc/intelmq/pipeline.conf`.
+Structure
+^^^^^^^^^
 
-### Structure
The pipeline configuration has the same structure on the first level as the runtime configuration, i.e. it's a dictionary with the bot IDs as keys. Each item holds again a dictionary with one entry each for the source and destination queues. A full example can be found later in this section.
 
-```json
-{
-    "example-bot": {
-        "source-queue": <source queue name>,
-        "destination-queues": <destination queues>
-    }
-}
-```
+.. code-block:: json
+
+   {
+      "example-bot": {
+         "source-queue": <source queue name>,
+         "destination-queues": <destination queues>
+      }
+   }
+
+Source queue
+""""""""""""
+
The source queue is only a string, by convention the bot ID with "-queue" appended. For example, if the bot ID is `example-bot`, the source queue name is `example-bot-queue`.
-```json
-"source-queue": "example-bot-queue"
-```
+
+.. code-block::
+
+   "source-queue": "example-bot-queue"
+
For collectors, this field does not exist, as they fetch the data from outside the IntelMQ system by definition.
 
-#### Destination queues
+Destination queues
+""""""""""""""""""
 
There are multiple possibilities for the destination queues:
 
- no value, i.e. the field does not exist. This is the case for outputs, as they push the data outside the IntelMQ system by default.
@@ -219,52 +214,62 @@ There are multiple possibilities for the destination queues:
- *Named queues*: a dictionary of either strings or lists.
 
Before going into the details of named paths, first dive into some simpler cases.
A typical configuration may look like this:
 
-```json
+
+.. code-block:: json
+
 "deduplicator-expert": {
     "source-queue": "deduplicator-expert-queue",
     "destination-queues": [
         "taxonomy-expert-queue"
     ]
 }
-```
+
And a bot with two destination queues:
 
-```json
+
+.. code-block:: json
+
 "cymru-whois-expert": {
     "source-queue": "cymru-whois-expert-queue",
     "destination-queues": [
         "file-output-queue",
         "misp-output-queue"
     ]
- },
-```
+ }
+
These are the usual configurations you mostly see.
 
-##### Named queues / paths
+Named queues / paths
+""""""""""""""""""""
+
Beginning with version 1.1.0, queues can be "named"; these are the so-called *paths*. The following two configurations are equivalent:
 
-```json
-"destination-queues": ["taxonomy-expert-queue"]
-"destination-queues": {"_default": ["taxonomy-expert-queue"]}
-```
+
+.. code-block:: json
+
+   "destination-queues": ["taxonomy-expert-queue"]
+   "destination-queues": {"_default": ["taxonomy-expert-queue"]}
+
As we can see, the *default* path name is obviously `_default`.
Let's have a look at a more complex and complete example:
 
-```
-"destination-queues": {
-   "_default": "<first destination pipeline name>",
-   "_on_error": "<optional pipeline name in case of errors>",
-   "other-path": [
-      "<second destination pipeline name>",
-      "<third destination pipeline name>",
-      ...
-   ],
-   ...
-   }
-```
+.. code-block:: json
+
+   "destination-queues": {
+      "_default": "<first destination pipeline name>",
+      "_on_error": "<optional pipeline name in case of errors>",
+      "other-path": [
+         "<second destination pipeline name>",
+         "<third destination pipeline name>",
+         ...
+      ],
+      ...
+   }
+
In that case, the bot will be able to send the message to one of the defined paths. The path `"_default"` is used if none is specified. In case of errors during processing, and if the optional path `"_on_error"` is specified, the message will be sent to the pipelines given as on-error. Other destination queues can be explicitly addressed by the bots, e.g. bots with filtering capabilities. Some expert bots are capable of sending messages to paths; this feature is explained in their documentation, e.g. the [filter expert](Bots.html#filter) and the [sieve expert](Bots.html#sieve). The named queues need to be explicitly addressed by the bot (e.g. filtering) or the core (`_on_error`) to be used. Setting arbitrary paths has no effect.
 
-#### AMQP (Beta)
+AMQP (Beta)
+^^^^^^^^^^^
 
Starting with IntelMQ 1.2 the AMQP protocol is supported as message queue. To use it, install a broker, for example RabbitMQ.
@@ -295,7 +300,9 @@ You need to set the parameter `source_pipeline_broker`/`destination_pipeline_bro
 
For getting the queue sizes, `intelmqctl` needs to connect to the monitoring interface of RabbitMQ. If the monitoring interface is not available under "http://{host}:15672" you can set it manually using the parameter `intelmqctl_rabbitmq_monitoring_url`.
In RabbitMQ's default configuration you might not provide a user account, as by default the administrator (`guest`:`guest`) allows full access from localhost. If you create a separate user account, make sure to add the tag "monitoring" to it, otherwise IntelMQ can't fetch the queue sizes.
 
-![RabbitMQ User Account Monitoring Tag](./images/rabbitmq-user-monitoring.png)
+
+.. figure:: /_static/rabbitmq-user-monitoring.png
+   :alt: RabbitMQ User Account Monitoring Tag
 
Setting the statistics (and cache) parameters is necessary when the local redis is running under a non-default host/port.
If this is the case, you can set them explicitly:
@@ -304,130 +311,139 @@ Setting the statistics (and cache) parameters is necessary when the local redis
 * `statistics_password`: `null`
 * `statistics_port`: `6379`
 
-### Runtime Configuration
+Runtime Configuration
+---------------------
 
This configuration is used by each bot to load its specific (runtime) parameters. Usually, the `BOTS` file is used to generate `runtime.conf`. Also, the IntelMQ Manager generates this configuration. You may edit it manually as well. Be sure to re-load the bot (see the intelmqctl documentation).
 
**Template:**
-```
-{
-    "<bot ID>": {
-        "group": "<bot type (Collector, Parser, Expert, Output)>",
-        "name": "<human-readable bot name>",
-        "module": "<bot code (python module)>",
-        "description": "<generic description of the bot>",
-        "parameters": {
-            "<parameter 1>": "<value 1>",
-            "<parameter 2>": "<value 2>",
-            "<parameter 3>": "<value 3>"
-        }
-    }
-}
-```
+
+.. code-block:: json
+
+   {
+      "<bot ID>": {
+         "group": "<bot type (Collector, Parser, Expert, Output)>",
+         "name": "<human-readable bot name>",
+         "module": "<bot code (python module)>",
+         "description": "<generic description of the bot>",
+         "parameters": {
+            "<parameter 1>": "<value 1>",
+            "<parameter 2>": "<value 2>",
+            "<parameter 3>": "<value 3>"
+         }
+      }
+   }
 
**Example:**
-```
-{
-    "malware-domain-list-collector": {
-        "group": "Collector",
-        "name": "Malware Domain List",
-        "module": "intelmq.bots.collectors.http.collector_http",
-        "description": "Malware Domain List Collector is the bot responsible to get the report from source of information.",
-        "parameters": {
-            "http_url": "http://www.malwaredomainlist.com/updatescsv.php",
-            "feed": "Malware Domain List",
-            "rate_limit": 3600
-        }
-    }
-}
-```
+
+.. code-block:: json
+
+   {
+      "malware-domain-list-collector": {
+         "group": "Collector",
+         "name": "Malware Domain List",
+         "module": "intelmq.bots.collectors.http.collector_http",
+         "description": "Malware Domain List Collector is the bot responsible to get the report from source of information.",
+         "parameters": {
+            "http_url": "http://www.malwaredomainlist.com/updatescsv.php",
+            "feed": "Malware Domain List",
+            "rate_limit": 3600
+         }
+      }
+   }
 
More examples can be found in the file `intelmq/etc/runtime.conf`. See [Bots](Bots.md) for more details.
 
By default, all of the bots are started when you start the whole botnet; however, there is a possibility to *disable* a bot. This means that the bot will not start every time you start the botnet, but you can start and stop the bot if you specify the bot explicitly. To disable a bot, add the following to your runtime.conf: `"enabled": false`. For example:
 
-```
-{
-    "malware-domain-list-collector": {
-        "group": "Collector",
-        "name": "Malware Domain List",
-        "module": "intelmq.bots.collectors.http.collector_http",
-        "description": "Malware Domain List Collector is the bot responsible to get the report from source of information.",
-        "enabled": false,
-        "parameters": {
-            "http_url": "http://www.malwaredomainlist.com/updatescsv.php",
-            "feed": "Malware Domain List",
-            "rate_limit": 3600
-        }
-    }
-}
-```
-
-#### Multithreading (Beta)
+.. code-block:: json
+
+   {
+      "malware-domain-list-collector": {
+         "group": "Collector",
+         "name": "Malware Domain List",
+         "module": "intelmq.bots.collectors.http.collector_http",
+         "description": "Malware Domain List Collector is the bot responsible to get the report from source of information.",
+         "enabled": false,
+         "parameters": {
+            "http_url": "http://www.malwaredomainlist.com/updatescsv.php",
+            "feed": "Malware Domain List",
+            "rate_limit": 3600
+         }
+      }
+   }
+
+Multithreading (Beta)
+^^^^^^^^^^^^^^^^^^^^^
 
First of all: Do not use it in production environments yet!
There are a few bugs, see below.
 
Since IntelMQ 2.0 it is possible to provide the following parameter:
-
- * `instances_threads`
+
+* `instances_threads`
+
Set it to a non-zero integer, then this number of worker threads will be spawned. This is useful if bots often wait for system resources or if network-based lookups are a bottleneck.
 
However, there are currently a few caveats:
 
- * This is not possible for all bots, there are some exceptions (collectors and some outputs), see the [FAQ](FAQ.html#multithreading-is-not-available-for-this-bot) for some reasons.
- * Only use it with the AMQP pipeline, as with Redis, messages may get duplicated because there's only one internal queue
- * In the logs, you can see the main thread initializing first, then all of the threads which log with the name `[bot-id].[thread-id]`.
 
-### Harmonization Configuration
+* This is not possible for all bots, there are some exceptions (collectors and some outputs), see the [FAQ](FAQ.md#multithreading-is-not-available-for-this-bot) for some reasons.
+* Only use it with the AMQP pipeline, as with Redis, messages may get duplicated because there's only one internal queue
+* In the logs, you can see the main thread initializing first, then all of the threads which log with the name `[bot-id].[thread-id]`.
+
+Harmonization Configuration
+---------------------------
 
This configuration is used to specify the fields for all message types. The harmonization library will load this configuration to check, during the message processing, if the values are compliant with the "harmonization" format. Usually, this configuration doesn't need any change. It is mostly maintained by the intelmq maintainers.
 
**Template:**
-```
-{
-    "<message type>": {
-        "<field 1 name>": {
-            "description": "<field 1 description>",
-            "type": "<field 1 value type>"
-        },
-        "<field 2 name>": {
-            "description": "<field 2 description>",
-            "type": "<field 2 value type>"
-        }
-    },
-}
-```
+
+.. code-block:: json
+
+   {
+      "<message type>": {
+         "<field 1 name>": {
+            "description": "<field 1 description>",
+            "type": "<field 1 value type>"
+         },
+         "<field 2 name>": {
+            "description": "<field 2 description>",
+            "type": "<field 2 value type>"
+         }
+      },
+   }
 
**Example:**
-```
-{
-    "event": {
-        "destination.asn": {
-            "description": "The autonomous system number from which originated the connection.",
-            "type": "Integer"
-        },
-        "destination.geolocation.cc": {
-            "description": "Country-Code according to ISO3166-1 alpha-2 for the destination IP.",
-            "regex": "^[a-zA-Z0-9]{2}$",
-            "type": "String"
-        },
-    },
-}
-```
+
+.. code-block:: json
+
+   {
+      "event": {
+         "destination.asn": {
+            "description": "The autonomous system number from which originated the connection.",
+            "type": "Integer"
+         },
+         "destination.geolocation.cc": {
+            "description": "Country-Code according to ISO3166-1 alpha-2 for the destination IP.",
+            "regex": "^[a-zA-Z0-9]{2}$",
+            "type": "String"
+         },
+      },
+   }
+
+More examples can be found in the file `intelmq/etc/harmonization.conf`.
 
-## Utilities
+Utilities
+---------
 
-### Management
+Management
+^^^^^^^^^^
 
IntelMQ has a modular structure consisting of bots. There are four types of bots:
 
-* [CollectorBots](Bots.html#collectors) retrieve data from internal or external sources, the output
-are *reports* consisting of many individual data sets / log lines.
-* [ParserBots](Bots.html#parsers) parse the (report) data by splitting it into individual *events* (log lines) and
-giving them a defined structure, see also [Data Harmonization](Data-Harmonization.md) for the list of fields an event may be split up into.
+* [CollectorBots](Bots.html#collectors) retrieve data from internal or external sources; the output are *reports* consisting of many individual data sets / log lines.
+* [ParserBots](Bots.html#parsers) parse the (report) data by splitting it into individual *events* (log lines) and giving them a defined structure; see also [Data Harmonization](Data-Harmonization.md) for the list of fields an event may be split up into.
* [ExpertBots](Bots.html#experts) enrich the existing events by e.g. looking up information such as DNS reverse records, geographic location information (country code) or abuse contacts for an IP address or domain name.
* [OutputBots](Bots.html#outputs) write events to files, databases, (REST)-APIs or any other data sink that you might want to write to.
@@ -445,9 +461,10 @@ Example: multiple gethostbyname bots (with different bot ids) may run in paralle
 
IntelMQ has a tool called IntelMQ Manager that gives users an easy way to configure all pipelines with bots that your team needs. For beginners, it's recommended to use the IntelMQ Manager to become acquainted with the functionalities and concepts. The IntelMQ Manager offers some of the possibilities of the intelmqctl tool and has a graphical interface for runtime and pipeline configurations.
 
-See the [IntelMQ Manager repository](https://github.com/certtools/intelmq-manager).
+See the |intelmq-manager-github-link| repository.
 
-#### Command-line interface: intelmqctl
+Command-line interface: intelmqctl
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
**Syntax** see `intelmqctl -h`
@@ -481,7 +498,8 @@ See the [IntelMQ Manager repository](https://github.com/certtools/intelmq-manage
 
Make a backup of your configuration first, also including the bots' configuration files.
 
-##### Botnet Concept
+Botnet Concept
+""""""""""""""
 
The "botnet" represents all currently configured bots which are explicitly enabled. It is, in essence, the graph (pipeline.conf) of the bots which are connected together via their input source queues and destination queues.
@@ -490,173 +508,187 @@ To get an overview which bots are running, use `intelmqctl status` or use the In
Disabled bots can still be started explicitly using `intelmqctl start <bot-id>`, but will remain in the state `disabled` if stopped (and not be implicitly enabled by the `start` command). They are not started by `intelmqctl start` in analogy to the behavior of widely used initialization systems.
 
-##### Scheduled Run Mode
+Scheduled Run Mode
+""""""""""""""""""
 
In many cases, it is useful to schedule a bot at a specific time (e.g. via cron(1)), for example to collect information from a website every day at midnight.
 
To do this, set `run_mode` to `scheduled` in the `runtime.conf` for the bot. Check out the following example:
 
-```json
-"blocklistde-apache-collector": {
-    "name": "Generic URL Fetcher",
-    "group": "Collector",
-    "module": "intelmq.bots.collectors.http.collector_http",
-    "description": "All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks.",
-    "enabled": false,
-    "run_mode": "scheduled",
-    "parameters": {
-        "feed": "Blocklist.de Apache",
-        "provider": "Blocklist.de",
-        "http_url": "https://lists.blocklist.de/lists/apache.txt",
-        "ssl_client_certificate": null
-    },
-},
-```
+.. 
code-block:: json + + "blocklistde-apache-collector": { + "name": "Generic URL Fetcher", + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "description": "All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks.", + "enabled": false, + "run_mode": "scheduled", + "parameters": { + "feed": "Blocklist.de Apache", + "provider": "Blocklist.de", + "http_url": "https://lists.blocklist.de/lists/apache.txt", + "ssl_client_certificate": null + }, + } You can schedule the bot with a crontab-entry like this: -``` -0 0 * * * intelmqctl start blocklistde-apache-collector -``` + +.. code-block:: cron + + 0 0 * * * intelmqctl start blocklistde-apache-collector Bots configured as `scheduled` will exit after the first successful run. Setting `enabled` to `false` will cause the bot to not start with `intelmqctl start`, but only with an explicit start, in this example `intelmqctl start blocklistde-apache-collector`. -##### Continuous Run Mode +Continuous Run Mode +""""""""""""""""""" Most of the cases, bots will need to be configured as `continuous` run mode (the default) in order to have them always running and processing events. Usually, the types of bots that will require the continuous mode will be Parsers, Experts and Outputs. To do this, set `run_mode` to `continuous` in the `runtime.conf` for the bot. Check the following example: -```json -"blocklistde-apache-parser": { - "name": "Blocklist.de Parser", - "group": "Parser", - "module": "intelmq.bots.parsers.blocklistde.parser", - "description": "Blocklist.DE Parser is the bot responsible to parse the report and sanitize the information.", - "enabled": false, - "run_mode": "continuous", - "parameters": { - }, -}, -``` +.. code-block:: json + + "blocklistde-apache-parser": { + "name": "Blocklist.de Parser", + "group": "Parser", + "module": "intelmq.bots.parsers.blocklistde.parser", + "description": "Blocklist.DE Parser is the bot responsible to parse the report and sanitize the information.", + "enabled": false, + "run_mode": "continuous", + "parameters": { + }, + } You can now start the bot using the following command: -``` -intelmqctl start blocklistde-apache-parser -``` + +.. code-block:: bash + + intelmqctl start blocklistde-apache-parser Bots configured as `continuous` will never exit except if there is an error and the error handling configuration requires the bot to exit. See the Error Handling section for more details. -##### Reloading +Reloading +""""""""" Whilst restart is a mere stop & start, performing `intelmqctl reload ` will not stop the bot, permitting it to keep the state: the same common behavior as for (Linux) daemons. It will initialize again (including reading all configuration again) after the current action is finished. Also, the rate limit/sleep is continued (with the *new* time) and not interrupted like with the restart command. So if you have a collector with a rate limit of 24 h, the reload does not trigger a new fetching of the source at the time of the reload, but just 24 h after the last run – with the new configuration. Which state the bots are keeping depends on the bots of course. -##### Forcing reset pipeline and cache (be careful) +Forcing reset pipeline and cache (be careful) +""""""""""""""""""""""""""""""""""""""""""""" If you are using the default broker (Redis), in some test situations you may need to quickly clear all pipelines and caches. 
Use the following procedure: -```bash -redis-cli FLUSHDB -redis-cli FLUSHALL -``` -### Error Handling +.. code-block:: bash + + redis-cli FLUSHDB + redis-cli FLUSHALL -#### Tool: intelmqdump +Error Handling +-------------- + +Tool: intelmqdump +^^^^^^^^^^^^^^^^^ When bots are failing due to bad input data or programming errors, they can dump the problematic message to a file along with a traceback, if configured accordingly. These dumps are saved at in the logging directory as `[botid].dump` as JSON files. IntelMQ comes with an inspection and reinjection tool, called `intelmqdump`. It is an interactive tool to show all dumped files and the number of dumps per file. Choose a file by bot-id or listed numeric id. You can then choose to delete single entries from the file with `e 1,3,4`, show a message in more readable format with `s 1` (prints the raw-message, can be long!), recover some messages and put them back in the pipeline for the bot by `a` or `r 0,4,5`. Or delete the file with all dumped messages using `d`. -```bash - $ intelmqdump -h -usage: - intelmqdump [botid] - intelmqdump [-h|--help] - -intelmqdump can inspect dumped messages, show, delete or reinject them into -the pipeline. It's an interactive tool, directly start it to get a list of -available dumps or call it with a known bot id as parameter. - -positional arguments: - botid botid to inspect dumps of - -optional arguments: - -h, --help show this help message and exit - --truncate TRUNCATE, -t TRUNCATE - Truncate raw-data with more characters than given. 0 for no truncating. Default: 1000. - -Interactive actions after a file has been selected: -- r, Recover by IDs - > r id{,id} [queue name] - > r 3,4,6 - > r 3,7,90 modify-expert-queue - The messages identified by a consecutive numbering will be stored in the - original queue or the given one and removed from the file. -- a, Recover all - > a [queue name] - > a - > a modify-expert-queue - All messages in the opened file will be recovered to the stored or given - queue and removed from the file. -- e, Delete entries by IDs - > e id{,id} - > e 3,5 - The entries will be deleted from the dump file. -- d, Delete file - > d - Delete the opened file as a whole. -- s, Show by IDs - > s id{,id} - > s 0,4,5 - Show the selected IP in a readable format. It's still a raw format from - repr, but with newlines for message and traceback. -- v, Edit by ID - > v id - > v 0 - > v 1,2 - Opens an editor (by calling `sensible-editor`) on the message. The modified message is then saved in the dump. -- q, Quit - > q - -$ intelmqdump - id: name (bot id) content - 0: alienvault-otx-parser 1 dumps - 1: cymru-whois-expert 8 dumps - 2: deduplicator-expert 2 dumps - 3: dragon-research-group-ssh-parser 2 dumps - 4: file-output2 1 dumps - 5: fraunhofer-dga-parser 1 dumps - 6: spamhaus-cert-parser 4 dumps - 7: test-bot 2 dumps -Which dump file to process (id or name)? 3 -Processing dragon-research-group-ssh-parser: 2 dumps - 0: 2015-09-03T13:13:22.159014 InvalidValue: invalid value u'NA' () for key u'source.asn' - 1: 2015-09-01T14:40:20.973743 InvalidValue: invalid value u'NA' () for key u'source.asn' -recover (a)ll, delete (e)ntries, (d)elete file, (q)uit, (s)how by ids, (r)ecover by ids? d -Deleted file /opt/intelmq/var/log/dragon-research-group-ssh-parser.dump -``` +.. code-block:: bash + + intelmqdump -h + usage: + intelmqdump [botid] + intelmqdump [-h|--help] + + intelmqdump can inspect dumped messages, show, delete or reinject them into + the pipeline. 
It's an interactive tool, directly start it to get a list of + available dumps or call it with a known bot id as parameter. + + positional arguments: + botid botid to inspect dumps of + + optional arguments: + -h, --help show this help message and exit + --truncate TRUNCATE, -t TRUNCATE + Truncate raw-data with more characters than given. 0 for no truncating. Default: 1000. + + Interactive actions after a file has been selected: + - r, Recover by IDs + > r id{,id} [queue name] + > r 3,4,6 + > r 3,7,90 modify-expert-queue + The messages identified by a consecutive numbering will be stored in the + original queue or the given one and removed from the file. + - a, Recover all + > a [queue name] + > a + > a modify-expert-queue + All messages in the opened file will be recovered to the stored or given + queue and removed from the file. + - e, Delete entries by IDs + > e id{,id} + > e 3,5 + The entries will be deleted from the dump file. + - d, Delete file + > d + Delete the opened file as a whole. + - s, Show by IDs + > s id{,id} + > s 0,4,5 + Show the selected IP in a readable format. It's still a raw format from + repr, but with newlines for message and traceback. + - v, Edit by ID + > v id + > v 0 + > v 1,2 + Opens an editor (by calling `sensible-editor`) on the message. The modified message is then saved in the dump. + - q, Quit + > q + + $ intelmqdump + id: name (bot id) content + 0: alienvault-otx-parser 1 dumps + 1: cymru-whois-expert 8 dumps + 2: deduplicator-expert 2 dumps + 3: dragon-research-group-ssh-parser 2 dumps + 4: file-output2 1 dumps + 5: fraunhofer-dga-parser 1 dumps + 6: spamhaus-cert-parser 4 dumps + 7: test-bot 2 dumps + Which dump file to process (id or name)? 3 + Processing dragon-research-group-ssh-parser: 2 dumps + 0: 2015-09-03T13:13:22.159014 InvalidValue: invalid value u'NA' () for key u'source.asn' + 1: 2015-09-01T14:40:20.973743 InvalidValue: invalid value u'NA' () for key u'source.asn' + recover (a)ll, delete (e)ntries, (d)elete file, (q)uit, (s)how by ids, (r)ecover by ids? d + Deleted file /opt/intelmq/var/log/dragon-research-group-ssh-parser.dump Bots and the intelmqdump tool use file locks to prevent writing to already opened files. Bots are trying to lock the file for up to 60 seconds if the dump file is locked already by another process (intelmqdump) and then give up. Intelmqdump does not wait and instead only shows an error message. By default, the `show` command truncates the `raw` field of messages at 1000 characters to change this limit or disable truncating at all (value 0), use the `--truncate` parameter. -### Monitoring Logs +Monitoring Logs +--------------- All bots and `intelmqctl` log to `/opt/intelmq/var/log/`/`var/log/intelmq/` (depending on your installation). In case of failures, messages are dumped to the same directory with the file ending `.dump`. -```bash -tail -f /opt/intelmq/var/log/*.log -tail -f /var/log/intelmq/*.log -``` +.. code-block:: bash + + tail -f /opt/intelmq/var/log/*.log + tail -f /var/log/intelmq/*.log -## Uninstall +Uninstall +--------- If you installed intelmq with native packages: Use the package management tool to remove the package `intelmq`. These tools do not remove configuration by default. If you installed manually via pip (note that this also deletes all configuration and possibly data): -```bash -pip3 uninstall intelmq -rm -r /opt/intelmq -``` -## Integration with ticket systems, etc. +.. code-block:: bash + + pip3 uninstall intelmq + rm -r /opt/intelmq + +Integration with ticket systems, etc. 
+-------------------------------
+
First of all, IntelMQ is a message (event) processing system: it collects feeds, processes them, enriches them, filters them and then stores them somewhere or sends them to another system. It does this in a composable, data flow oriented fashion, based on single events. There are no aggregation or grouping features.
 
Now, if you want to integrate IntelMQ with your ticket system or some other system, you need to send its output to somewhere where your ticket system or other services can pick up IntelMQ's data. This could be a database, splunk, or you could send your events directly via email to a ticket system.
 
Different users came up with different solutions for this, each of them fitting their own organisation. Hence these solutions are not part of the core IntelMQ repository.
@@ -665,18 +697,21 @@ Different users came up with different solutions for this, each of them fitting
 
The following lists external github repositories which you might consult for examples on how to integrate IntelMQ into your workflow:
 
- * [certat repository](https://github.com/certat/intelmq)
- * [Intevation's Mailgen](https://github.com/Intevation/intelmq-mailgen)
+ * `certat repository <https://github.com/certat/intelmq>`_
+ * `Intevation's Mailgen <https://github.com/Intevation/intelmq-mailgen>`_
 
-If you came up with another solution for integration, we'd like to hear from you! Please reach out to us on the [intelmq-users list](https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-users).
+If you came up with another solution for integration, we'd like to hear from you! Please reach out to us on the |intelmq-users-list-link|.
 
-## Frequently Asked Questions
+Frequently Asked Questions
+--------------------------
 
-Consult the [FAQ](FAQ.md) if you encountered any problems.
+Consult the :doc:`FAQ` if you encounter any problems.
 
-## Additional Information
+Additional Information
+----------------------
 
-### Bash Completion
+Bash Completion
+^^^^^^^^^^^^^^^
 
-To enable bash completion on `intelmqctl` and `intelmqdump` in order to help you run the commands in an easy manner, follow the installation process [here](../contrib/bash-completion/README.md).
+To enable bash completion on `intelmqctl` and `intelmqdump`, follow the installation process `here <https://github.com/certtools/intelmq/blob/master/contrib/bash-completion/README.md>`_.
diff --git a/docs/user/ecosystem.rst b/docs/user/ecosystem.rst
new file mode 100644
index 000000000..f07e64eff
--- /dev/null
+++ b/docs/user/ecosystem.rst
@@ -0,0 +1,80 @@
+IntelMQ Ecosystem
+=================
+
+
+IntelMQ is more than the core library itself: many programs are developed around it in the IntelMQ initiative.
+This document provides an overview of the ecosystem and all related tools. If you think something is missing, please let us know!
+
+IntelMQ "Core"
+--------------
+
+This is IntelMQ itself, as it is available on `github <https://github.com/certtools/intelmq>`_.
+
+It includes all the bots, the harmonization, etc.
+
+IntelMQ Manager
+---------------
+
+The Manager is the best-known software and can be seen as the face of IntelMQ.
+This software provides a graphical user interface to the management tool `intelmqctl`.
+
+→ `Repository: IntelMQ Manager <https://github.com/certtools/intelmq-manager>`_
+
+EventDB
+-------
+
+This is not software itself, but it is listed here because the term is often mentioned.
+
+The EventDB is a (usually PostgreSQL) database with data from IntelMQ.
+
+For some related scripts see the `contrib/eventdb <https://github.com/certtools/intelmq/tree/master/contrib/eventdb>`_ directory and the `eventdb-stats <https://github.com/wagner-certat/eventdb-stats>`_ repository for simple statistics generation.
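+
+As a rough sketch of how such an EventDB is commonly queried (assuming the usual layout of a single `events` table whose columns are named after the harmonization fields; your table and column names may differ):
+
+.. code-block:: bash
+
+   # count yesterday's events per feed
+   psql -d eventdb -c "SELECT \"feed.name\", count(*) FROM events WHERE \"time.source\" >= now() - interval '1 day' GROUP BY \"feed.name\";"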
+
+intelmq-webinput-csv
+--------------------
+
+A web-based interface to inject CSV data into IntelMQ with on-line validation and live feedback.
+
+→ `Repository: intelmq-webinput-csv <https://github.com/certat/intelmq-webinput-csv>`_
+
+intelmq-mailgen
+---------------
+
+A solution to send grouped notifications to network owners using SMTP/OTRS.
+
+→ `Repository: intelmq-mailgen <https://github.com/Intevation/intelmq-mailgen>`_
+
+IntelMQ Fody + Backend
+----------------------
+
+Fody is an interface for intelmq-mailgen's contact database, its OTRS and the EventDB.
+The certbund-contact expert fetches the information from this contact database and provides scripts to import RIPE data into the contact database.
+
+→ `Repository: intelmq-fody <https://github.com/Intevation/intelmq-fody>`_
+
+→ `Repository: intelmq-fody-backend <https://github.com/Intevation/intelmq-fody-backend>`_
+
+→ `Repository: intelmq-certbund-contact <https://github.com/Intevation/intelmq-certbund-contact>`_
+
+"Constituency Portal" do-portal (not developed any further)
+-----------------------------------------------------------
+
+*Note:* A new version is being developed from scratch, see `do-portal#133 <https://github.com/certat/do-portal/issues/133>`_ for more information.
+
+A contact portal with organizational hierarchies, role functionality and network objects based on RIPE data; it allows self-administration by the contacts.
+Can be queried from IntelMQ and integrates the stats-portal.
+
+→ `Repository: do-portal <https://github.com/certat/do-portal>`_
+
+stats-portal
+------------
+
+A Grafana-based statistics portal for the EventDB. Integrated in do-portal.
+
+→ `Repository: stats-portal <https://github.com/certtools/stats-portal>`_
+
+Malware Name Mapping
+--------------------
+
+A mapping of malware names from different feeds with different naming schemes to a common family name.
+
+→ `Repository: malware_name_mapping <https://github.com/certtools/malware_name_mapping>`_
diff --git a/docs/user/installation.rst b/docs/user/installation.rst
new file mode 100644
index 000000000..935b6d922
--- /dev/null
+++ b/docs/user/installation.rst
@@ -0,0 +1,127 @@
+Installation
+============
+
+.. contents::
+
+Please report any errors you encounter at https://github.com/certtools/intelmq/issues
+
+For upgrade instructions, see :doc:`upgrade`.
+For setting up a development environment see the :doc:`../dev/guide` section *Development Environment*.
+For testing pre-releases see also the :doc:`../dev/guide` section *Testing Pre-releases*.
+
+Requirements
+------------
+
+The following instructions assume the requirements below. Python versions >= 3.5 are supported.
+
+Supported and recommended operating systems are:
+
+* CentOS 7
+* Debian 9 and 10
+* OpenSUSE Leap 15.1, 15.2
+* Ubuntu: 16.04, 18.04, 20.04
+
+Other distributions which are (most probably) supported include CentOS 8, RHEL, Fedora and openSUSE Tumbleweed.
+
+Install Dependencies
+--------------------
+
+If you are using native packages, you can simply skip this section as all dependencies are installed automatically.
+
+Ubuntu / Debian
+^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+   apt install python3-pip python3-dnspython python3-psutil python3-redis python3-requests python3-termstyle python3-tz python3-dateutil
+   apt install redis-server
+
+Optional dependencies:
+
+.. code-block:: bash
+
+   apt install bash-completion jq
+   apt install python3-sleekxmpp python3-pymongo python3-psycopg2
+
+CentOS 7 / RHEL 7
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+   yum install epel-release
+   yum install python36 python36-devel python36-requests
+   yum install gcc gcc-c++
+   yum install redis
+
+openSUSE 15.1
+^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+   zypper install python3-dateutil python3-dnspython python3-psutil python3-pytz python3-redis python3-requests python3-python-termstyle
+   zypper install redis
+
+Optional dependencies:
+
+.. 
code-block:: bash
+
+   zypper in bash-completion jq
+   zypper in python3-psycopg2 python3-pymongo python3-sleekxmpp
+
+Installation
+------------
+
+Installation methods available:
+
+* native packages (`.deb`, `.rpm`)
+* PyPi (latest releases as python package)
+
+**Note:** installation for development purposes must follow the instructions available on :ref:`development environment`.
+
+Native Packages
+^^^^^^^^^^^^^^^
+
+Supported Operating Systems:
+
+* **CentOS 7** (requires `epel-release`)
+* **Debian 8** (requires `python3-typing`)
+* **Debian 9**
+* **Debian 10**
+* **Fedora 29**
+* **Fedora 30**
+* **RHEL 7** (requires `epel-release`)
+* **openSUSE Leap 15.0**
+* **openSUSE Leap 15.1**
+* **openSUSE Tumbleweed**
+* **Ubuntu 16.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ xenial main`)
+* **Ubuntu 18.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ bionic main`)
+* **Ubuntu 19.10** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ eoan main`)
+* **Ubuntu 20.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ focal main`)
+
+Get the installation instructions for your operating system here: `Installation Native Packages <https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq&package=intelmq>`_.
+To import the key on Debian and Ubuntu, use:
+
+.. code-block:: bash
+
+   curl https://build.opensuse.org/projects/home:sebix:intelmq/public_key | sudo apt-key add -
+
+Please report any errors or improvements at `IntelMQ Issues <https://github.com/certtools/intelmq/issues>`_. Thanks!
+
+PyPi
+^^^^
+
+.. code-block:: bash
+
+   sudo -i
+
+   pip3 install intelmq
+
+   useradd -d /opt/intelmq -U -s /bin/bash intelmq
+   sudo intelmqsetup
+
+`intelmqsetup` creates all necessary directories and provides a default configuration for new setups. See the :ref:`configuration` section for more information on them and how to influence them.
+
+Additional Information
+^^^^^^^^^^^^^^^^^^^^^^
+
+Following any one of the installation methods mentioned before will set up the IntelMQ base. However, some bots may have additional dependencies which are mentioned in their :doc:`own documentation <bots>`.
diff --git a/docs/user/intelmqctl.rst b/docs/user/intelmqctl.rst
new file mode 100644
index 000000000..a433c8138
--- /dev/null
+++ b/docs/user/intelmqctl.rst
@@ -0,0 +1,452 @@
+intelmqctl documentation
+========================
+
+.. contents::
+
+Introduction
+------------
+
+intelmqctl is the main tool to handle an IntelMQ installation.
+It handles the bots themselves and has some tools to manage the installation.
+
+Output type
+-----------
+
+intelmqctl can be used as a command line tool, as a library and as a tool by other programs.
+If called directly, it will print all output to the console (stderr).
+If used as a python library, the python types themselves are returned.
+The third option is to use machine-readable JSON as output (used by other managing tools).
+
+Manage individual bots
+----------------------
+
+As all init systems, intelmqctl has the methods start, stop, restart, reload and status.
+
+start
+^^^^^
+
+This will start the bot with the ID `file-output`. A file with its PID will be created in `/opt/intelmq/var/run/[bot-id].pid`.
+
+.. code-block:: bash
+
+   > intelmqctl start file-output
+   Starting file-output...
+   file-output is running.
+
+If the bot is already running, it won't be started again:
+
+.. code-block:: bash
+
+   > intelmqctl start file-output
+   file-output is running.
+
+stop
+^^^^^^^^^^^^^^^
+
+If the PID file does exist, a SIGINT will be sent to the process. After 0.25s we check if the process is running. If not, the PID file will be removed.
+
+.. code-block:: bash
+
+   > intelmqctl stop file-output
+   Stopping file-output...
+   file-output is stopped.
+
+If there's no running bot, there's nothing to do.
+
+.. code-block:: bash
+
+   > intelmqctl stop file-output
+   file-output was NOT RUNNING.
+
+If the bot did not stop in 0.25s, intelmqctl will say it's still running:
+
+.. code-block:: bash
+
+   > intelmqctl stop file-output
+   file-output is still running
+
+status
+^^^^^^^^^^^^^^^
+
+Checks for the PID file and if the process with the given PID is alive. If the PID file exists, but the process does not exist, it will be removed.
+
+.. code-block:: bash
+
+   > intelmqctl status file-output
+   file-output is stopped.
+   > intelmqctl start file-output
+   Starting file-output...
+   file-output is running.
+   > intelmqctl status file-output
+   file-output is running.
+
+restart
+^^^^^^^^^^^^^^^
+
+The same as stop and start consecutively.
+
+.. code-block:: bash
+
+   > intelmqctl restart file-output
+   Stopping file-output...
+   file-output is stopped.
+   Starting file-output...
+   file-output is running.
+
+reload
+^^^^^^^^^^^^^^^
+
+Sends a SIGHUP to the bot, which will then reload the configuration.
+
+.. code-block:: bash
+
+   > intelmqctl reload file-output
+   Reloading file-output ...
+   file-output is running.
+
+If the bot is not running, we can't reload it:
+
+.. code-block:: bash
+
+   > intelmqctl reload file-output
+   file-output was NOT RUNNING.
+
+run
+^^^^^^^^^^^^^^^
+
+Run a bot directly for debugging purposes.
+
+If launched with no arguments, the bot will call its init method and start processing messages as usual – but you see everything that happens.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output
+   file-output: RestAPIOutputBot initialized with id file-output and version 3.5.2 as process 12345.
+   file-output: Bot is starting.
+   file-output: Loading source pipeline and queue 'file-output-queue'.
+   file-output: Connected to source queue.
+   file-output: No destination queues to load.
+   file-output: Bot initialization completed.
+   file-output: Waiting for incoming message.
+
+Should you get lost any time, just use **--help** after any argument for further explanation.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output --help
+
+Note that if another instance of the bot is running, only a warning will be displayed.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output
+   Main instance of the bot is running in the background. You may want to launch: intelmqctl stop file-output
+
+You can set the log level with the `-l` flag, e.g. `-l DEBUG`. For the 'console' subcommand, 'DEBUG' is the default.
+
+console
+^^^^^^^
+
+If launched with the **console** argument, you get a `pdb` live console; or an `ipdb` or `pudb` console if they were previously installed (e.g. `pip3 install ipdb --user`).
+
+.. code-block:: bash
+
+   > intelmqctl run file-output console
+   *** Using console ipdb. Please use 'self' to access to the bot instance properties. ***
+   ipdb> self. ...
+
+You may specify the desired console in the next argument.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output console pudb
+
+message
+^^^^^^^
+
+Operate directly with the input / output pipelines.
+
+If **get** is the parameter, you see the message that waits in the input (source or internal) queue. If the argument is **pop**, the message gets popped as well.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output message get
+   file-output: Waiting for a message to get...
+   {
+       "classification.type": "c&c",
+       "feed.url": "https://example.com",
+       "raw": "1233",
+       "source.ip": "1.2.3.4",
+       "time.observation": "2017-05-17T22:00:33+00:00",
+       "time.source": "2017-05-17T22:00:32+00:00"
+   }
+
+To send directly to the bot's output queue, just as it was sent by `self.send_message()` in the bot's `process()` method, use the **send** argument.
+In our case of `file-output`, it has no destination queue, so nothing happens.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output message send '{"time.observation": "2017-05-17T22:00:33+00:00", "time.source": "2017-05-17T22:00:32+00:00"}'
+   file-output: Bot has no destination queues.
+
+Note: if you would like to know the possible parameters of the message, supply a wrong one – you will be prompted whether you want to list the current bot harmonization.
+
+process
+^^^^^^^
+
+With no other arguments, the bot's `process()` method will be run one time.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output process
+   file-output: Bot is starting.
+   file-output: Bot initialization completed.
+   file-output: Processing...
+   file-output: Waiting for incoming message.
+   file-output: Received message {'raw': '1234'}.
+
+If run with the **--dryrun|-d** flag, the message never actually gets popped from the source or internal pipeline, nor sent to the output pipeline.
+Plus, you receive a note about the exact moment the message would get sent, or acknowledged. If the message would be sent to a non-default path, the name of this path is printed on the console.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output process -d
+   file-output: * Dryrun only, no message will be really sent through.
+   ...
+   file-output: DRYRUN: Message would be acknowledged now!
+
+You may trick the bot into processing a JSON instead of the message in its pipeline with the **--msg|-m** flag.
+
+.. code-block:: bash
+
+   > intelmqctl run file-output process -m '{"source.ip":"1.2.3.4"}'
+   file-output: * Message from cli will be used when processing.
+   ...
+
+If you wish to display the processed message as well, use the **--show-sent|-s** flag. Then, if sent through (either with `--dryrun` or without), the message gets displayed as well.
+
+
+disable
+^^^^^^^
+
+Sets the `enabled` flag in the runtime configuration of the bot to `false`.
+By default, all bots are enabled.
+
+Example output:
+
+.. code-block:: bash
+
+   > intelmqctl status file-output
+   file-output is stopped.
+   > intelmqctl disable file-output
+   > intelmqctl status file-output
+   file-output is disabled.
+
+enable
+^^^^^^^^^^^^^^^
+
+Sets the `enabled` flag in the runtime configuration of the bot to `true`.
+
+Example output:
+
+.. code-block:: bash
+
+   > intelmqctl status file-output
+   file-output is disabled.
+   > intelmqctl enable file-output
+   > intelmqctl status file-output
+   file-output is stopped.
+
+Manage the botnet
+------------------
+
+In IntelMQ, the botnet is the set of all currently configured and enabled bots.
+All configured bots have their configuration in runtime.conf and their queues in pipeline.conf.
+By default, all bots are enabled. To disable a bot set `enabled` to `false`.
+Also see :doc:`bots` and the *Runtime Configuration* section of :doc:`configuration-management`.
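+
+For example, a bot excluded from the botnet carries `"enabled": false` in its `runtime.conf` entry (a minimal sketch, all other fields of the entry omitted here):
+
+.. code-block:: json
+
+   "file-output": {
+      ...
+      "enabled": false
+   }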
+
+If no bot id is given, the command applies to all bots / the botnet.
+All commands except the start action are applied to all bots.
+But only enabled bots are started.
+
+In the examples below, a very minimal botnet is used.
+
+start
+^^^^^^^^^^^^^^^
+
+The start action applies to all bots which are enabled.
+
+.. code-block:: bash
+
+   > intelmqctl start
+   Starting abusech-domain-parser...
+   abusech-domain-parser is running.
+   Starting abusech-feodo-domains-collector...
+   abusech-feodo-domains-collector is running.
+   Starting deduplicator-expert...
+   deduplicator-expert is running.
+   file-output is disabled.
+   Botnet is running.
+
+As we can see, file-output is disabled and thus has not been started. You can always explicitly start disabled bots.
+
+stop
+^^^^^^^^^^^^^^^
+The stop action applies to all bots. Assume that all bots have been running:
+
+.. code-block:: bash
+
+   > intelmqctl stop
+   Stopping Botnet...
+   Stopping abusech-domain-parser...
+   abusech-domain-parser is stopped.
+   Stopping abusech-feodo-domains-collector...
+   abusech-feodo-domains-collector is stopped.
+   Stopping deduplicator-expert...
+   deduplicator-expert is stopped.
+   Stopping file-output...
+   file-output is stopped.
+   Botnet is stopped.
+
+status
+^^^^^^^^^^^^^^^
+
+With this command we can see the status of all configured bots. Here, the botnet was started beforehand:
+
+.. code-block:: bash
+
+   > intelmqctl status
+   abusech-domain-parser is running.
+   abusech-feodo-domains-collector is running.
+   deduplicator-expert is running.
+   file-output is disabled.
+
+And if the disabled bot has also been started:
+
+.. code-block:: bash
+
+   > intelmqctl status
+   abusech-domain-parser is running.
+   abusech-feodo-domains-collector is running.
+   deduplicator-expert is running.
+   file-output is running.
+
+If the botnet is stopped, the output looks like this:
+
+.. code-block:: bash
+
+   > intelmqctl status
+   abusech-domain-parser is stopped.
+   abusech-feodo-domains-collector is stopped.
+   deduplicator-expert is stopped.
+   file-output is disabled.
+
+restart
+^^^^^^^^^^^^^^^
+The same as start and stop consecutively.
+
+reload
+^^^^^^^^^^^^^^^
+The same as reload of every bot.
+
+enable / disable
+^^^^^^^^^^^^^^^^
+The sub commands `enable` and `disable` set the corresponding flags in runtime.conf.
+
+.. code-block:: bash
+
+   > intelmqctl status
+   file-output is stopped.
+   malware-domain-list-collector is stopped.
+   malware-domain-list-parser is stopped.
+   > intelmqctl disable file-output
+   > intelmqctl status
+   file-output is disabled.
+   malware-domain-list-collector is stopped.
+   malware-domain-list-parser is stopped.
+   > intelmqctl enable file-output
+   > intelmqctl status
+   file-output is stopped.
+   malware-domain-list-collector is stopped.
+   malware-domain-list-parser is stopped.
+
+List bots
+---------------
+`intelmqctl list bots` lists all configured bots and their descriptions.
+
+List queues
+---------------
+`intelmqctl list queues` shows all queues which are currently in use according to the configuration and how many events are in them:
+
+.. code-block:: bash
+
+   > intelmqctl list queues
+   abusech-domain-parser-queue - 0
+   abusech-domain-parser-queue-internal - 0
+   deduplicator-expert-queue - 0
+   deduplicator-expert-queue-internal - 0
+   file-output-queue - 234
+   file-output-queue-internal - 0
+
+Use the `-q` or `--quiet` flag to only show non-empty queues:
+
+.. 
code-block:: bash
+
+   > intelmqctl list queues -q
+   file-output-queue - 234
+
+The `--sum` or `--count` flag will show the sum of events on all queues:
+
+.. code-block:: bash
+
+   > intelmqctl list queues --sum
+   42
+
+Log
+---------------
+
+intelmqctl can show the last log lines for a bot, filtered by the log level.
+
+See the help page for more information.
+
+Check
+---------------
+This command will do various sanity checks on the installation and especially the configuration.
+
+Orphaned Queues
+^^^^^^^^^^^^^^^
+
+The `intelmqctl check` tool can search for orphaned queues. "Orphaned queues" are queues that have been used in the past and are no longer in use. For example you had a bot which you removed or renamed afterwards, but there were still messages in its source queue. The source queue won't be renamed automatically and is now disconnected. As this queue is no longer configured, it won't show up in the list of IntelMQ's queues either. In case you are using Redis as the message broker, you can use the `redis-cli` tool to examine or remove these queues:
+
+.. code-block:: bash
+
+   redis-cli -n 2
+   keys * # lists all existing non-empty queues
+   llen [queue-name] # shows the length of the queue [queue-name]
+   lindex [queue-name] [index] # show the [index]'s message of the queue [queue-name]
+   del [queue-name] # remove the queue [queue-name]
+
+To ignore certain queues in this check, you can set the parameter `intelmqctl_check_orphaned_queues_ignore` in the *defaults* configuration file. For example:
+
+.. code-block:: json
+
+   "intelmqctl_check_orphaned_queues_ignore": ["Taichung-Parser"],
+
+Configuration upgrade
+----------------------
+The `intelmqctl upgrade-config` function upgrades the configuration from previous versions to the current one.
+It keeps track of previously installed versions and the result of all "upgrade functions" in the "state file", located in `$var_state_path/state.json` (`/opt/intelmq/var/lib/state.json` or `/var/lib/intelmq/state.json`).
+
+This function has been introduced in version 2.0.1.
+
+It makes backups itself for all changed files before every run. Backups are overwritten if they already exist, so make sure to always have a backup of your configuration, just in case.
+
+Exit code
+---------------
+In case of errors or unsuccessful operations, the exit code is higher than 0.
+For example, when running `intelmqctl start` and one enabled bot is not running, the exit code is 1.
+The same is valid for e.g. `intelmqctl status`, which can be used for monitoring, and all other operations.
+
+Known issues
+---------------
+
+The currently implemented process management using PID files is very error-prone.
diff --git a/docs/user/introduction.rst b/docs/user/introduction.rst
new file mode 100644
index 000000000..f922645ab
--- /dev/null
+++ b/docs/user/introduction.rst
@@ -0,0 +1,84 @@
+############
+Introduction
+############
+
+*****
+About
+*****
+
+**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs,
+abuse departments, etc.) for collecting and processing security feeds
+(such as log files) using a message queuing protocol. It's a community driven
+initiative called **IHAP** (Incident Handling Automation Project) which
+was conceptually designed by European CERTs/CSIRTs during several
+InfoSec events. Its main goal is to give incident responders an easy
+way to collect & process threat intelligence, thus improving the incident
+handling processes of CERTs.
+
+**Incident Handling Automation Project**
+
+- **URL:**
+- **Mailing-list:**
+
+Several pieces of software have evolved around IntelMQ. For an overview,
+look at the :doc:`ecosystem`.
+
+IntelMQ can be used for:
+
+- automated incident handling
+- situational awareness
+- automated notifications
+- as data collector for other tools
+- etc.
+
+IntelMQ's design was influenced by
+`AbuseHelper <https://github.com/abusesa/abusehelper>`__, however it was
+re-written from scratch and aims at:
+
+- Reducing the complexity of system administration
+- Reducing the complexity of writing new bots for new data feeds
+- Reducing the probability of events being lost anywhere in the process,
+  with persistence functionality (even in case of a system crash)
+- Use and improve the existing Data Harmonization Ontology
+- Use JSON format for all messages
+- Provide an easy way to store data into Log Collectors like
+  ElasticSearch, Splunk, databases (such as PostgreSQL)
+- Provide an easy way to create your own black-lists
+- Provide easy communication with other systems via HTTP RESTful API
+
+It follows these basic meta-guidelines:
+
+- Don't break simplicity - KISS
+- Keep it open source - forever
+- Strive for perfection while keeping a deadline
+- Reduce complexity/avoid feature bloat
+- Embrace unit testing
+- Code readability: test with inexperienced programmers
+- Communicate clearly
+
+*****
+Usage
+*****
+
+Various approaches to installing `intelmq` are described in :doc:`installation`.
+
+The :doc:`configuration-management` gives an overview of how an `intelmq` installation is set up and how to configure and maintain the setup.
+There is also a list of available :doc:`Feeds <feeds>` as well as a detailed description of the different :doc:`bots` intelmq brings with it.
+
+If you know additional feeds and how to parse them, please contribute your code or your configuration (via issues or the mailing lists).
+
+For support questions please use the |intelmq-users-list-link|.
+
+IntelMQ Manager
+===============
+
+Check out `this graphical tool <https://github.com/certtools/intelmq-manager>`_ to easily manage an IntelMQ system.
+
+**********
+Contribute
+**********
+
+- Subscribe to the |intelmq-developers-list-link|
+- Watch out for our regular developers conference call
+- IRC: server: irc.freenode.net, channel: \#intelmq
+- Via github issues
+- Via Pull requests (please do read help.github.com first)
diff --git a/docs/guides/n6-integrations.md b/docs/user/n6-integrations.rst
similarity index 81%
rename from docs/guides/n6-integrations.md
rename to docs/user/n6-integrations.rst
index 03ef5da91..77f5d75ff 100644
--- a/docs/guides/n6-integrations.md
+++ b/docs/user/n6-integrations.rst
@@ -1,4 +1,5 @@
-# IntelMQ - n6 Integration
+IntelMQ - n6 Integration
+========================
 
n6 is an open source tool with very similar aims to IntelMQ: processing and distributing IoC data. It is developed by CERT.pl. The covered use-cases differ and both tools have non-overlapping strengths.
@@ -7,15 +8,18 @@ Information about n6 can be found here:
- Website: https://n6.cert.pl/en/
- Development: https://github.com/CERT-Polska/n6/
 
-![n6 schema](https://n6.cert.pl/n6-schemat2.png)
+.. figure:: https://n6.cert.pl/n6-schemat2.png
+   :alt: n6 schema
 
-## Data format
+Data format
+-------------------------------
 
The internal data representation differs between the two systems, so any data exchanged between the systems needs to be converted.
As n6 can save multiple IP addresses per event, which IntelMQ is unable to do, one n6 event results in one or more IntelMQ events.
For this and some other reasons, the conversion is *not* bidirectional.
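+
+As an illustrative sketch of this 1-to-n mapping (simplified, not the exact wire format of either system): an n6 event carrying two addresses, e.g.
+
+.. code-block:: json
+
+   {"category": "cnc", "address": [{"ip": "192.0.2.1"}, {"ip": "192.0.2.2"}]}
+
+is converted into two IntelMQ events, one per address, each carrying a single `source.ip`.
+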
-## Data exchange interface
+Data exchange interface
+-------------------------------
 
n6 offers a STOMP interface via the RabbitMQ broker, which can be used for both sending and receiving data.
IntelMQ has both a STOMP collector bot as well as a STOMP output bot.
@@ -24,13 +28,15 @@ IntelMQ has both a STOMP collector bot as well as a STOMP output bot.
- [IntelMQ's n6 parser bot](Bots.html#n6)
- [IntelMQ's Stomp output bot](Bots.html#stomp-1)
 
-## Data conversion
+Data conversion
+-------------------------------
 
IntelMQ can parse n6 data using the n6 parser and n6 can parse IntelMQ data using the Intelmq2n6 parser.
 
- [IntelMQ's n6 parser bot](Bots.html#n6)
 
-## Webinput CSV
+Webinput CSV
+-------------------------------
 
The IntelMQ Webinput CSV software can also be used together with n6.
The documentation can be found in the software's repository:
diff --git a/docs/user/upgrade.rst b/docs/user/upgrade.rst
new file mode 100644
index 000000000..4f88fd513
--- /dev/null
+++ b/docs/user/upgrade.rst
@@ -0,0 +1,84 @@
+Upgrade instructions
+====================
+
+.. contents::
+
+For installation instructions, see :doc:`installation`.
+
+Read NEWS.md
+------------
+
+Read the `NEWS.md <https://github.com/certtools/intelmq/blob/master/NEWS.md>`_ file for things that need your attention.
+
+Stop IntelMQ and create a Backup
+--------------------------------
+
+* Make sure that your IntelMQ system is completely stopped: `intelmqctl stop`
+* Create a backup of the IntelMQ Home directory, which includes all configurations. They are not overwritten, but backups are always nice to have!
+
+.. code-block:: bash
+
+   sudo cp -R /opt/intelmq /opt/intelmq-backup
+
+Upgrade IntelMQ
+---------------
+
+Before upgrading, check that your setup is clean and there are no events in the queues:
+
+.. code-block:: bash
+
+   intelmqctl check
+   intelmqctl list queues -q
+
+The upgrade depends on how you installed IntelMQ.
+
+Packages
+^^^^^^^^
+
+Use your system's package management.
+
+PyPi
+^^^^
+
+.. code-block:: bash
+
+   pip install -U --no-deps intelmq
+   sudo intelmqsetup
+
+Using `--no-deps` will not upgrade dependencies, which would probably overwrite the system's libraries.
+Remove this option to also upgrade dependencies.
+
+Local repository
+^^^^^^^^^^^^^^^^
+
+If you have an editable installation, refer to the instructions in the :doc:`../dev/guide` section *Development Environment*.
+
+Update the repository depending on your setup (e.g. `git pull origin master`).
+
+And run the installation again:
+
+.. code-block:: bash
+
+   pip install .
+   sudo intelmqsetup
+
+For editable installations (development only), run `pip install -e .` instead.
+
+Upgrade configuration and check the installation
+------------------------------------------------
+
+Go through `NEWS.md <https://github.com/certtools/intelmq/blob/master/NEWS.md>`_ and apply necessary adaptations to your setup.
+If you have adapted IntelMQ's code, also read the `CHANGELOG.md <https://github.com/certtools/intelmq/blob/master/CHANGELOG.md>`_.
+
+Check your installation and configuration to detect any problems:
+
+.. code-block:: bash
+
+   intelmqctl upgrade-config
+   intelmqctl check
+
+Start IntelMQ
+-------------
+
+.. code-block:: bash
+
+   intelmqctl start
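+
+Afterwards, a quick sanity check (both commands are described in the intelmqctl documentation) verifies that all bots came up again and the queues are being processed:
+
+.. code-block:: bash
+
+   intelmqctl status
+   intelmqctl list queues -q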