Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
cccs-shellyw committed Feb 13, 2024
0 parents commit db25b09
Show file tree
Hide file tree
Showing 51 changed files with 3,705 additions and 0 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/build-pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Release pipeline: on every pushed tag, build the Kangooroo shadow (uber) JAR
# with Gradle and attach it to the GitHub release for that tag.
name: Java CI with Gradle

on:
  push:
    tags:
      - '*'

permissions:
  # Creating a release needs write access to repository contents. This is a
  # broad permission, so be careful about what else is added to this workflow.
  contents: write

jobs:
  build-release-jar:
    name: build and release Kangooroo ShadowJar
    runs-on: ubuntu-latest

    steps:
      # NOTE(review): the version after '@' for checkout and setup-java was not
      # recoverable from the reviewed copy (it appeared as an obfuscated e-mail
      # placeholder). Both are pinned to the v4 major tag here; confirm the
      # intended versions.
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Build with Gradle
        uses: gradle/gradle-build-action@v3
        with:
          gradle-version: '7.0.2'
      - run: gradle shadowJar
      # Upload the JAR produced by the shadowJar task as a release asset.
      - uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: build/libs/KangoorooStandalone.jar
144 changes: 144 additions & 0 deletions .github/workflows/test-pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Test pipeline: runs the unit tests and two end-to-end crawls (with and
# without a proxy) on every branch push.
name: Java Test CI with Gradle

on:
  push:
    # only trigger on branches, not on tags
    branches: '**'

# These jobs only read the repository, so keep the token read-only.
permissions:
  contents: read

jobs:
  unit-test:
    name: Unit Test Kangooroo Source Code
    runs-on: ubuntu-latest

    steps:
      # NOTE(review): the versions after '@' for checkout, setup-java and
      # setup-chrome in this file were not recoverable from the reviewed copy
      # (they appeared as an obfuscated e-mail placeholder). They are pinned to
      # major tags here; confirm the intended versions.
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Build with Gradle
        uses: gradle/gradle-build-action@v3
        with:
          gradle-version: '7.0.2'
      - run: gradle test
      # Publish the HTML test report even when the tests fail.
      - name: 'Upload Artifact'
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-result
          path: build/reports/tests/
          retention-days: 5

  no-proxy-integration-test:
    name: Integration Test Kangooroo without Proxy
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Build with Gradle
        uses: gradle/gradle-build-action@v3
        with:
          gradle-version: '7.0.2'
      - run: gradle shadowJar
      # Chrome and chromedriver are pinned to the same major version (120).
      - uses: browser-actions/setup-chrome@v1
        with:
          chrome-version: '120'
      - uses: nanasess/setup-chromedriver@v2
        with:
          chromedriver-version: '120.0.6099.109'
      - name: Get all files ready to run Kangooroo
        run: cp /usr/local/bin/chromedriver . && mkdir output && mkdir tmp && cp build/libs/KangoorooStandalone.jar .

      - name: Run Kangooroo with default settings
        run: java -jar KangoorooStandalone.jar --url https://duckduckgo.com/

      # The result directory name is the hash Kangooroo derives from the
      # crawled URL (the README describes it as the MD5 hash of the URL).
      - name: Check all output files of Kangooroo is present
        run: |
          ls --format=commas output/168ce875a2188cce97924a11f6a918df/ > output1.txt
          cmp --silent test/integration_test/assert_output_files.txt output1.txt && echo "All expected output files are present." || exit 1
      # `ls -A` also reports hidden and zero-length entries, which a
      # "total 0" check on `ls -l` output would miss.
      - name: Check if tmp folder is empty
        run: |
          if [ -n "$(ls -A tmp)" ]; then
            echo "tmp folder is not empty:" >&2
            ls -la tmp >&2
            exit 1
          fi
      - name: Check if results.json have expected keys
        run: |
          results=output/168ce875a2188cce97924a11f6a918df/results.json
          for key in response_code connection_success requested_url requested_url_ip actual_url actual_url_ip has_timed_out processing_time creationDate engineName engineVersion; do
            if ! grep -qe "$key" "$results"; then
              echo "Missing key in results.json: $key" >&2
              exit 1
            fi
          done

  with-proxy-integration-test:
    name: Integration Test Kangooroo with Proxy
    runs-on: ubuntu-latest

    # proxy.py runs as a service container, published on port 8899 of the runner.
    services:
      proxypy:
        image: abhinavsingh/proxy.py
        ports:
          - '8899:8899'

    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      - name: Build with Gradle
        uses: gradle/gradle-build-action@v3
        with:
          gradle-version: '7.0.2'
      - run: gradle shadowJar

      - uses: browser-actions/setup-chrome@v1
        with:
          chrome-version: '120'

      - uses: nanasess/setup-chromedriver@v2
        with:
          chromedriver-version: '120.0.6099.109'

      - name: Get all files ready to run Kangooroo
        run: cp /usr/local/bin/chromedriver . && mkdir output2 && mkdir tmp && cp build/libs/KangoorooStandalone.jar .

      # NOTE(review): test_conf.yml presumably points Kangooroo at the proxy
      # service and at the output2 folder; confirm against that file.
      - name: Run Kangooroo with Proxy
        run: java -jar KangoorooStandalone.jar -cf test/integration_test/test_conf.yml --url https://duckduckgo.com/

      - name: Check all output files of Kangooroo is present
        run: |
          ls --format=commas output2/168ce875a2188cce97924a11f6a918df/ > output2.txt
          cmp --silent test/integration_test/assert_output_files.txt output2.txt && echo "All expected output files are present." || exit 1
      # The proxy log must mention the crawled host, proving the traffic
      # actually went through the proxy container.
      - name: Check if kangooroo connected to the proxy
        run: |
          sleep 10
          docker logs "${{ job.services.proxypy.id }}" > output3.txt 2>&1
          grep -qe "duckduckgo.com:443" output3.txt
      # `ls -A` also reports hidden and zero-length entries, which a
      # "total 0" check on `ls -l` output would miss.
      - name: Check if tmp folder is empty
        run: |
          if [ -n "$(ls -A tmp)" ]; then
            echo "tmp folder is not empty:" >&2
            ls -la tmp >&2
            exit 1
          fi
      # Print the result file so a failing key check is easy to diagnose.
      - run: cat output2/168ce875a2188cce97924a11f6a918df/results.json
      - name: Check if results.json have expected keys
        run: |
          results=output2/168ce875a2188cce97924a11f6a918df/results.json
          for key in response_code connection_success requested_url actual_url has_timed_out processing_time creationDate engineName engineVersion; do
            if ! grep -qe "$key" "$results"; then
              echo "Missing key in results.json: $key" >&2
              exit 1
            fi
          done
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Gradle's project-level cache directory
/.gradle/
# Gradle build output
/build/
# IDE project settings (IntelliJ IDEA)
.idea/
11 changes: 11 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
MIT License

Copyright (c) 2024 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment)

Copyright title to all 3rd party software distributed with Kangooroo is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Kangooroo

## Description

Kangooroo is a Java utility for crawling malicious URLs.
It is integrated with [Assemblyline's](https://github.com/CybercentreCanada/assemblyline) [URLDownloader](https://github.com/CybercentreCanada/assemblyline-service-urldownloader) service and can be used as a standalone commandline application.


## Build Instruction
We are using Java 11 with Gradle 7.0.2 for building the Kangooroo Jars. You can also use the gradle wrapper in the project to build the Jar.
To create an uber JAR so that Kangooroo can run as a standalone utility, run the command `gradle shadowJar`. You should then find `KangoorooStandalone.jar` in the `build/libs` directory.

## Installation
- Have Java version 11 installed
- Have `chromium-browser` and a `chromedriver` that matches the chromium browser version installed (chromedriver can be downloaded from https://chromedriver.chromium.org/downloads)
- Have `chromedriver` and `KangoorooStandalone.jar` placed in the same folder

- if you want to configure simple logging (we have log4j included in the Jar), copy the file at [log4j2.xml](https://github.com/CybercentreCanada/kangooroo/blob/stage/resources/log4j2.xml) to your current directory and add this to your java command `java -Dlog4j2.configurationFile=./log4j2.xml`
- if you want to configure `output_folder` and `temporary_folder` location, copy over the `conf.yml` file and modify the value for `output_folder` and `temporary_folder` to your desired location.

## Usage
The program can be run with command `java -jar KangoorooStandalone.jar [ARGUMENTS]`

Running the program with logging configured:
` java -Dlog4j2.configurationFile=./log4j2.xml -jar KangoorooStandalone.jar [ARGUMENTS]`
- `url` the URL that you wish to crawl. It should be a full URL that starts with "http://" or "https://"
- `url-type` Either `PHISHING` (default) or `SMISHING`. The `PHISHING` option sets the window size to `1280x720` and sets the user-agent to a desktop client. The `SMISHING` option sets the window size to `375x667` and sets the user-agent to a phone client.

Users can choose the output directory and the directory for temporary files by modifying the [conf.yml](https://github.com/CybercentreCanada/kangooroo/blob/stage/resources/conf.yml) file and passing the new configuration file with: `-cf path/to/conf/file`

The current defaults are the `./tmp/` directory for temporary files and the `./output/` directory for output result files. These two folders must exist on disk before running the program.


## Output
All files of interest are in the **output_folder** specified in `conf.yml`. The result of the web crawl is stored in the directory `{output_folder}/{MD5 HASH of URL}/`.
The output directory no longer contains zip files; instead it contains:
- favicon.ico : favicon of the website, if one exists
- screenshot.png : a screenshot of the website of interest
- session.har : the HAR file of the communication with the website
- source.html: the source html file of the website of interest
- results.json: JSON summary of the URL fetch result
99 changes: 99 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Gradle plugins applied to this build.
plugins {
    id 'java-library'
    id 'maven-publish'
    id 'eclipse'
    // Shadow builds the standalone uber JAR. The version is pinned exactly
    // (it was the dynamic selector '7.0.0+') so the build is reproducible.
    id 'com.github.johnrengelman.shadow' version '7.0.0'
}
// Dependencies are resolved from Maven Central only.
repositories {
    mavenCentral()
}

// Project coordinates.
group = 'ca.gc.cyber'
version = '1.0.8'

// Adds the Buildship Gradle nature to the generated Eclipse project.
eclipse {
    project {
        natures 'org.eclipse.buildship.core.gradleprojectnature'
    }
}

// Library, logging, annotation-processing and test dependencies.
dependencies {

    // HTTP proxy used while crawling. The SLF4J bindings it could bring in
    // are excluded so that only the log4j2 binding declared below is present.
    api 'com.browserup:browserup-proxy-core:2.1.2'
    api('com.browserup:browserup-proxy:2.1.2') {
        exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl'
        exclude group: 'org.slf4j', module: 'slf4j-simple'
        exclude group: 'org.slf4j', module: 'slf4j-jdk14'
        exclude group: 'org.slf4j', module: 'slf4j-log4j12'
    }

    api 'org.yaml:snakeyaml:1.28'

    // Selenium / chromedriver integration
    api 'org.seleniumhq.selenium:selenium-api:3.141.59'
    api 'org.seleniumhq.selenium:selenium-chrome-driver:3.141.59'
    api 'org.seleniumhq.selenium:selenium-java:3.141.59'
    api 'org.seleniumhq.selenium:selenium-remote-driver:3.141.59'
    api 'org.seleniumhq.selenium:selenium-support:3.141.59'
    api 'org.apache.httpcomponents:fluent-hc:4.5.13'

    // logging dependencies
    api 'org.apache.logging.log4j:log4j-api:2.22.0'
    api 'org.apache.logging.log4j:log4j-core:2.22.0'
    api 'org.apache.logging.log4j:log4j-slf4j2-impl:2.22.0'
    api 'org.slf4j:slf4j-api:1.7.30'

    api 'commons-cli:commons-cli:1.4'
    api 'commons-io:commons-io:2.5'
    api 'commons-net:commons-net:3.7.2'
    api 'org.apache.commons:commons-lang3:3.11'
    api 'org.apache.commons:commons-text:1.9'

    // NOTE(review): lombok is usually only needed at compile time; consider
    // compileOnly instead of api once test usage has been checked.
    api 'org.projectlombok:lombok:1.18.12'
    api 'com.fasterxml.jackson.core:jackson-databind:2.12.1'
    api 'io.netty:netty-all:4.1.45.Final'

    // be able to build with lombok annotation
    annotationProcessor 'org.projectlombok:lombok:1.18.12'

    testImplementation 'junit:junit:4.12'
    testImplementation "org.mockito:mockito-core:5.+"
    testImplementation 'org.mockito:mockito-inline:5.2.0'

    // Pin gson to exactly 2.8.6, whatever transitive dependencies request.
    // A strict version replaces the deprecated `force = true` flag.
    api('com.google.code.gson:gson') {
        version {
            strictly '2.8.6'
        }
    }

    constraints {
        // NOTE(review): log4j-api is declared directly at 2.22.0 above, so
        // this 2.16.0 floor looks redundant; confirm before removing.
        implementation('org.apache.logging.log4j:log4j-api:2.16.0') {
            because 'of the log4shell vulnerability'
        }
    }
}


// The project uses a non-standard layout: sources in src/, resources in
// resources/, and test sources in test/src/.
sourceSets {
    main {
        java.srcDirs = ['src']
        resources.srcDirs = ['resources']
    }
    test {
        java.srcDirs = ['test/src']
    }
}

// Plain JAR: sets the entry point and a manifest Class-Path that lists every
// runtime dependency as lib/<file name>.
jar {
    zip64 = true
    duplicatesStrategy = DuplicatesStrategy.EXCLUDE

    // One "lib/<file name>" entry per file on the runtime classpath.
    def runtimeLibraries = getProject().getConfigurations().runtimeClasspath
    def manifestClassPath = runtimeLibraries.collect { libraryFile -> 'lib/' + libraryFile.getName() }.join(' ')

    manifest {
        attributes(
            'Main-Class': 'ca.gc.cyber.kangooroo.KangoorooStandaloneRunner',
            'Class-Path': manifestClassPath,
            'Multi-Release': true
        )
    }
}

// Uber JAR: with an empty classifier and version the archive is named
// KangoorooStandalone.jar.
shadowJar {
    zip64 = true
    archiveBaseName = 'KangoorooStandalone'
    archiveClassifier = ''
    archiveVersion = ''
}
Binary file added gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
5 changes: 5 additions & 0 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Gradle wrapper settings.
# Where the downloaded distribution is unpacked (relative to GRADLE_USER_HOME).
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Where the downloaded distribution archive is stored.
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
# Gradle 7.0.2, binary-only distribution.
distributionUrl=https://services.gradle.org/distributions/gradle-7.0.2-bin.zip
Loading

0 comments on commit db25b09

Please sign in to comment.