From 00be47cd6aa7add63d52bab777614863e2cb7987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20B=C3=A1n?= Date: Wed, 28 Apr 2021 13:06:26 +0200 Subject: [PATCH] Initial commit --- .dockerignore | 6 + .github/workflows/ci.yaml | 29 ++++ .gitignore | 4 + Dockerfile | 15 ++ LICENSE | 201 +++++++++++++++++++++++++ Makefile | 50 ++++++ README.md | 130 ++++++++++++++++ cmd/asnlookup-utils/convert.go | 92 +++++++++++ cmd/asnlookup-utils/main.go | 22 +++ cmd/asnlookup-utils/version.go | 18 +++ cmd/asnlookup/main.go | 93 ++++++++++++ go.mod | 9 ++ go.sum | 26 ++++ hack/pull_rib.sh | 9 ++ pkg/binarytrie/array.go | 118 +++++++++++++++ pkg/binarytrie/array_test.go | 20 +++ pkg/binarytrie/arraymarshaling.go | 103 +++++++++++++ pkg/binarytrie/arraymarshaling_test.go | 52 +++++++ pkg/binarytrie/errors.go | 14 ++ pkg/binarytrie/naive.go | 120 +++++++++++++++ pkg/binarytrie/naive_test.go | 106 +++++++++++++ pkg/binarytrie/optimize.go | 154 +++++++++++++++++++ pkg/binarytrie/optimize_test.go | 52 +++++++ pkg/binarytrie/types.go | 12 ++ pkg/binarytrie/util.go | 37 +++++ pkg/database/builder.go | 88 +++++++++++ pkg/database/database.go | 67 +++++++++ pkg/database/errors.go | 8 + pkg/database/mrt.go | 35 +++++ version.go | 4 + 30 files changed, 1694 insertions(+) create mode 100644 .dockerignore create mode 100644 .github/workflows/ci.yaml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 cmd/asnlookup-utils/convert.go create mode 100644 cmd/asnlookup-utils/main.go create mode 100644 cmd/asnlookup-utils/version.go create mode 100644 cmd/asnlookup/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100755 hack/pull_rib.sh create mode 100644 pkg/binarytrie/array.go create mode 100644 pkg/binarytrie/array_test.go create mode 100644 pkg/binarytrie/arraymarshaling.go create mode 100644 pkg/binarytrie/arraymarshaling_test.go create mode 100644 
pkg/binarytrie/errors.go create mode 100644 pkg/binarytrie/naive.go create mode 100644 pkg/binarytrie/naive_test.go create mode 100644 pkg/binarytrie/optimize.go create mode 100644 pkg/binarytrie/optimize_test.go create mode 100644 pkg/binarytrie/types.go create mode 100644 pkg/binarytrie/util.go create mode 100644 pkg/database/builder.go create mode 100644 pkg/database/database.go create mode 100644 pkg/database/errors.go create mode 100644 pkg/database/mrt.go create mode 100644 version.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a12beb0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.idea/ +build/ +*.db +Dockerfile +README.md +rib.*.bz2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..bc1c187 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,29 @@ +name: ci +on: + push: + branches: + - main + pull_request: +jobs: + build: + name: Build + runs-on: ubuntu-20.04 + container: + image: golang:1.16-alpine + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: apk add --no-cache make + - name: Build + run: make + test: + name: Test + runs-on: ubuntu-20.04 + container: + image: golang:1.16-alpine + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: apk add --no-cache gcc make musl-dev + - name: Test + run: make test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..44bc046 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/ +build/ +*.db +rib.*.bz2 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..93e7047 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM golang:1.16-alpine3.13 AS builder + +RUN apk add --no-cache bash curl make wget +WORKDIR /go/src/asnlookup +COPY go.mod go.sum ./ +RUN go mod download +COPY . . 
+RUN make && make install + +FROM alpine:3.13 +COPY --from=builder /usr/local/bin/asnlookup /usr/local/bin/asnlookup-utils /usr/local/bin/ +USER nobody +ENV ASNLOOKUP_DB=/default.db +ENTRYPOINT ["/usr/local/bin/asnlookup"] +CMD [] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..71416ca --- /dev/null +++ b/Makefile @@ -0,0 +1,50 @@ +VERSION = 0.1.0 +BUILDDIR ?= build +BINDIR ?= /usr/local/bin +DOCKER_IMAGE ?= banviktor/asnlookup +GOOS ?= $(shell go env GOOS) +GOARCH ?= $(shell go env GOARCH) +DATE = $(shell date -u +%Y%m%d) + +.PHONY: build +build: $(BUILDDIR)/asnlookup $(BUILDDIR)/asnlookup-utils + +.PHONY: clean +clean: + rm -f $(BUILDDIR)/* + +.PHONY: deps +deps: + go mod download + +$(BUILDDIR)/asnlookup: deps + GOOS=$(GOOS) GOARCH=$(GOARCH) go build -o $(BUILDDIR)/asnlookup ./cmd/asnlookup + +$(BUILDDIR)/asnlookup-utils: deps + GOOS=$(GOOS) GOARCH=$(GOARCH) go build -o $(BUILDDIR)/asnlookup-utils ./cmd/asnlookup-utils + +.PHONY: release +release: + $(MAKE) clean + $(MAKE) + tar -zcf asnlookup-$(GOOS)-$(GOARCH)-v$(VERSION).tar.gz -C $(BUILDDIR) . + +release-all: + $(MAKE) release GOOS=linux GOARCH=amd64 + $(MAKE) release GOOS=linux GOARCH=arm64 + $(MAKE) release GOOS=linux GOARCH=386 + $(MAKE) release GOOS=darwin GOARCH=amd64 + $(MAKE) release GOOS=darwin GOARCH=arm64 + +.PHONY: test +test: + go test -race ./... 
+ +.PHONY: install +install: + cp -f $(BUILDDIR)/asnlookup $(BINDIR)/ + cp -f $(BUILDDIR)/asnlookup-utils $(BINDIR)/ + +.PHONY: uninstall +uninstall: + rm -f $(BINDIR)/asnlookup $(BINDIR)/asnlookup-utils diff --git a/README.md b/README.md new file mode 100644 index 0000000..ec76c3c --- /dev/null +++ b/README.md @@ -0,0 +1,130 @@ +# asnlookup +CLI and Go package for fast, offline ASN lookups. + +A level compressed trie in array representation is used for achieving very fast +lookups with a small memory footprint. The level compression is user-tunable +between space-efficiency and time-efficiency. In LC-trie terms, the tuning +adjusts the fill factor of the redundancy-enabled level compression. + +Due to the array-represented trie and binary marshaling, the inflation of a +pre-converted database can be measured in tens of milliseconds. In other words +the CLI tool can even be used for one-off lookups without any perceivable +startup slowness. + +``` +time asnlookup --db ~/.asnlookup.db 8.8.8.8 +15169 + +real 0m0,027s +user 0m0,025s +sys 0m0,018s +``` + +## Installation + +Using prebuilt binaries: +```shell +curl -fsSL https://github.com/banviktor/asnlookup/releases/download/v0.1.0/asnlookup-linux-amd64-v0.1.0.tar.gz | sudo tar -zx -C /usr/local/bin +``` + +From source: +```shell +make +sudo make install +``` + +## Usage + +### CLI + +1. Download a fresh RIB dump, e.g. from http://archive.routeviews.org/: + ```shell + ./hack/pull_rib.sh + ``` +2. Convert it to `asnlookup`'s own format: + ```shell + bzcat rib.*.bz2 | asnlookup-utils convert --input - --output /path/to/my.db + ``` +3. 
Use it with `asnlookup`: + ```shell + asnlookup --db /path/to/my.db 8.8.8.8 + ``` + or using the `ASNLOOKUP_DB` environment variable: + ```shell + export ASNLOOKUP_DB=/path/to/my.db + asnlookup 8.8.8.8 + ``` + +#### Batch lookups + +You may also do batch lookups for IPs provided to standard input using the +`--batch` flag: +```shell +echo -ne '1.1.1.1\n8.8.8.8\n' | asnlookup --db ~/.asnlookup.db --batch +13335 +15169 +``` +If you have tons of IPs to check, this will be a lot faster than inflating the +multi-megabyte database each time `asnlookup` is invoked. + +### Go package + +1. Build a database + + * Manually: + ```go + builder := database.NewBuilder() + + _, prefix, _ := net.ParseCIDR("8.8.0.0/16") + err := builder.InsertMapping(prefix, 420) + if err != nil { + panic(err) + } + + db, err := builder.Build() + if err != nil { + panic(err) + } + ``` + + * Using an MRT file: + ```go + mrtFile, err := os.OpenFile("/path/to/file.mrt", os.O_RDONLY, 0) + if err != nil { + panic(err) + } + defer mrtFile.Close() + + builder := database.NewBuilder() + if err = builder.ImportMRT(mrtFile); err != nil { + panic(err) + } + + db, err := builder.Build() + if err != nil { + panic(err) + } + ``` + + * Using a marshaled database (see `asnlookup-utils convert`): + ```go + dbFile, err := os.OpenFile("/path/to/file.db", os.O_RDONLY, 0) + if err != nil { + panic(err) + } + defer dbFile.Close() + + db, err := database.NewFromDump(dbFile) + if err != nil { + panic(err) + } + ``` + +2. Look things up! 
+ ```go + as, err := db.Lookup(net.ParseIP("8.8.8.8")) + if err != nil { + panic(err) + } + fmt.Println(as.Number) + ``` diff --git a/cmd/asnlookup-utils/convert.go b/cmd/asnlookup-utils/convert.go new file mode 100644 index 0000000..c1c5314 --- /dev/null +++ b/cmd/asnlookup-utils/convert.go @@ -0,0 +1,92 @@ +package main + +import ( + "github.com/banviktor/asnlookup/pkg/database" + "github.com/urfave/cli/v2" + "log" + "os" +) + +var convertCommand = &cli.Command{ + Name: "convert", + Usage: "converts an MRT file to an asnlookup database", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "input", + Aliases: []string{"i", "in"}, + Required: true, + Usage: "input MRT `file`", + }, + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o", "out"}, + Required: true, + Usage: "output destination `file`", + }, + &cli.IntFlag{ + Name: "optimization", + Value: 5, + Usage: "set optimization `level` (1 - smallest, 8 - fastest)", + }, + }, + Action: convertAction, +} + +func convertAction(ctx *cli.Context) error { + var err error + + optimization := ctx.Int("optimization") + if optimization < 1 || optimization > 8 { + log.Fatalf("Optimization level must be between 1 and 8") + } + + // Initialize input. + inFile := os.Stdin + inFilePath := ctx.String("input") + if inFilePath != "-" { + inFile, err = os.OpenFile(inFilePath, os.O_RDONLY, 0) + if err != nil { + log.Fatalf("Failed to open input file: %v", err) + } + defer inFile.Close() + } + + // Initialize output. + outFile := os.Stdout + outFilePath := ctx.String("output") + if outFilePath != "-" { + outFile, err = os.OpenFile(outFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + log.Fatalf("Failed to create output file: %v", err) + } + defer outFile.Close() + } + + // Build database. 
+ builder := database.NewBuilder() + err = builder.ImportMRT(inFile) + if err != nil { + log.Fatalf("Failed to import: %v", err) + } + builder.SetFillFactor(optimizationLevelToFillFactor(optimization)) + db, err := builder.Build() + if err != nil { + log.Fatalf("Failed to build database: %v", err) + } + + // Dump optimized database. + data, err := db.MarshalBinary() + if err != nil { + log.Fatalf("Unexpected error: %v", err) + } + _, err = outFile.Write(data) + if err != nil { + log.Fatalf("Unexpected error: %v", err) + } + + return nil +} + +func optimizationLevelToFillFactor(level int) float32 { + return float32(9-level) * 0.125 +} diff --git a/cmd/asnlookup-utils/main.go b/cmd/asnlookup-utils/main.go new file mode 100644 index 0000000..2ad28d0 --- /dev/null +++ b/cmd/asnlookup-utils/main.go @@ -0,0 +1,22 @@ +package main + +import ( + "github.com/urfave/cli/v2" + "log" + "os" +) + +func main() { + app := &cli.App{ + Name: "asnlookup-utils", + Usage: "utilities for asnlookup", + Commands: []*cli.Command{ + convertCommand, + versionCommand, + }, + } + + if err := app.Run(os.Args); err != nil { + log.Fatal(err) + } +} diff --git a/cmd/asnlookup-utils/version.go b/cmd/asnlookup-utils/version.go new file mode 100644 index 0000000..18a3b83 --- /dev/null +++ b/cmd/asnlookup-utils/version.go @@ -0,0 +1,18 @@ +package main + +import ( + "fmt" + "github.com/banviktor/asnlookup" + "github.com/urfave/cli/v2" +) + +var versionCommand = &cli.Command{ + Name: "version", + Usage: "", + Action: versionAction, +} + +func versionAction(_ *cli.Context) error { + fmt.Printf("asnlookup-utils v%s\n", asnlookup.Version) + return nil +} diff --git a/cmd/asnlookup/main.go b/cmd/asnlookup/main.go new file mode 100644 index 0000000..c3a7ebe --- /dev/null +++ b/cmd/asnlookup/main.go @@ -0,0 +1,93 @@ +package main + +import ( + "bufio" + "flag" + "fmt" + "github.com/banviktor/asnlookup" + "github.com/banviktor/asnlookup/pkg/database" + "net" + "os" +) + +const ( + databaseFilenameEnvVar = 
"ASNLOOKUP_DB" +) + +func main() { + // Initialize flags. + dbFilename := flag.String( + "db", + os.Getenv(databaseFilenameEnvVar), + fmt.Sprintf("database file to use (env: %s)", databaseFilenameEnvVar), + ) + batch := flag.Bool( + "batch", + false, + "process IPs from stdin", + ) + version := flag.Bool( + "version", + false, + "print version information and exit", + ) + flag.Usage = func() { + fmt.Printf("Usage: asnlookup [OPTION]... [IP]\n") + flag.PrintDefaults() + } + + // Check provided arguments. + flag.Parse() + if *version { + fmt.Printf("asnlookup v%s\n", asnlookup.Version) + os.Exit(0) + } + if !*batch && flag.NArg() != 1 { + fmt.Println("Missing argument: IP") + flag.Usage() + os.Exit(1) + } + if *dbFilename == "" { + fmt.Println("Missing required option: db") + flag.Usage() + os.Exit(1) + } + + // Inflate database. + dbFile, err := os.OpenFile(*dbFilename, os.O_RDONLY, 0) + if err != nil { + fmt.Println("Failed to open database file:", err) + os.Exit(1) + } + defer dbFile.Close() + db, err := database.NewFromDump(dbFile) + if err != nil { + fmt.Println("Failed to parse database file:", err) + os.Exit(1) + } + + // Do the lookup(s). 
+ if *batch { + r := bufio.NewScanner(os.Stdin) + for r.Scan() { + ip := net.ParseIP(r.Text()) + lookup(db, &ip) + } + } else { + ip := net.ParseIP(flag.Arg(0)) + lookup(db, &ip) + } +} + +func lookup(db database.Database, ip *net.IP) { + as, err := db.Lookup(*ip) + if err == database.ErrNotFound { + fmt.Println("not found") + return + } + if err != nil { + fmt.Println("error:", err) + return + } + fmt.Println(as.Number) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..862ffd3 --- /dev/null +++ b/go.mod @@ -0,0 +1,9 @@ +module github.com/banviktor/asnlookup + +go 1.16 + +require ( + github.com/kaorimatz/go-mrt v0.0.0-20210326003454-aa11f3646f93 + github.com/stretchr/testify v1.7.0 + github.com/urfave/cli/v2 v2.3.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d11470c --- /dev/null +++ b/go.sum @@ -0,0 +1,26 @@ +github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kaorimatz/go-mrt v0.0.0-20210326003454-aa11f3646f93 h1:w3cdcsXZUIL9cAD3jlAPtsjIAOyGbgMRGfNcXTTgH5Q= +github.com/kaorimatz/go-mrt v0.0.0-20210326003454-aa11f3646f93/go.mod h1:KUXNgiu1+bxftJBB5MHhuDqBiL0gi6G4lTALnxp3qwE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= +github.com/russross/blackfriday/v2 
v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= +github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.3 h1:fvjTMHxHEw/mxHbtzPi3JCcKXQRAnQTBRo6YCJSVHKI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hack/pull_rib.sh b/hack/pull_rib.sh new file mode 100755 index 0000000..b77adda --- /dev/null +++ b/hack/pull_rib.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="http://archive.routeviews.org/bgpdata" +TIMESTAMP=$((($(date -u +%s)-3600)/7200*7200)) +YEAR=$(date -u -d "@${TIMESTAMP}" +%Y) +MONTH=$(date -u -d "@${TIMESTAMP}" +%m) +FILENAME=$(curl -fsSL "${BASE_URL}/${YEAR}.${MONTH}/RIBS/" | grep -oE 'rib\.[0-9]{8}\.[0-9]{4}\.bz2' | tail -n 1) +wget "${BASE_URL}/${YEAR}.${MONTH}/RIBS/${FILENAME}" diff --git a/pkg/binarytrie/array.go b/pkg/binarytrie/array.go new file mode 100644 index 0000000..4bfddae --- 
/dev/null +++ b/pkg/binarytrie/array.go @@ -0,0 +1,118 @@ +package binarytrie + +import ( + "fmt" + "net" +) + +// ArrayTrie represents a trie in a space-efficient array format. +type ArrayTrie struct { + nodes []arrayTrieNode + skippedBits map[int]uint32 +} + +type arrayTrieNode struct { + branchingFactor uint8 + skipValue uint8 + childrenOffset uint32 + value uint32 +} + +// Insert implements Trie. +func (t *ArrayTrie) Insert(*net.IPNet, uint32) error { + return ErrTrieImmutable +} + +// Lookup implements Trie. +func (t *ArrayTrie) Lookup(ip net.IP) (value uint32, err error) { + var bitPosition, index, nextIndex int + var skippedBits uint32 + + for { + if t.nodes[index].value != 0 { + value = t.nodes[index].value + } + if t.nodes[index].isLeaf() { + break + } + + skippedBits = extractBits(ip, bitPosition, int(t.nodes[index].skipValue)) + bitPosition += int(t.nodes[index].skipValue) + nextIndex = index + int(t.nodes[index].childrenOffset) + int(extractBits(ip, bitPosition, int(t.nodes[index].branchingFactor))) + bitPosition += int(t.nodes[index].branchingFactor) + + if t.nodes[index].skipValue > 0 { + if expected, ok := t.skippedBits[nextIndex]; !ok || expected != skippedBits { + break + } + } + index = nextIndex + } + if value == 0 { + return 0, ErrValueNotFound + } + return +} + +// NewArrayTrie returns an empty ArrayTrie. +// +// This is rarely useful, as an ArrayTrie does not support inserts. +func NewArrayTrie() *ArrayTrie { + return &ArrayTrie{ + nodes: make([]arrayTrieNode, 0), + skippedBits: make(map[int]uint32), + } +} + +// NewArrayTrieFromNaiveTrie creates an ArrayTrie from a NaiveTrie. 
func NewArrayTrieFromNaiveTrie(nt *NaiveTrie) *ArrayTrie {
	at := NewArrayTrie()
	// Replace the empty node slice with one whose capacity comes from the
	// naive trie, so the appends below start with that much room.
	at.nodes = make([]arrayTrieNode, 0, nt.allocatedSize())

	// Breadth-first traversal: each pass of the outer loop writes one batch
	// of queued nodes to the array and queues their children as the next batch.
	nodeQueue := []*naiveTrieNode{nt.root}
	for len(nodeQueue) > 0 {
		// Array index of the first node of this batch, and the batch length
		// before any children are appended to the queue.
		batchIndex := len(at.nodes)
		batchSize := len(nodeQueue)
		// range evaluates nodeQueue once, so only the batchSize nodes of the
		// current batch are visited even though the queue grows in the body.
		for _, nNode := range nodeQueue {
			if nNode == nil {
				// A missing child still occupies a slot, so that siblings
				// stay addressable by position; the zero node is a leaf
				// without a value.
				at.nodes = append(at.nodes, arrayTrieNode{})
				continue
			}

			i := len(at.nodes)
			at.nodes = append(at.nodes, arrayTrieNode{
				branchingFactor: nNode.branchingFactor,
				skipValue:       nNode.skipValue,
				value:           nNode.value,
			})
			// Skipped bits are recorded per child index, and only for
			// children of a parent that actually skips bits.
			if nNode.parent != nil && nNode.parent.skipValue > 0 {
				at.skippedBits[i] = nNode.skippedBits
			}
			if !nNode.isLeaf() {
				// The children will start at absolute index
				// batchIndex+len(nodeQueue): the current batch is followed by
				// the children queued so far. The offset is stored relative
				// to this node's own index i.
				at.nodes[i].childrenOffset = uint32(batchIndex - i + len(nodeQueue))
				nodeQueue = append(nodeQueue, nNode.children...)
			}
		}
		// Drop the batch that was just written; what remains is the next level.
		nodeQueue = nodeQueue[batchSize:]
	}

	return at
}
newPopulatedNaiveTrie() + testLookup(t, trie.ToArrayTrie(), testCases) +} diff --git a/pkg/binarytrie/arraymarshaling.go b/pkg/binarytrie/arraymarshaling.go new file mode 100644 index 0000000..2a4cdf8 --- /dev/null +++ b/pkg/binarytrie/arraymarshaling.go @@ -0,0 +1,103 @@ +package binarytrie + +import "encoding/binary" + +const ( + arrayTrieMarshalHeader = "github.com/banviktor/asnlookup/pkg/binarytrie\x00ArrayTrie\x00" + arrayTrieMarshalVersion = uint8(1) +) + +// MarshalBinary implements encoding.BinaryMarshaler. +func (t *ArrayTrie) MarshalBinary() (data []byte, err error) { + var i int + data = make([]byte, len(arrayTrieMarshalHeader)+1+8+len(t.nodes)*10+8+len(t.skippedBits)*12) + + // Write header. + copy(data, arrayTrieMarshalHeader) + i += len(arrayTrieMarshalHeader) + data[i] = arrayTrieMarshalVersion + i += 1 + + // Write nodes. + binary.LittleEndian.PutUint64(data[i:i+8], uint64(len(t.nodes))) + i += 8 + for _, n := range t.nodes { + bN, err := n.MarshalBinary() + if err != nil { + return nil, err + } + copy(data[i:i+10], bN) + i += 10 + } + + // Write skipped bits information. + binary.LittleEndian.PutUint64(data[i:i+8], uint64(len(t.skippedBits))) + i += 8 + for k, v := range t.skippedBits { + binary.LittleEndian.PutUint64(data[i:i+8], uint64(k)) + i += 8 + binary.LittleEndian.PutUint32(data[i:i+4], v) + i += 4 + } + + return +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (t *ArrayTrie) UnmarshalBinary(data []byte) error { + var i int + + // Check header. + if string(data[:len(arrayTrieMarshalHeader)]) != arrayTrieMarshalHeader { + return ErrInvalidFormat + } + i += len(arrayTrieMarshalHeader) + if data[i] != arrayTrieMarshalVersion { + return ErrInvalidFormat + } + i += 1 + + // Populate nodes. 
+ nodeCount := binary.LittleEndian.Uint64(data[i : i+8]) + i += 8 + t.nodes = make([]arrayTrieNode, nodeCount) + for j := uint64(0); j < nodeCount; j++ { + if err := t.nodes[j].UnmarshalBinary(data[i : i+10]); err != nil { + return err + } + i += 10 + } + + // Populate skipped bits information. + skippedBitCount := binary.LittleEndian.Uint64(data[i : i+8]) + i += 8 + t.skippedBits = make(map[int]uint32, skippedBitCount) + for j := uint64(0); j < skippedBitCount; j++ { + k := int(binary.LittleEndian.Uint64(data[i : i+8])) + i += 8 + v := binary.LittleEndian.Uint32(data[i : i+4]) + i += 4 + t.skippedBits[k] = v + } + + return nil +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (n *arrayTrieNode) MarshalBinary() (data []byte, err error) { + data = make([]byte, 10) + data[0] = n.branchingFactor + data[1] = n.skipValue + binary.LittleEndian.PutUint32(data[2:6], n.childrenOffset) + binary.LittleEndian.PutUint32(data[6:10], n.value) + return +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (n *arrayTrieNode) UnmarshalBinary(data []byte) error { + n.branchingFactor = data[0] + n.skipValue = data[1] + n.childrenOffset = binary.LittleEndian.Uint32(data[2:6]) + n.value = binary.LittleEndian.Uint32(data[6:10]) + return nil +} diff --git a/pkg/binarytrie/arraymarshaling_test.go b/pkg/binarytrie/arraymarshaling_test.go new file mode 100644 index 0000000..b87d1f9 --- /dev/null +++ b/pkg/binarytrie/arraymarshaling_test.go @@ -0,0 +1,52 @@ +package binarytrie_test + +import ( + . 
"github.com/banviktor/asnlookup/pkg/binarytrie" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestEmptyMarshaledArrayTrieLookup(t *testing.T) { + trie, testCases := newEmptyNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + arrayTrie := trie.ToArrayTrie() + + buf, err := arrayTrie.MarshalBinary() + assert.NoError(t, err, "MarshalBinary should not error") + + newTrie := &ArrayTrie{} + err = newTrie.UnmarshalBinary(buf) + assert.NoError(t, err, "UnmarshalBinary should not error") + + testLookup(t, newTrie, testCases) +} + +func TestTrivialMarshaledArrayTrieLookup(t *testing.T) { + trie, testCases := newTrivialNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + arrayTrie := trie.ToArrayTrie() + + buf, err := arrayTrie.MarshalBinary() + assert.NoError(t, err, "MarshalBinary should not error") + + newTrie := &ArrayTrie{} + err = newTrie.UnmarshalBinary(buf) + assert.NoError(t, err, "UnmarshalBinary should not error") + + testLookup(t, newTrie, testCases) +} + +func TestPopulatedMarshaledArrayTrieLookup(t *testing.T) { + trie, testCases := newPopulatedNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + arrayTrie := trie.ToArrayTrie() + + buf, err := arrayTrie.MarshalBinary() + assert.NoError(t, err, "MarshalBinary should not error") + + newTrie := &ArrayTrie{} + err = newTrie.UnmarshalBinary(buf) + assert.NoError(t, err, "UnmarshalBinary should not error") + + testLookup(t, newTrie, testCases) +} diff --git a/pkg/binarytrie/errors.go b/pkg/binarytrie/errors.go new file mode 100644 index 0000000..f8adff9 --- /dev/null +++ b/pkg/binarytrie/errors.go @@ -0,0 +1,14 @@ +package binarytrie + +import "errors" + +var ( + // ErrInvalidIPAddress IP address is invalid. + ErrInvalidIPAddress = errors.New("invalid IP address") + // ErrTrieImmutable Trie is immutable. + ErrTrieImmutable = errors.New("trie is immutable") + // ErrValueNotFound value was not found. 
+ ErrValueNotFound = errors.New("value not found") + // ErrInvalidFormat invalid marshaled input. + ErrInvalidFormat = errors.New("invalid format") +) diff --git a/pkg/binarytrie/naive.go b/pkg/binarytrie/naive.go new file mode 100644 index 0000000..f363e70 --- /dev/null +++ b/pkg/binarytrie/naive.go @@ -0,0 +1,120 @@ +package binarytrie + +import ( + "net" +) + +type NaiveTrie struct { + root *naiveTrieNode + mutable bool +} + +type naiveTrieNode struct { + skipValue uint8 + skippedBits uint32 + branchingFactor uint8 + parent *naiveTrieNode + children []*naiveTrieNode + value uint32 +} + +// NewNaiveTrie creates an empty NaiveTrie. +func NewNaiveTrie() *NaiveTrie { + return &NaiveTrie{ + root: &naiveTrieNode{}, + mutable: true, + } +} + +// Insert implements Trie. +func (t *NaiveTrie) Insert(ipNet *net.IPNet, value uint32) error { + if !t.mutable { + return ErrTrieImmutable + } + + prefix, prefixSize, err := parseIpNet(ipNet) + if err != nil { + return err + } + + currentNode := t.root + bitPosition := 0 + for { + if currentNode.branchingFactor == 0 { + currentNode.branchingFactor = 1 + currentNode.children = make([]*naiveTrieNode, 2) + } + + bit := extractBits(prefix, bitPosition, 1) + bitPosition++ + if currentNode.children[bit] == nil { + currentNode.children[bit] = &naiveTrieNode{parent: currentNode} + } + currentNode = currentNode.children[bit] + + if bitPosition >= prefixSize { + break + } + } + currentNode.value = value + return nil +} + +// Lookup implements Trie. 
+func (t *NaiveTrie) Lookup(ip net.IP) (value uint32, err error) { + ip = ip.To16() + if ip == nil { + return 0, ErrInvalidIPAddress + } + + currentNode := t.root + bitPosition := 0 + for { + if currentNode.value != 0 { + value = currentNode.value + } + if currentNode.isLeaf() { + break + } + + skippedBits := extractBits(ip, bitPosition, int(currentNode.skipValue)) + bitPosition += int(currentNode.skipValue) + prefix := extractBits(ip, bitPosition, int(currentNode.branchingFactor)) + bitPosition += int(currentNode.branchingFactor) + + nextNode := currentNode.children[prefix] + if nextNode == nil || nextNode.skippedBits != skippedBits { + break + } + currentNode = nextNode + } + if value == 0 { + return 0, ErrValueNotFound + } + return +} + +// ToArrayTrie creates an identical ArrayTrie. +func (t *NaiveTrie) ToArrayTrie() *ArrayTrie { + return NewArrayTrieFromNaiveTrie(t) +} + +func (t *NaiveTrie) allocatedSize() int { + return t.root.allocatedSize() +} + +func (n *naiveTrieNode) isLeaf() bool { + return n.branchingFactor == 0 +} + +func (n *naiveTrieNode) allocatedSize() int { + count := 1 + for _, child := range n.children { + if child != nil { + count += child.allocatedSize() + } else { + count++ + } + } + return count +} diff --git a/pkg/binarytrie/naive_test.go b/pkg/binarytrie/naive_test.go new file mode 100644 index 0000000..7922a47 --- /dev/null +++ b/pkg/binarytrie/naive_test.go @@ -0,0 +1,106 @@ +package binarytrie_test + +import ( + . 
"github.com/banviktor/asnlookup/pkg/binarytrie" + "github.com/stretchr/testify/assert" + "net" + "testing" +) + +type testCase struct { + ip string + asn uint32 + err error +} + +func TestEmptyNaiveTrieLookup(t *testing.T) { + trie, testCases := newEmptyNaiveTrie() + testLookup(t, trie, testCases) +} + +func TestTrivialNaiveTrieLookup(t *testing.T) { + trie, testCases := newTrivialNaiveTrie() + testLookup(t, trie, testCases) +} + +func TestPopulatedNaiveTrieLookup(t *testing.T) { + trie, testCases := newPopulatedNaiveTrie() + testLookup(t, trie, testCases) +} + +func newEmptyNaiveTrie() (*NaiveTrie, []testCase) { + trie := NewNaiveTrie() + + testCases := []testCase{ + {"0.0.0.0", 0, ErrValueNotFound}, + {"255.255.255.255", 0, ErrValueNotFound}, + } + + return trie, testCases +} + +func newTrivialNaiveTrie() (*NaiveTrie, []testCase) { + trie := NewNaiveTrie() + _, ipNet, _ := net.ParseCIDR("0.0.0.0/0") + trie.Insert(ipNet, 42) + + testCases := []testCase{ + {"0.0.0.0", 42, nil}, + {"255.255.255.255", 42, nil}, + } + + return trie, testCases +} + +func newPopulatedNaiveTrie() (*NaiveTrie, []testCase) { + trie := NewNaiveTrie() + testData := []struct { + net string + asn uint32 + }{ + {"192.168.1.0/24", 999}, + {"0.0.0.0/2", 200}, + {"128.0.0.0/2", 210}, + {"160.0.0.0/3", 2101}, + {"160.0.0.0/3", 2101}, // duplicate entry on purpose + {"192.0.0.0/3", 211}, + {"224.0.0.0/3", 211}, + } + for _, td := range testData { + _, ipNet, _ := net.ParseCIDR(td.net) + trie.Insert(ipNet, td.asn) + } + + testCases := []testCase{ + {"0.0.0.0", 200, nil}, + {"32.128.128.128", 200, nil}, + {"63.255.255.255", 200, nil}, + {"64.0.0.0", 0, ErrValueNotFound}, + {"96.128.128.128", 0, ErrValueNotFound}, + {"127.255.255.255", 0, ErrValueNotFound}, + {"128.0.0.0", 210, nil}, + {"159.255.255.255", 210, nil}, + {"160.128.128.128", 2101, nil}, + {"191.255.255.255", 2101, nil}, + {"192.0.0.0", 211, nil}, + {"192.168.0.255", 211, nil}, + {"192.168.1.0", 999, nil}, + {"192.168.1.128", 999, nil}, + 
{"192.168.1.255", 999, nil}, + {"192.168.2.0", 211, nil}, + {"224.128.128.128", 211, nil}, + {"255.255.255.255", 211, nil}, + } + + return trie, testCases +} + +func testLookup(t *testing.T, trie Trie, testCases []testCase) { + for _, tc := range testCases { + asn, err := trie.Lookup(net.ParseIP(tc.ip)) + if tc.err != nil && assert.Error(t, err, "%s should have error", tc.ip) { + assert.Equal(t, tc.err, err) + } + assert.Equal(t, int(tc.asn), int(asn), "%s expected AS%d, actual: AS%d", tc.ip, tc.asn, asn) + } +} diff --git a/pkg/binarytrie/optimize.go b/pkg/binarytrie/optimize.go new file mode 100644 index 0000000..3a02c20 --- /dev/null +++ b/pkg/binarytrie/optimize.go @@ -0,0 +1,154 @@ +package binarytrie + +import "sync" + +// Optimize performs level compression and path compression on the trie. +// +// This operation makes the trie immutable. +func (t *NaiveTrie) Optimize(fillFactor float32) error { + if !t.mutable { + return ErrTrieImmutable + } + + t.root.propagateValues(0) + t.root.removeRedundancies() + t.root.compressLevels(fillFactor) + t.root.compressPaths(nil, 0, 0) + t.mutable = false + return nil +} + +func (n *naiveTrieNode) propagateValues(value uint32) { + for i, child := range n.children { + if child == nil { + n.children[i] = &naiveTrieNode{parent: n} + } + } + if n.value == 0 { + n.value = value + } + + // Preorder traversal. + for _, child := range n.children { + child.propagateValues(n.value) + } +} + +func (n *naiveTrieNode) removeRedundancies() { + // Postorder traversal. 
+ for _, child := range n.children { + child.removeRedundancies() + } + + if n.isLeaf() { + return + } + if n.children[0].isLeaf() && n.children[1].isLeaf() && n.children[0].value == n.children[1].value { + n.value = n.children[0].value + n.branchingFactor = 0 + n.children = nil + } +} + +func (n *naiveTrieNode) compressPaths(firstNode *naiveTrieNode, depth uint8, prefix uint32) { + var nextNode *naiveTrieNode + var nextNodeIndex int + for i, child := range n.children { + if child.isLeaf() { + if child.value == n.value { + // Ignore trivial leaves introduced by normalization. + continue + } + // Any other leaf breaks the path. + nextNode = nil + break + } + if nextNode != nil { + // The path ends if there is more than 1 nontrivial child. + nextNode = nil + break + } + nextNode = child + nextNodeIndex = i + } + + if firstNode != nil && (nextNode == nil || depth+n.branchingFactor > 32) { + // The path ends. + firstNode.skipValue = depth + firstNode.children = n.children + firstNode.branchingFactor = n.branchingFactor + for _, child := range n.children { + child.skippedBits = prefix + child.parent = firstNode + } + } else if firstNode != nil { + // The path continues. 
+ nextNode.compressPaths(firstNode, depth+n.branchingFactor, (prefix<= 31 || float32(realNodeCount)/float32(len(nextNodes)) < fillFactor { + break + } + depth++ + nodes = nextNodes + } + + if depth > 1 { + n.branchingFactor = depth + n.children = nodes + } + + wg := &sync.WaitGroup{} + wg.Add(len(n.children)) + for _, child := range n.children { + go func(child *naiveTrieNode) { + defer wg.Done() + child.compressLevels(fillFactor) + }(child) + } + wg.Wait() +} diff --git a/pkg/binarytrie/optimize_test.go b/pkg/binarytrie/optimize_test.go new file mode 100644 index 0000000..9c3062d --- /dev/null +++ b/pkg/binarytrie/optimize_test.go @@ -0,0 +1,52 @@ +package binarytrie_test + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestEmptyOptimizedNaiveTrieLookup(t *testing.T) { + trie, testCases := newEmptyNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + testLookup(t, trie, testCases) +} + +func TestTrivialOptimizedNaiveTrieLookup(t *testing.T) { + trie, testCases := newTrivialNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + testLookup(t, trie, testCases) +} + +func TestPopulatedOptimizedNaiveTrieLookup(t *testing.T) { + trie, testCases := newPopulatedNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + testLookup(t, trie, testCases) +} + +func TestEmptyOptimizedArrayTrieLookup(t *testing.T) { + trie, testCases := newEmptyNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + arrayTrie := trie.ToArrayTrie() + + fmt.Println(arrayTrie.String()) + testLookup(t, arrayTrie, testCases) +} + +func TestTrivialOptimizedArrayTrieLookup(t *testing.T) { + trie, testCases := newTrivialNaiveTrie() + assert.NoError(t, trie.Optimize(0.5), "Optimize should not error") + arrayTrie := trie.ToArrayTrie() + + fmt.Println(arrayTrie.String()) + testLookup(t, arrayTrie, testCases) +} + +func TestPopulatedOptimizedArrayTrieLookup(t 
// extractBits returns length consecutive bits of ip, starting at bit offset
// position, as the low-order bits of the result. Bit 0 is the most significant
// bit of ip[0].
//
// It returns 0 when length is outside 1..32, when position is negative, or
// when the requested range extends past the end of ip.
func extractBits(ip net.IP, position, length int) uint32 {
	if length < 1 || length > 32 || position < 0 || position+length-1 >= len(ip)*8 {
		return 0
	}

	lastBit := position + length - 1

	// Gather every byte touched by the range (at most five) into one integer,
	// most significant byte first.
	var window uint64
	for _, b := range ip[position/8 : lastBit/8+1] {
		window = window<<8 | uint64(b)
	}

	// Drop the bits that follow the range within the last byte, then mask off
	// the bits that precede it within the first byte.
	window >>= uint(7 - lastBit%8)
	return uint32(window) & (uint32(0xFFFFFFFF) >> uint(32-length))
}
// isNullMask reports whether every byte of mask is zero. A nil or empty mask
// is considered null as well.
func isNullMask(mask net.IPMask) bool {
	var combined byte
	for i := range mask {
		combined |= mask[i]
	}
	return combined == 0
}
+func (d *database) UnmarshalBinary(data []byte) error { + return d.mappings.UnmarshalBinary(data) +} + +func NewFromDump(r io.Reader) (Database, error) { + d := &database{ + mappings: binarytrie.NewArrayTrie(), + } + data, err := ioutil.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("failed to read: %v", err) + } + + if err = d.UnmarshalBinary(data); err != nil { + return nil, fmt.Errorf("failed to restore dump: %v", err) + } + return d, nil +} diff --git a/pkg/database/errors.go b/pkg/database/errors.go new file mode 100644 index 0000000..8597dea --- /dev/null +++ b/pkg/database/errors.go @@ -0,0 +1,8 @@ +package database + +import "errors" + +var ( + // ErrNotFound AS not found. + ErrNotFound = errors.New("AS not found") +) diff --git a/pkg/database/mrt.go b/pkg/database/mrt.go new file mode 100644 index 0000000..770541f --- /dev/null +++ b/pkg/database/mrt.go @@ -0,0 +1,35 @@ +package database + +import ( + "encoding/binary" + "errors" + "github.com/kaorimatz/go-mrt" + "net" +) + +func mrtRIBToMapping(rib *mrt.TableDumpV2RIB) (*net.IPNet, uint32, error) { + for _, entry := range rib.RIBEntries { + for _, attr := range entry.BGPAttributes { + path, ok := attr.Value.(mrt.BGPPathAttributeASPath) + if !ok { + continue + } + + for _, segment := range path { + return rib.Prefix, mrtASToUint32(segment.Value[len(segment.Value)-1]), nil + } + } + } + + return nil, 0, errors.New("RIB record does not contain AS path") +} + +func mrtASToUint32(b mrt.AS) uint32 { + switch len(b) { + case 2: + return uint32(binary.BigEndian.Uint16(b)) + case 4: + return binary.BigEndian.Uint32(b) + } + return 0 +} diff --git a/version.go b/version.go new file mode 100644 index 0000000..c7575d6 --- /dev/null +++ b/version.go @@ -0,0 +1,4 @@ +package asnlookup + +// Version denotes the version of asnlookup. +const Version = "0.1.0"