From e26ece1e3a72195ee8a2de1ad81d47e1ae177995 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Mon, 31 Jul 2023 17:06:02 -0500 Subject: [PATCH] urn: add URN parser and helpers This sets up some support infrastructure to unify naming across claircore. The goal is to be able to pass around names as strings instead of interface objects. Signed-off-by: Hank Donnay --- .github/codecov.yaml | 1 + toolkit/urn/.gitignore | 1 + toolkit/urn/compliance_test.go | 119 ++++++++++++++ toolkit/urn/escape.go | 50 ++++++ toolkit/urn/generate.sh | 16 ++ toolkit/urn/name.go | 46 ++++++ toolkit/urn/name_test.go | 107 +++++++++++++ toolkit/urn/parser.go | 283 +++++++++++++++++++++++++++++++++ toolkit/urn/parser.rl | 87 ++++++++++ toolkit/urn/urn.go | 149 +++++++++++++++++ toolkit/urn/urn_test.go | 132 +++++++++++++++ 11 files changed, 991 insertions(+) create mode 100644 toolkit/urn/.gitignore create mode 100644 toolkit/urn/compliance_test.go create mode 100644 toolkit/urn/escape.go create mode 100644 toolkit/urn/generate.sh create mode 100644 toolkit/urn/name.go create mode 100644 toolkit/urn/name_test.go create mode 100644 toolkit/urn/parser.go create mode 100644 toolkit/urn/parser.rl create mode 100644 toolkit/urn/urn.go create mode 100644 toolkit/urn/urn_test.go diff --git a/.github/codecov.yaml b/.github/codecov.yaml index ae87e32ab..eeb3e3990 100644 --- a/.github/codecov.yaml +++ b/.github/codecov.yaml @@ -1,6 +1,7 @@ ignore: - "test" # Our test helpers largely do not have tests themselves. - "**/*_string.go" # Ignore generated string implementations. 
+- "toolkit/urn/parser.go" # Generated file coverage: status: diff --git a/toolkit/urn/.gitignore b/toolkit/urn/.gitignore new file mode 100644 index 000000000..1cc633135 --- /dev/null +++ b/toolkit/urn/.gitignore @@ -0,0 +1 @@ +*.ri diff --git a/toolkit/urn/compliance_test.go b/toolkit/urn/compliance_test.go new file mode 100644 index 000000000..ceabb75e1 --- /dev/null +++ b/toolkit/urn/compliance_test.go @@ -0,0 +1,119 @@ +package urn + +import ( + "strings" + "testing" +) + +func TestCompliance(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + t.Run("Basic", parseOK(`urn:test:test`)) + t.Run("NID", parseOK(`urn:test-T-0123456789:test`)) + t.Run("NSS", parseOK(`urn:test:Test-0123456789()+,-.:=@;$_!*'`)) + }) + t.Run("Invalid", func(t *testing.T) { + t.Run("NID", func(t *testing.T) { + t.Run("TooLong", parseErr(`urn:`+strings.Repeat("a", 33)+`:test`)) + t.Run("BadChars", parseErr(`urn:test//notOK:test`)) + t.Run("None", parseErr(`urn::test`)) + t.Run("HyphenStart", parseErr(`urn:-nid:test`)) + t.Run("HyphenEnd", parseErr(`urn:nid-:test`)) + }) + t.Run("NSS", func(t *testing.T) { + t.Run("BadChar", parseErr("urn:test:null\x00null")) + }) + }) + t.Run("Equivalence", func(t *testing.T) { + // These test cases are ported out of the RFC. 
+ t.Run("CaseInsensitive", allEqual(`urn:example:a123,z456`, `URN:example:a123,z456`, `urn:EXAMPLE:a123,z456`)) + t.Run("Component", allEqual(`urn:example:a123,z456`, `urn:example:a123,z456?+abc`, `urn:example:a123,z456?=xyz`, `urn:example:a123,z456#789`)) + t.Run("NSS", allNotEqual(`urn:example:a123,z456`, `urn:example:a123,z456/foo`, `urn:example:a123,z456/bar`, `urn:example:a123,z456/baz`)) + t.Run("PercentDecoding", func(t *testing.T) { + p := []string{`urn:example:a123%2Cz456`, `URN:EXAMPLE:a123%2cz456`} + allEqual(p...)(t) + for _, p := range p { + allNotEqual(`urn:example:a123,z456`, p)(t) + } + }) + t.Run("CaseSensitive", allNotEqual(`urn:example:a123,z456`, `urn:example:A123,z456`, `urn:example:a123,Z456`)) + t.Run("PercentEncoding", func(t *testing.T) { + allNotEqual(`urn:example:a123,z456`, `urn:example:%D0%B0123,z456`)(t) + allEqual(`urn:example:а123,z456`, `urn:example:%D0%B0123,z456`)(t) // NB that's \u0430 CYRILLIC SMALL LETTER A + }) + }) +} + +func parseOK(s string) func(*testing.T) { + u, err := Parse(s) + return func(t *testing.T) { + if err != nil { + t.Fatal(err) + } + if _, err := u.R(); err != nil { + t.Error(err) + } + if _, err := u.Q(); err != nil { + t.Error(err) + } + } +} +func parseErr(s string) func(*testing.T) { + u, err := Parse(s) + return func(t *testing.T) { + t.Log(err) + if err != nil { + // OK + return + } + if _, err := u.R(); err == nil { + t.Fail() + } + if _, err := u.Q(); err == nil { + t.Fail() + } + } +} +func allEqual(s ...string) func(*testing.T) { + var err error + u := make([]URN, len(s)) + for i, s := range s { + u[i], err = Parse(s) + if err != nil { + break + } + } + return func(t *testing.T) { + if err != nil { + t.Fatal(err) + } + for i := range u { + for j := range u { + if !(&u[i]).Equal(&u[j]) { + t.Errorf("%v != %v", &u[i], &u[j]) + } + } + } + } +} +func allNotEqual(s ...string) func(*testing.T) { + var err error + u := make([]URN, len(s)) + for i, s := range s { + u[i], err = Parse(s) + if err != nil { + 
break + } + } + return func(t *testing.T) { + if err != nil { + t.Fatal(err) + } + for i := range u { + for j := range u { + if i != j && (&u[i]).Equal(&u[j]) { + t.Errorf("%v == %v", &u[i], &u[j]) + } + } + } + } +} diff --git a/toolkit/urn/escape.go b/toolkit/urn/escape.go new file mode 100644 index 000000000..93fcce4ea --- /dev/null +++ b/toolkit/urn/escape.go @@ -0,0 +1,50 @@ +package urn + +// These functions are adapted out of the net/url package. +// +// URNs have slightly different rules. + +// Copyright 2009 The Go Authors. + +const upperhex = "0123456789ABCDEF" + +// Escape only handles non-ASCII characters and leaves other validation to the +// parsers. +func escape(s string) string { + ct := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c > 0x7F { + ct++ + } + } + + if ct == 0 { + return s + } + + var buf [64]byte + var t []byte + + required := len(s) + 2*ct + if required <= len(buf) { + t = buf[:required] + } else { + t = make([]byte, required) + } + + j := 0 + for i := 0; i < len(s); i++ { + switch c := s[i]; { + case c > 0x7F: + t[j] = '%' + t[j+1] = upperhex[c>>4] + t[j+2] = upperhex[c&15] + j += 3 + default: + t[j] = s[i] + j++ + } + } + return string(t) +} diff --git a/toolkit/urn/generate.sh b/toolkit/urn/generate.sh new file mode 100644 index 000000000..1cda3674f --- /dev/null +++ b/toolkit/urn/generate.sh @@ -0,0 +1,16 @@ +#!/bin/sh +set -e + +for cmd in ragel-go gofmt sed; do + if ! command -v "$cmd" >/dev/null 2>&1; then + printf 'missing needed command: %s\n' "$cmd" >&2 + exit 99 + fi +done + +ragel-go -s -p -F1 -o _parser.go parser.rl +trap 'rm _parser.go' EXIT +{ + printf '// Code generated by ragel-go DO NOT EDIT.\n\n' + gofmt -s _parser.go +} > parser.go diff --git a/toolkit/urn/name.go b/toolkit/urn/name.go new file mode 100644 index 000000000..219343e27 --- /dev/null +++ b/toolkit/urn/name.go @@ -0,0 +1,46 @@ +package urn + +import ( + "net/url" + "strings" +) + +// Name is a claircore name. 
+// +// Names are expected to be unique within a claircore system and comparable +// across instances. Names are hierarchical, moving from least specific to most +// specific. +// +// Any pointer fields are optional metadata that may not exist depending on the +// (System, Kind) pair. +type Name struct { + // System scopes to a claircore system or "mode", such as "indexer" or + // "updater". + System string + // Kind scopes to a specific type of object used within the System. + Kind string + // Name scopes to a specific object within the system. + Name string + // Version is the named object's version. + // + // Versions can be ordered with a lexical sort. + Version *string +} + +// String implements fmt.Stringer. +func (n *Name) String() string { + v := url.Values{} + if n.Version != nil { + v.Set("version", *n.Version) + } + u := URN{ + NID: `claircore`, + NSS: strings.Join( + []string{n.System, n.Kind, n.Name}, + ":", + ), + q: v.Encode(), + } + + return u.String() +} diff --git a/toolkit/urn/name_test.go b/toolkit/urn/name_test.go new file mode 100644 index 000000000..f102e02f2 --- /dev/null +++ b/toolkit/urn/name_test.go @@ -0,0 +1,107 @@ +package urn + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestName(t *testing.T) { + version := "1" + tt := []struct { + In string + Want Name + }{ + // Weird cases first: + { + In: "urn:claircore:indexer:package:test?=version=1&version=999", + Want: Name{ + System: "indexer", + Kind: "package", + Name: "test", + Version: &version, + }, + }, + { + In: "urn:claircore:indexer:package:test", + Want: Name{ + System: "indexer", + Kind: "package", + Name: "test", + }, + }, + { + In: "urn:claircore:indexer:package:test?+resolve=something", + Want: Name{ + System: "indexer", + Kind: "package", + Name: "test", + }, + }, + { + In: "urn:claircore:indexer:package:test#some_anchor", + Want: Name{ + System: "indexer", + Kind: "package", + Name: "test", + }, + }, + + // Some other exhaustive cases: + { + In: 
"urn:claircore:indexer:repository:test?=version=1", + Want: Name{ + System: "indexer", + Kind: "repository", + Name: "test", + Version: &version, + }, + }, + { + In: "urn:claircore:indexer:distribution:test?=version=1", + Want: Name{ + System: "indexer", + Kind: "distribution", + Name: "test", + Version: &version, + }, + }, + { + In: "urn:claircore:matcher:vulnerability:test?=version=1", + Want: Name{ + System: "matcher", + Kind: "vulnerability", + Name: "test", + Version: &version, + }, + }, + { + In: "urn:claircore:matcher:enrichment:test?=version=1", + Want: Name{ + System: "matcher", + Kind: "enrichment", + Name: "test", + Version: &version, + }, + }, + } + + for _, tc := range tt { + t.Logf("parse: %q", tc.In) + u, err := Parse(tc.In) + if err != nil { + t.Error(err) + continue + } + got, err := u.Name() + if err != nil { + t.Error(err) + continue + } + want := tc.Want + t.Logf("name: %q", got.String()) + if !cmp.Equal(&got, &want) { + t.Error(cmp.Diff(&got, &want)) + } + } +} diff --git a/toolkit/urn/parser.go b/toolkit/urn/parser.go new file mode 100644 index 000000000..34da03da1 --- /dev/null +++ b/toolkit/urn/parser.go @@ -0,0 +1,283 @@ +// Code generated by ragel-go DO NOT EDIT. 
+ +package urn + +import "errors" +import "fmt" + +var _urn_trans_keys = []byte{1, 0, 14, 14, 13, 13, 12, 12, 8, 8, 7, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 7, 14, 8, 8, 0, 14, 7, 7, 7, 7, 7, 7, 7, 7, 4, 9, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 7, 7, 7, 7, 7, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 5, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0, 14, 0} +var _urn_char_class = []int8{0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 4, 0, 5, 0, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 0, 1, 9, 1, 10, 0, 7, 7, 7, 7, 7, 7, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 13, 11, 11, 14, 11, 11, 11, 11, 11, 1, 1, 1, 1, 0, 1, 7, 7, 7, 7, 7, 7, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 13, 11, 11, 14, 11, 11, 11, 11, 11, 1, 1, 1, 0, 0} +var _urn_index_offsets = []int16{0, 0, 1, 2, 3, 4, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102, 112, 122, 132, 142, 152, 162, 172, 182, 192, 202, 212, 222, 232, 242, 252, 262, 272, 282, 292, 302, 312, 320, 321, 336, 337, 338, 339, 340, 346, 347, 348, 349, 350, 351, 352, 353, 361, 371, 381, 391, 401, 411, 421, 431, 441, 451, 461, 471, 481, 491, 501, 511, 521, 531, 541, 551, 561, 571, 581, 591, 601, 611, 621, 631, 641, 651, 666, 681, 696, 711, 726, 741, 756, 771, 786, 801, 816, 0} +var _urn_indices = []int16{3, 5, 7, 9, 12, 11, 11, 11, 12, 12, 12, 12, 14, 11, 15, 11, 11, 11, 15, 15, 15, 15, 17, 11, 18, 11, 11, 11, 18, 18, 18, 18, 20, 11, 21, 11, 11, 11, 21, 21, 21, 21, 23, 11, 24, 11, 11, 11, 24, 24, 24, 24, 26, 11, 27, 11, 11, 11, 27, 27, 27, 27, 29, 11, 30, 11, 11, 11, 30, 30, 30, 30, 32, 11, 33, 11, 11, 11, 33, 33, 33, 33, 35, 11, 36, 11, 11, 11, 36, 36, 36, 36, 38, 11, 39, 11, 11, 11, 39, 39, 39, 39, 41, 11, 
42, 11, 11, 11, 42, 42, 42, 42, 44, 11, 45, 11, 11, 11, 45, 45, 45, 45, 47, 11, 48, 11, 11, 11, 48, 48, 48, 48, 50, 11, 51, 11, 11, 11, 51, 51, 51, 51, 53, 11, 54, 11, 11, 11, 54, 54, 54, 54, 56, 11, 57, 11, 11, 11, 57, 57, 57, 57, 59, 11, 60, 11, 11, 11, 60, 60, 60, 60, 62, 11, 63, 11, 11, 11, 63, 63, 63, 63, 65, 11, 66, 11, 11, 11, 66, 66, 66, 66, 68, 11, 69, 11, 11, 11, 69, 69, 69, 69, 71, 11, 72, 11, 11, 11, 72, 72, 72, 72, 74, 11, 75, 11, 11, 11, 75, 75, 75, 75, 77, 11, 78, 11, 11, 11, 78, 78, 78, 78, 80, 11, 81, 11, 11, 11, 81, 81, 81, 81, 83, 11, 84, 11, 11, 11, 84, 84, 84, 84, 86, 11, 87, 11, 11, 11, 87, 87, 87, 87, 89, 11, 90, 11, 11, 11, 90, 90, 90, 90, 92, 11, 93, 11, 11, 11, 93, 93, 93, 93, 95, 11, 96, 11, 11, 11, 96, 96, 96, 96, 98, 11, 99, 11, 11, 11, 99, 99, 99, 99, 101, 11, 102, 11, 11, 11, 102, 102, 102, 102, 104, 11, 11, 11, 104, 104, 104, 104, 107, 110, 109, 109, 111, 110, 110, 109, 110, 110, 110, 109, 110, 110, 110, 110, 113, 114, 116, 118, 120, 0, 0, 0, 0, 121, 123, 124, 126, 127, 121, 130, 131, 104, 107, 133, 133, 104, 104, 104, 104, 101, 133, 102, 107, 133, 133, 102, 102, 102, 102, 98, 133, 99, 107, 133, 133, 99, 99, 99, 99, 95, 133, 96, 107, 133, 133, 96, 96, 96, 96, 92, 133, 93, 107, 133, 133, 93, 93, 93, 93, 89, 133, 90, 107, 133, 133, 90, 90, 90, 90, 86, 133, 87, 107, 133, 133, 87, 87, 87, 87, 83, 133, 84, 107, 133, 133, 84, 84, 84, 84, 80, 133, 81, 107, 133, 133, 81, 81, 81, 81, 77, 133, 78, 107, 133, 133, 78, 78, 78, 78, 74, 133, 75, 107, 133, 133, 75, 75, 75, 75, 71, 133, 72, 107, 133, 133, 72, 72, 72, 72, 68, 133, 69, 107, 133, 133, 69, 69, 69, 69, 65, 133, 66, 107, 133, 133, 66, 66, 66, 66, 62, 133, 63, 107, 133, 133, 63, 63, 63, 63, 59, 133, 60, 107, 133, 133, 60, 60, 60, 60, 56, 133, 57, 107, 133, 133, 57, 57, 57, 57, 53, 133, 54, 107, 133, 133, 54, 54, 54, 54, 50, 133, 51, 107, 133, 133, 51, 51, 51, 51, 47, 133, 48, 107, 133, 133, 48, 48, 48, 48, 44, 133, 45, 107, 133, 133, 45, 45, 45, 45, 41, 133, 42, 107, 133, 133, 42, 42, 42, 
42, 38, 133, 39, 107, 133, 133, 39, 39, 39, 39, 35, 133, 36, 107, 133, 133, 36, 36, 36, 36, 32, 133, 33, 107, 133, 133, 33, 33, 33, 33, 29, 133, 30, 107, 133, 133, 30, 30, 30, 30, 26, 133, 27, 107, 133, 133, 27, 27, 27, 27, 23, 133, 24, 107, 133, 133, 24, 24, 24, 24, 20, 133, 21, 107, 133, 133, 21, 21, 21, 21, 17, 133, 18, 107, 133, 133, 18, 18, 18, 18, 118, 0, 164, 165, 118, 118, 118, 118, 118, 118, 166, 118, 118, 118, 118, 168, 0, 0, 169, 168, 168, 0, 168, 168, 168, 0, 168, 168, 168, 168, 114, 0, 0, 171, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 173, 0, 174, 175, 173, 173, 0, 173, 173, 173, 176, 173, 173, 173, 173, 124, 0, 178, 179, 124, 124, 124, 124, 124, 124, 180, 124, 124, 124, 124, 124, 0, 178, 179, 124, 124, 124, 124, 124, 181, 180, 124, 124, 124, 124, 183, 0, 184, 185, 183, 183, 124, 183, 183, 183, 180, 183, 183, 183, 183, 127, 0, 187, 188, 127, 127, 127, 127, 127, 127, 189, 127, 127, 127, 127, 127, 0, 187, 188, 127, 127, 127, 127, 127, 191, 189, 127, 127, 127, 127, 183, 0, 184, 185, 183, 183, 127, 183, 183, 183, 189, 183, 183, 183, 183, 194, 0, 195, 196, 194, 194, 0, 194, 194, 194, 0, 194, 194, 194, 194, 131, 0, 198, 199, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 0} +var _urn_index_defaults = []int16{0, 2, 2, 2, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 106, 109, 0, 0, 109, 109, 0, 0, 0, 0, 0, 0, 0, 0, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +var _urn_cond_targs = []int8{0, 1, 0, 2, 2, 3, 3, 4, 4, 5, 5, 0, 6, 6, 7, 80, 7, 8, 79, 8, 9, 78, 9, 10, 77, 10, 11, 76, 11, 12, 75, 12, 13, 74, 13, 14, 73, 14, 15, 72, 15, 16, 71, 16, 17, 70, 17, 18, 69, 18, 19, 68, 19, 20, 67, 20, 21, 66, 21, 22, 65, 22, 23, 64, 23, 24, 63, 24, 25, 62, 25, 26, 61, 26, 27, 60, 27, 28, 59, 28, 29, 58, 29, 30, 57, 30, 31, 56, 
31, 32, 55, 32, 33, 54, 33, 34, 53, 34, 35, 52, 35, 36, 51, 36, 37, 37, 0, 38, 38, 0, 81, 41, 39, 40, 83, 41, 42, 42, 81, 43, 84, 91, 44, 45, 85, 46, 47, 88, 48, 49, 50, 92, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 41, 43, 82, 83, 39, 83, 39, 84, 85, 82, 44, 48, 85, 82, 44, 86, 87, 87, 88, 82, 46, 88, 82, 46, 89, 89, 90, 90, 91, 92, 82, 49, 92, 82, 49, 0} +var _urn_cond_actions = []int8{0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 2, 2, 3, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 4, 4, 5, 6, 6, 3, 7, 0, 0, 0, 6, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 10, 9, 11, 3, 7, 12, 10, 13, 3, 13, 7, 13, 14, 14, 10, 14, 0, 15, 3, 15, 16, 17, 17, 10, 14, 17, 0, 15, 18, 3, 18, 7, 19, 19, 10, 0} +var _urn_eof_trans = []int16{1, 2, 5, 7, 9, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 74, 77, 80, 83, 86, 89, 92, 95, 98, 101, 104, 106, 109, 113, 114, 116, 118, 120, 123, 124, 126, 127, 129, 130, 131, 133, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 168, 171, 173, 178, 181, 183, 187, 191, 193, 194, 198, 0} +var urn_start int = 1 +var _ = urn_start +var urn_first_final int = 81 +var _ = urn_first_final +var urn_error int = 0 +var _ = urn_error +var urn_en_main int = 1 +var _ = urn_en_main + +func parse(out *URN, data string) (err error) { + var p, b, cs int + pe := len(data) + eof := len(data) + pct := false + + { + cs = int(urn_start) + + } + { + var _trans uint = 0 + var _keys int + var _inds int + var _ic int + _resume: + { + + } 
+ if p == pe && p != eof { + goto _out + + } + if p == eof { + if _urn_eof_trans[cs] > 0 { + _trans = uint(_urn_eof_trans[cs]) - 1 + + } + + } else { + _keys = int((cs << 1)) + + _inds = int(_urn_index_offsets[cs]) + + if (data[p]) <= 126 && (data[p]) >= 33 { + _ic = int(_urn_char_class[int((data[p]))-33]) + if _ic <= int(_urn_trans_keys[_keys+1]) && _ic >= int(_urn_trans_keys[_keys]) { + _trans = uint(_urn_indices[_inds+int((_ic-int(_urn_trans_keys[_keys])))]) + + } else { + _trans = uint(_urn_index_defaults[cs]) + + } + + } else { + _trans = uint(_urn_index_defaults[cs]) + + } + + } + cs = int(_urn_cond_targs[_trans]) + if _urn_cond_actions[_trans] != 0 { + switch _urn_cond_actions[_trans] { + case 1: + { + err = fmt.Errorf("invalid scheme: %q", data[:p]) + { + p += 1 + goto _out + + } + + } + + case 4: + { + err = errors.New("invalid nid: too long") + { + p += 1 + goto _out + + } + + } + + case 6: + { + err = fmt.Errorf("invalid nss: %q", data[b:p]) + { + p += 1 + goto _out + + } + + } + + case 3: + { + b = p + pct = false + } + + case 10: + { + pct = true + } + + case 5: + { + out.setNID(data[b:p]) + } + + case 9: + { + out.setNSS(data[b:p]) + } + + case 14: + { + out.r = data[b:p] + } + + case 19: + { + out.q = data[b:p] + } + + case 12: + { + out.f = data[b:p] + } + + case 2: + { + err = fmt.Errorf("invalid nid: bad char at pos %d: %+q", p, data[p]) + { + p += 1 + goto _out + + } + + } + { + err = fmt.Errorf("invalid nid: %q", data[b:p]) + { + p += 1 + goto _out + + } + + } + + case 8: + { + err = fmt.Errorf("invalid nid: bad char at pos %d: %+q", p, data[p]) + { + p += 1 + goto _out + + } + + } + { + err = errors.New("invalid nid: too long") + { + p += 1 + goto _out + + } + + } + + case 7: + { + b = p + pct = false + } + { + pct = true + } + + case 13: + { + b = p + pct = false + } + { + out.r = data[b:p] + } + + case 18: + { + b = p + pct = false + } + { + out.q = data[b:p] + } + + case 11: + { + b = p + pct = false + } + { + out.f = data[b:p] + } + + case 
16: + { + pct = true + } + { + b = p + pct = false + } + + case 17: + { + out.r = data[b:p] + } + { + out.q = data[b:p] + } + + case 15: + { + out.r = data[b:p] + } + { + b = p + pct = false + } + { + out.q = data[b:p] + } + + } + + } + if p == eof { + if cs >= 81 { + goto _out + + } + + } else { + if cs != 0 { + p += 1 + goto _resume + + } + + } + _out: + { + + } + + } + if err != nil { + return err + } + if p != eof { + return errors.New("invalid nss") + } + _ = pct + return nil +} diff --git a/toolkit/urn/parser.rl b/toolkit/urn/parser.rl new file mode 100644 index 000000000..cdca3bfff --- /dev/null +++ b/toolkit/urn/parser.rl @@ -0,0 +1,87 @@ +package urn + +import "errors" +import "fmt" + +%% machine urn; +%% write data; + +func parse(out *URN, data string) (err error) { + var p, b, cs int + pe := len(data) + eof := len(data) + pct := false + +%%{ + action err_scheme { + err = fmt.Errorf("invalid scheme: %q", data[:p]) + fbreak; + } + action err_nid { + err = fmt.Errorf("invalid nid: %q", data[b:p]) + fbreak; + } + action err_nid_toolong { + err = errors.New("invalid nid: too long") + fbreak; + } + action err_nid_char { + err = fmt.Errorf("invalid nid: bad char at pos %d: %+q", p, data[p]) + fbreak; + } + action err_nss { + err = fmt.Errorf("invalid nss: %q", data[b:p]) + fbreak; + } + + action mark { + b = p + pct = false + } + action set_pct { + pct = true + } + + action set_nid { + out.setNID(data[b:p]) + } + action set_nss { + out.setNSS(data[b:p]) + } + action set_rc { + out.r = data[b:p] + } + action set_qc { + out.q = data[b:p] + } + action set_fc { + out.f = data[b:p] + } + + sub_delims = [!$&'()*+,;=]; + pct_encoded = ('%' xdigit{2}) >set_pct; + unreserved = alnum | [._~] | '-'; + pchar = unreserved | pct_encoded | sub_delims | [:@]; + query = pchar ( pchar | [/?] 
)*; + NSS = (pchar (pchar | '/')*) >mark %set_nss @err(err_nss); + ldh = (alnum | '-') @err(err_nid_char); + NID = (alnum @err(err_nid_char) ldh{,30} alnum @err(err_nid_char)) >mark %set_nid @err(err_nid); + assigned_name = ([Uu][Rr][Nn] ':') %mark @err(err_scheme) (NID ':' @err(err_nid_toolong)) NSS; + f_component = query* >mark %set_fc; + rq_components = ('?+' query* >mark %set_rc)? ('?=' query* >mark %set_qc)?; + + main := assigned_name rq_components? ('#' f_component)?; + + write init; + write exec; +}%% + + if err != nil { + return err + } + if p != eof { + return errors.New("invalid nss") + } + _=pct + return nil +} diff --git a/toolkit/urn/urn.go b/toolkit/urn/urn.go new file mode 100644 index 000000000..abd7c7b9d --- /dev/null +++ b/toolkit/urn/urn.go @@ -0,0 +1,149 @@ +// Package urn implements RFC 8141, with additional Namespace Specific String +// (NSS) handling for claircore's use cases. +package urn + +import ( + "fmt" + "net/url" + "strings" +) + +// URN is an RFC 8141 URN. +type URN struct { + // NID is the namespace ID. + NID string + // NSS is the namespace specific string. + NSS string + // The "R" component is for "resolver" parameters, and has no RFC-defined + // semantics. + r string + // The "Q" component holds parameters for the named resource or system. + q string + // The "F" component is for clients, as in RFC 3986. + f string +} + +//go:generate sh ./generate.sh + +// Parse parses the provided string into its components. +// +// The optional "r", "q", and "f" components are not eagerly processed and are +// only checked for syntactical correctness on demand. +func Parse(n string) (u URN, _ error) { + n = escape(n) + if err := parse(&u, n); err != nil { + return u, fmt.Errorf("urn: %w", err) + } + return u, nil +} + +func (u *URN) setNID(s string) { + u.NID = strings.ToLower(s) +} + +func (u *URN) setNSS(s string) { + var n int + // Remap lower-case hex digits to upper-case. 
+ f := func(r rune) rune { + if n != 0 { + if r > 0x60 && r < 0x67 { + r -= 0x20 + } + n-- + } + if r == '%' { + n = 2 + } + return r + } + u.NSS = strings.Map(f, s) +} + +// String returns the normalized URN with optional components. +func (u *URN) String() string { + var b strings.Builder + b.WriteString(`urn:`) + b.WriteString(u.NID) + b.WriteByte(':') + b.WriteString(u.NSS) + if u.r != "" { + r, _ := u.R() + b.WriteString(`?+`) + b.WriteString(r.Encode()) + } + if u.q != "" { + q, _ := u.Q() + b.WriteString(`?=`) + b.WriteString(q.Encode()) + } + if u.f != "" { + b.WriteByte('#') + b.WriteString(u.f) + } + return b.String() +} + +// Normalized returns the normalized URN without optional components. +func (u URN) Normalized() string { return `urn:` + u.NID + `:` + u.NSS } + +// R returns the "r" (`?+`) component. +func (u *URN) R() (url.Values, error) { return url.ParseQuery(u.r) } + +// Q returns the "q" (`?=`) component. +func (u *URN) Q() (url.Values, error) { return url.ParseQuery(u.q) } + +// F returns the "f" (`#`) component. +func (u *URN) F() string { return u.f } + +// Equal checks for equivalence as described in RFC 8141. +func (u *URN) Equal(b *URN) bool { return u.NID == b.NID && u.NSS == b.NSS } + +// Name returns a claircore name, reporting an error if the URN is not in the +// "claircore" namespace, if the NSS has fewer than three colon-separated +// fields, or if the "q" component cannot be parsed. 
+func (u *URN) Name() (Name, error) { + if u.NID != "claircore" { + return Name{}, fmt.Errorf(`urn: wrong nid: %q`, u.NID) + } + + fs := strings.FieldsFunc(u.NSS, isColon) + if len(fs) < 3 { + return Name{}, fmt.Errorf(`urn: bad format for nss: %q`, fs) + } + var n Name + n.System = fs[0] + n.Kind = fs[1] + n.Name = fs[2] + + if u.q != "" { + q, err := u.Q() + if err != nil { + return Name{}, fmt.Errorf(`urn: invalid q-component: %w`, err) + } + for _, x := range []struct { + Key string + Tgt **string + }{ + {"version", &n.Version}, + } { + if vs, ok := q[x.Key]; ok { + (*x.Tgt) = &vs[0] + } + } + } + + return n, nil +} + +func isColon(r rune) bool { + return r == ':' +} + +// Normalize returns the normalized version of the passed URN. +func Normalize(n string) (string, error) { + u, err := Parse(n) + if err != nil { + return "", err + } + return u.Normalized(), nil +} diff --git a/toolkit/urn/urn_test.go b/toolkit/urn/urn_test.go new file mode 100644 index 000000000..0eae39afb --- /dev/null +++ b/toolkit/urn/urn_test.go @@ -0,0 +1,132 @@ +package urn + +import ( + "net/url" + "testing" + + "github.com/google/go-cmp/cmp" +) + +// TODO(hank) Find a conformance test suite and set it up. 
+ +func TestString(t *testing.T) { + tt := []struct { + In URN + Want string + }{ + { + In: URN{NID: "test", NSS: "test"}, + Want: "urn:test:test", + }, + { + In: URN{ + NID: "test", + NSS: "test", + q: url.Values{"a": {"b"}}.Encode(), + }, + Want: "urn:test:test?=a=b", + }, + } + + for _, tc := range tt { + got, want := tc.In.String(), tc.Want + t.Logf("got: %q, want: %q", got, want) + if got != want { + t.Fail() + } + } +} + +func TestParse(t *testing.T) { + opts := cmp.Options{ + cmp.AllowUnexported(URN{}), + } + tt := []struct { + In string + Want URN + }{ + { + In: "urn:claircore:test", + Want: URN{ + NID: "claircore", + NSS: "test", + }, + }, + { + In: "urn:claircore:indexer:package:test?=v=1", + Want: URN{ + NID: "claircore", + NSS: "indexer:package:test", + q: "v=1", + }, + }, + { + In: "urn:claircore:indexer:package:test?+r=1#f", + Want: URN{ + NID: "claircore", + NSS: "indexer:package:test", + r: "r=1", + f: "f", + }, + }, + { + In: "urn:test:tes%74", + Want: URN{ + NID: "test", + NSS: "tes%74", + }, + }, + { + In: "urn:test:%3b", + Want: URN{ + NID: "test", + NSS: "%3B", + }, + }, + } + for _, tc := range tt { + t.Logf("parse: %q", tc.In) + got, err := Parse(tc.In) + if err != nil { + t.Errorf("in: %q, error: %v", tc.In, err) + continue + } + want := tc.Want + t.Logf("got: %#v %q", got, got.String()) + t.Logf("want: %#v %q", want, want.String()) + // Compare pointers to test RFC 8141 equality. + if !cmp.Equal(&got, &want) { + t.Error(cmp.Diff(&got, &want)) + } + // Compare values to test Go equality. 
+ if !cmp.Equal(got, want, opts) { + t.Error(cmp.Diff(got, want, opts)) + } + } +} + +func TestNormalized(t *testing.T) { + tt := []struct { + In string + Want string + }{ + { + In: "urn:claircore:indexer:package:test?=v=1", + Want: "urn:claircore:indexer:package:test", + }, + } + for _, tc := range tt { + t.Logf("parse: %q", tc.In) + got, err := Normalize(tc.In) + if err != nil { + t.Errorf("in: %q, error: %v", tc.In, err) + continue + } + want := tc.Want + t.Logf("got: %#v", got) + t.Logf("want: %#v", want) + if !cmp.Equal(got, want) { + t.Error(cmp.Diff(got, want)) + } + } +}