diff --git a/nquad/nquad.go b/nquad/nquad.go
new file mode 100644
index 0000000..93fcb18
--- /dev/null
+++ b/nquad/nquad.go
@@ -0,0 +1,351 @@
+// Copyright (c) 2023 Z5Labs and Contributors
+//
+// This software is released under the MIT License.
+// https://opensource.org/licenses/MIT
+
+// Package nquad marshals RDF triples to, and unmarshals them from,
+// their NQuad text representation.
+package nquad
+
+import (
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+
+	rdfpb "github.com/z5labs/rdf/proto"
+)
+
+// Marshaler represents any type which can
+// convert itself into a NQuad representation.
+type Marshaler interface {
+	MarshalNQuad() ([]byte, error)
+}
+
+// Unmarshaler represents any type which can
+// construct itself from a NQuad representation.
+type Unmarshaler interface {
+	UnmarshalNQuad(data []byte) error
+}
+
+// Triple is a defined type over the underlying
+// RDF Triple protobuf message which implements
+// the Marshaler and Unmarshaler interfaces.
+type Triple rdfpb.Triple
+
+// Compile-time checks that *Triple satisfies both interfaces.
+var (
+	_ Marshaler   = (*Triple)(nil)
+	_ Unmarshaler = (*Triple)(nil)
+)
+
+// MarshalNQuad implements the Marshaler interface.
+//
+// It returns an error, rather than panicking, when the triple, its
+// subject, its object or a literal object value is not set.
+func (t *Triple) MarshalNQuad() ([]byte, error) {
+	var m marshaler
+	return m.marshal((*rdfpb.Triple)(t))
+}
+
+// marshaler accumulates the text form of a single statement.
+type marshaler struct {
+	buf []byte
+}
+
+// marshal appends "subject predicate object ." for t to the buffer
+// and returns the buffer.
+func (m *marshaler) marshal(t *rdfpb.Triple) ([]byte, error) {
+	if t == nil {
+		return nil, errors.New("cannot marshal nil triple")
+	}
+	if t.Subject == nil {
+		return nil, errors.New("missing subject")
+	}
+	if t.Object == nil {
+		return nil, errors.New("missing object")
+	}
+
+	switch x := t.Subject.Value.(type) {
+	case *rdfpb.Subject_BlankNode:
+		m.marshalBlankNode(x.BlankNode)
+	case *rdfpb.Subject_Iri:
+		m.marshalIri(x.Iri)
+	default:
+		return nil, fmt.Errorf("unsupported subject value type: %T", x)
+	}
+	m.buf = append(m.buf, ' ')
+	m.marshalIri(t.Predicate)
+	m.buf = append(m.buf, ' ')
+	switch x := t.Object.Value.(type) {
+	case *rdfpb.Object_BlankNode:
+		m.marshalBlankNode(x.BlankNode)
+	case *rdfpb.Object_Iri:
+		m.marshalIri(x.Iri)
+	case *rdfpb.Object_Literal:
+		if err := m.marshalLiteral(x.Literal); err != nil {
+			return nil, err
+		}
+	default:
+		return nil, fmt.Errorf("unsupported object value type: %T", x)
+	}
+	m.buf = append(m.buf, " ."...)
+	return m.buf, nil
+}
+
+// marshalBlankNode appends s as a blank node label, i.e. "_:s".
+func (m *marshaler) marshalBlankNode(s string) {
+	m.buf = append(m.buf, "_:"...)
+	m.buf = append(m.buf, s...)
+}
+
+// marshalIri appends s enclosed in angle brackets, i.e. "<s>".
+func (m *marshaler) marshalIri(s string) {
+	m.buf = append(m.buf, '<')
+	m.buf = append(m.buf, s...)
+	m.buf = append(m.buf, '>')
+}
+
+// marshalLiteral appends the value of lit surrounded by double quotes.
+// Bytes values are written using the standard base64 encoding.
+func (m *marshaler) marshalLiteral(lit *rdfpb.Literal) error {
+	if lit == nil {
+		return errors.New("missing literal")
+	}
+	m.buf = append(m.buf, '"')
+	switch x := lit.Value.(type) {
+	case *rdfpb.Literal_String_:
+		m.buf = append(m.buf, x.String_...)
+	case *rdfpb.Literal_Int:
+		m.buf = strconv.AppendInt(m.buf, x.Int, 10)
+	case *rdfpb.Literal_Float64:
+		m.buf = strconv.AppendFloat(m.buf, x.Float64, 'f', -1, 64)
+	case *rdfpb.Literal_Bool:
+		m.buf = strconv.AppendBool(m.buf, x.Bool)
+	case *rdfpb.Literal_Bytes:
+		// Encode writes from index 0 of its destination and needs the
+		// destination to be long enough already, so grow the buffer
+		// first and encode into the new tail instead of over the
+		// statement written so far.
+		start := len(m.buf)
+		n := base64.StdEncoding.EncodedLen(len(x.Bytes))
+		m.buf = append(m.buf, make([]byte, n)...)
+		base64.StdEncoding.Encode(m.buf[start:], x.Bytes)
+	default:
+		return fmt.Errorf("unsupported literal value type: %T", x)
+	}
+	m.buf = append(m.buf, '"')
+	return nil
+}
+
+// UnmarshalNQuad implements the Unmarshaler interface.
+//
+// data must hold a single statement of the form
+// "subject predicate object ." with whitespace separated terms.
+// Empty data is a no-op which leaves t untouched. Literal objects
+// are not supported yet and are reported as an error.
+func (t *Triple) UnmarshalNQuad(data []byte) error {
+	if len(data) == 0 {
+		return nil
+	}
+	if t == nil {
+		return errors.New("cannot unmarshal into nil triple")
+	}
+	// Unlike splitting on a single space, Fields tolerates repeated
+	// separators as well as a trailing newline.
+	terms := strings.Fields(string(data))
+	if len(terms) > 4 {
+		return errors.New("nquad statement can have a max of 4 terms in it")
+	}
+	u := unmarshaler{
+		terms: terms,
+	}
+	return u.unmarshal(t)
+}
+
+// UmarshalNQuad is the original, misspelled name of UnmarshalNQuad.
+//
+// Deprecated: use UnmarshalNQuad instead.
+func (t *Triple) UmarshalNQuad(data []byte) error {
+	return t.UnmarshalNQuad(data)
+}
+
+// unmarshaler holds the terms of a statement which have not been
+// consumed yet.
+type unmarshaler struct {
+	terms []string
+}
+
+// stateAction parses one part of a statement and returns the action
+// for the next part, or nil once the statement is complete.
+type stateAction func(*unmarshaler, *Triple) (stateAction, error)
+
+// unmarshal runs the state actions, starting with the subject, until
+// one of them fails or reports completion.
+func (u *unmarshaler) unmarshal(triple *Triple) error {
+	for next := stateAction(unmarshalSubject); next != nil; {
+		var err error
+		next, err = next(u, triple)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// unmarshalSubject parses the subject, which is an IRI or a blank node.
+func unmarshalSubject(u *unmarshaler, triple *Triple) (stateAction, error) {
+	s := u.peek()
+	if len(s) == 0 {
+		return nil, errors.New("missing subject")
+	}
+	r, _ := utf8.DecodeRuneInString(s)
+	switch r {
+	case '<':
+		iri, err := u.unmarshalIri()
+		if err != nil {
+			return nil, err
+		}
+		triple.Subject = &rdfpb.Subject{
+			Value: &rdfpb.Subject_Iri{
+				Iri: iri,
+			},
+		}
+	case '_':
+		blankNode, err := u.unmarshalBlankNode()
+		if err != nil {
+			return nil, err
+		}
+		triple.Subject = &rdfpb.Subject{
+			Value: &rdfpb.Subject_BlankNode{
+				BlankNode: blankNode,
+			},
+		}
+	default:
+		return nil, fmt.Errorf("unexpected starting character for subject: %q", r)
+	}
+	return unmarshalPredicate, nil
+}
+
+// unmarshalPredicate parses the predicate, which must be an IRI.
+func unmarshalPredicate(u *unmarshaler, triple *Triple) (stateAction, error) {
+	s := u.peek()
+	if len(s) == 0 {
+		return nil, errors.New("missing predicate")
+	}
+	iri, err := u.unmarshalIri()
+	if err != nil {
+		return nil, err
+	}
+	triple.Predicate = iri
+	return unmarshalObject, nil
+}
+
+// unmarshalObject parses the object, which is an IRI, a blank node
+// or a literal.
+func unmarshalObject(u *unmarshaler, triple *Triple) (stateAction, error) {
+	s := u.peek()
+	// A lone "." here is the end of statement marker, so the object
+	// itself was left out.
+	if len(s) == 0 || s == "." {
+		return nil, errors.New("missing object")
+	}
+	r, _ := utf8.DecodeRuneInString(s)
+	switch r {
+	case '<':
+		iri, err := u.unmarshalIri()
+		if err != nil {
+			return nil, err
+		}
+		triple.Object = &rdfpb.Object{
+			Value: &rdfpb.Object_Iri{
+				Iri: iri,
+			},
+		}
+	case '_':
+		blankNode, err := u.unmarshalBlankNode()
+		if err != nil {
+			return nil, err
+		}
+		triple.Object = &rdfpb.Object{
+			Value: &rdfpb.Object_BlankNode{
+				BlankNode: blankNode,
+			},
+		}
+	default:
+		lit, err := u.unmarshalLiteral()
+		if err != nil {
+			return nil, err
+		}
+		triple.Object = &rdfpb.Object{
+			Value: &rdfpb.Object_Literal{
+				Literal: lit,
+			},
+		}
+	}
+	return unmarshalEndOfStatement, nil
+}
+
+// unmarshalEndOfStatement checks that the next term is the closing ".".
+func unmarshalEndOfStatement(u *unmarshaler, triple *Triple) (stateAction, error) {
+	s, ok := u.next()
+	if !ok || s != "." {
+		return nil, errors.New("nquad statement should end with '.'")
+	}
+	return nil, nil
+}
+
+// unmarshalIri consumes the next term and returns the IRI found
+// between its enclosing angle brackets.
+func (u *unmarshaler) unmarshalIri() (string, error) {
+	s, ok := u.next()
+	if !ok {
+		return "", errors.New("missing iri")
+	}
+	if len(s) < 2 || !strings.HasPrefix(s, "<") || !strings.HasSuffix(s, ">") {
+		return "", fmt.Errorf("iri must be enclosed in '<' and '>': %q", s)
+	}
+	return s[1 : len(s)-1], nil
+}
+
+// unmarshalBlankNode consumes the next term and returns the blank
+// node label which follows its "_:" prefix.
+func (u *unmarshaler) unmarshalBlankNode() (string, error) {
+	s, ok := u.next()
+	if !ok {
+		return "", errors.New("missing blank node")
+	}
+	if !strings.HasPrefix(s, "_:") {
+		return "", fmt.Errorf("blank node must start with \"_:\": %q", s)
+	}
+	return strings.TrimPrefix(s, "_:"), nil
+}
+
+// unmarshalLiteral reports that literal objects cannot be parsed yet.
+// Failing here keeps an Object which wraps a nil Literal from ending
+// up in the triple.
+func (u *unmarshaler) unmarshalLiteral() (*rdfpb.Literal, error) {
+	return nil, fmt.Errorf("literal objects are not supported yet: %q", u.peek())
+}
+
+// peek returns the next term without consuming it, or "" when no
+// terms are left.
+func (u *unmarshaler) peek() string {
+	if len(u.terms) == 0 {
+		return ""
+	}
+	return u.terms[0]
+}
+
+// next consumes and returns the next term. The reported bool is
+// false when no terms are left.
+func (u *unmarshaler) next() (string, bool) {
+	if len(u.terms) == 0 {
+		return "", false
+	}
+	s := u.terms[0]
+	u.terms = u.terms[1:]
+	return s, true
+}
diff --git a/nquad/nquad_test.go b/nquad/nquad_test.go
new file mode 100644
index 0000000..32e5e60
--- /dev/null
+++ b/nquad/nquad_test.go
@@ -0,0 +1,159 @@
+// Copyright (c) 2023 Z5Labs and Contributors
+//
+// This software is released under the MIT License.
+// https://opensource.org/licenses/MIT
+
+package nquad
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	rdfpb "github.com/z5labs/rdf/proto"
+)
+
+func TestMarshalThenUnmarshalIdentity(t *testing.T) {
+	testCases := []struct {
+		Name   string
+		Triple *rdfpb.Triple
+	}{
+		{
+			Name: "BlankNodeToBlankNode",
+			Triple: &rdfpb.Triple{
+				Subject: &rdfpb.Subject{
+					Value: &rdfpb.Subject_BlankNode{
+						BlankNode: "bob",
+					},
+				},
+				Predicate: "knows",
+				Object: &rdfpb.Object{
+					Value: &rdfpb.Object_BlankNode{
+						BlankNode: "alice",
+					},
+				},
+			},
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.Name, func(t *testing.T) {
+			srcTriple := (*Triple)(testCase.Triple)
+			b, err := srcTriple.MarshalNQuad()
+			if !assert.Nil(t, err) {
+				return
+			}
+
+			var uTriple Triple
+			err = uTriple.UnmarshalNQuad(b)
+			if !assert.Nil(t, err) {
+				return
+			}
+
+			if !assert.Equal(t, srcTriple, &uTriple) {
+				return
+			}
+		})
+	}
+}
+
+func TestUnmarshalThenMarshalIdentity(t *testing.T) {
+	testCases := []struct {
+		Name  string
+		NQuad string
+	}{
+		{
+			Name:  "BlankNodeToBlankNode",
+			NQuad: "_:bob <knows> _:alice .",
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.Name, func(t *testing.T) {
+			var triple Triple
+			err := triple.UnmarshalNQuad([]byte(testCase.NQuad))
+			if !assert.Nil(t, err) {
+				return
+			}
+
+			b, err := triple.MarshalNQuad()
+			if !assert.Nil(t, err) {
+				return
+			}
+
+			if !assert.Equal(t, testCase.NQuad, string(b)) {
+				return
+			}
+		})
+	}
+}
+
+func TestMarshalBytesLiteral(t *testing.T) {
+	triple := &Triple{
+		Subject: &rdfpb.Subject{
+			Value: &rdfpb.Subject_BlankNode{
+				BlankNode: "bob",
+			},
+		},
+		Predicate: "avatar",
+		Object: &rdfpb.Object{
+			Value: &rdfpb.Object_Literal{
+				Literal: &rdfpb.Literal{
+					Value: &rdfpb.Literal_Bytes{
+						Bytes: []byte("hello"),
+					},
+				},
+			},
+		},
+	}
+
+	b, err := triple.MarshalNQuad()
+	if !assert.Nil(t, err) {
+		return
+	}
+	assert.Equal(t, `_:bob <avatar> "aGVsbG8=" .`, string(b))
+}
+
+func BenchmarkTriple_MarshalNQuad(b *testing.B) {
+	triple := &Triple{
+		Subject: &rdfpb.Subject{
+			Value: &rdfpb.Subject_BlankNode{
+				BlankNode: "bob",
+			},
+		},
+		Predicate: "knows",
+		Object: &rdfpb.Object{
+			Value: &rdfpb.Object_BlankNode{
+				BlankNode: "alice",
+			},
+		},
+	}
+
+	for i := 0; i < b.N; i++ {
+		buf, err := triple.MarshalNQuad()
+		if err != nil {
+			b.Error(err)
+			return
+		}
+		if len(buf) == 0 {
+			b.Fail()
+			return
+		}
+	}
+}
+
+func BenchmarkTriple_UnmarshalNQuad(b *testing.B) {
+	nquad := []byte("_:bob <knows> _:alice .")
+
+	for i := 0; i < b.N; i++ {
+		var triple Triple
+		err := triple.UnmarshalNQuad(nquad)
+		if err != nil {
+			b.Error(err)
+			return
+		}
+		if triple.Subject == nil || triple.Predicate == "" || triple.Object == nil {
+			b.Fail()
+			return
+		}
+	}
+}