diff --git a/README.md b/README.md index a36919d..769bda8 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,210 @@ -# Gomme +
+ + + + + +
-Gomme is a parser combinator library for the Go programming language. -It provides a toolkit for developers to build reliable, fast, flexible, and easy-to-develop and maintain parsers -for both textual and binary formats. It extensively uses the recent introduction of Generics in the Go programming -language to offer flexibility in how combinators can be mixed and matched to produce the desired output while -providing as much compile-time type safety as possible. +Gomme is a library that simplifies building parsers in Go. -## Why would you want to use Gomme? +Inspired by Rust's renowned `nom` crate, Gomme provides a developer-friendly toolkit that allows you to quickly and easily create reliable parsers for both textual and binary formats. -Parser combinators arguably come with a somewhat steep learning curve, but they are a very powerful tool for parsing textual and binary formats. We believe that the benefits of parser combinators outweigh the cost of learning them, and that's why we built Gomme. Our intuition is that most of the cost of learning them is due to the lack of good documentation and examples, and that's why we are trying to provide a comprehensive documentation and a large set of examples. +With the power of Go's newly introduced Generics, Gomme gives you the flexibility to design your own parsers while ensuring optimal compile-time type safety. Whether you're a seasoned developer or just starting out, Gomme is designed to make the process of building parsers efficient, enjoyable, and less intimidating. -In practice we have found that parser combinators are very intuitive and flexible, and can be used to build parsers for a wide range of formats. They are also very easy to test, and can be used to build parsers that are very easy to maintain and extend. We have also found that parser combinators are very fast, and can be used to build parsers that can turn out as fast as hand-written parsers. 
-## Table of Content +## Table of content - -- [Example](#example) +- [Getting started](#getting-started) +- [Why Gomme?](#why-gomme) +- [Examples](#examples) - [Documentation](#documentation) +- [Table of content](#table-of-content-1) +- [Documentation](#documentation-1) - [Installation](#installation) -- [FAQ](#faq) +- [Guide](#guide) + - [List of combinators](#list-of-combinators) + - [Base combinators](#base-combinators) + - [Bytes combinators](#bytes-combinators) + - [Character combinators](#character-combinators) + - [Combinators for Sequences](#combinators-for-sequences) + - [Combinators for Applying Parsers Many Times](#combinators-for-applying-parsers-many-times) + - [Combinators for Choices](#combinators-for-choices) +- [Installation](#installation-1) +- [Frequently asked questions](#frequently-asked-questions) + - [Q: What are parser combinators?](#q-what-are-parser-combinators) + - [Q: Why would I use parser combinators instead of a specific parser?](#q-why-would-i-use-parser-combinators-instead-of-a-specific-parser) + - [Q: Where can I learn more about parser combinators?](#q-where-can-i-learn-more-about-parser-combinators) - [Acknowledgements](#acknowledgements) - [Authors](#authors) -## Example -Here's an example of how to parse [hexadecimal color codes](https://developer.mozilla.org/en-US/docs/Web/CSS/color), using the Gomme library: +## Getting started + +Here's how to quickly parse [hexadecimal color codes](https://developer.mozilla.org/en-US/docs/Web/CSS/color) using Gomme: ```golang // RGBColor stores the three bytes describing a color in the RGB space. type RGBColor struct { - red uint8 - green uint8 - blue uint8 + red uint8 + green uint8 + blue uint8 } // ParseRGBColor creates a new RGBColor from a hexadecimal color string. -// The string must be a six digit hexadecimal number, prefixed with a "#". +// The string must be a six-digit hexadecimal number, prefixed with a "#". 
func ParseRGBColor(input string) (RGBColor, error) { - parser := gomme.Preceded( - gomme.Token[string]("#"), - gomme.Map( - gomme.Count(HexColorComponent(), 3), - func(components []uint8) (RGBColor, error) { - return RGBColor{components[0], components[1], components[2]}, nil - }, - ), - ) - - result := parser(input) - if result.Err != nil { - return RGBColor{}, result.Err - } - - return result.Output, nil + parser := gomme.Preceded( + gomme.Token[string]("#"), + gomme.Map( + gomme.Count(HexColorComponent(), 3), + func(components []uint8) (RGBColor, error) { + return RGBColor{components[0], components[1], components[2]}, nil + }, + ), + ) + + result := parser(input) + if result.Err != nil { + return RGBColor{}, result.Err + } + + return result.Output, nil } // HexColorComponent produces a parser that parses a single hex color component, -// which is a two digit hexadecimal number. +// which is a two-digit hexadecimal number. func HexColorComponent() gomme.Parser[string, uint8] { - return func(input string) gomme.Result[uint8, string] { - return gomme.Map( - gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit), - fromHex, - )(input) - } + return func(input string) gomme.Result[uint8, string] { + return gomme.Map( + gomme.TakeWhileMN[string](2, 2, gomme.IsHexDigit), + fromHex, + )(input) + } } -// fromHex converts a two digits hexadecimal number to its decimal value. +// fromHex converts two digits hexadecimal numbers to their decimal value. func fromHex(input string) (uint8, error) { - res, err := strconv.ParseInt(input, 16, 16) - if err != nil { - return 0, err - } + res, err := strconv.ParseInt(input, 16, 16) + if err != nil { + return 0, err + } - return uint8(res), nil + return uint8(res), nil } ``` -More examples can be found in the [examples](./examples) directory. +It's as simple as that! Feel free to explore more in the [examples](examples/) directory. + +## Why Gomme? 
+ +While it's true that learning parser combinators might initially seem daunting, their power, flexibility, and efficiency make them an invaluable tool for parsing textual and binary formats. We've created Gomme with a focus on making this learning curve as smooth as possible, providing clear documentation and a wide array of examples. +Once you get the hang of it, you'll find that Gomme's parser combinators are intuitive, adaptable, and perfect for quickly building parsers for various formats. They're easy to test and maintain, and they can help you create parsers that are as fast as their hand-written counterparts. + +## Examples + +See Gomme in action with these handy examples: +- [Parsing a simple CSV file](./examples/csv) +- [Parsing Redis' RESP protocol](./examples/redis) +- [Parsing hexadecimal color codes](./examples/hexcolor) ## Documentation -[Documentation](https://linktodocumentation) +For more detailed information, refer to the official [documentation](https://pkg.go.dev/github.com/oleiade/gomme). +## Table of content +## Documentation + +[Documentation](https://pkg.go.dev/github.com/oleiade/gomme) + +## Installation + +```bash +go get github.com/oleiade/gomme +``` + +## Guide + +In this guide, we provide a detailed overview of the various combinators available in Gomme. Combinators are fundamental building blocks in parser construction, each designed for a specific task. By combining them, you can create complex parsers suited to your specific needs. For each combinator, we've provided a brief description and a usage example. Let's explore!
+ +### List of combinators + +#### Base combinators + +| Combinator | Description | Example | +| :------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------- | +| [`Map`](https://pkg.go.dev/github.com/oleiade/gomme#Map) | Applies a function to the result of the provided parser, allowing you to transform the parser's result. | `Map(Digit1(), func(s string)int { return 123 })` | +| [`Optional`](https://pkg.go.dev/github.com/oleiade/gomme#Optional) | Makes a parser optional. If unsuccessful, the parser returns a nil `Result.Output`. | `Optional(CRLF())` | +| [`Peek`](https://pkg.go.dev/github.com/oleiade/gomme#Peek) | Applies the provided parser without consuming the input. | | +| [`Recognize`](https://pkg.go.dev/github.com/oleiade/gomme#Recognize) | Returns the consumed input as the produced value when the provided parser is successful. | `Recognize(SeparatedPair(Token("key"), Char(':'), Token("value")))` | +| [`Assign`](https://pkg.go.dev/github.com/oleiade/gomme#Assign) | Returns the assigned value when the provided parser is successful. | `Assign(true, Token("true"))` | + +#### Bytes combinators + +| Combinator | Description | Example | +| :----------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------ | +| [`Take`](https://pkg.go.dev/github.com/oleiade/gomme#Take) | Parses the first N elements of the input.
| `Take(5)` | +| [`TakeUntil`](https://pkg.go.dev/github.com/oleiade/gomme#TakeUntil) | Parses the input until the provided parser argument succeeds. | `TakeUntil(CRLF())` | +| [`TakeWhileMN`](https://pkg.go.dev/github.com/oleiade/gomme#TakeWhileMN) | Parses the longest input slice fitting the length expectation (m <= input length <= n) and matching the predicate. The parser argument is a function taking a `rune` as input and returning a `bool`. | `TakeWhileMN(2, 6, gomme.IsHexDigit)` | +| [`Token`](https://pkg.go.dev/github.com/oleiade/gomme#Token) | Recognizes a specific pattern. Compares the input with the token's argument and returns the matching part. | `Token("tolkien")` | + +#### Character combinators + +| Combinator | Description | Example | +| :--- | :--- | :--- | +| [`Char`](https://pkg.go.dev/github.com/oleiade/gomme#Char) | Parses a single instance of a provided character. | `Char('$')` | +| [`AnyChar`](https://pkg.go.dev/github.com/oleiade/gomme#AnyChar) | Parses a single instance of any character. | `AnyChar()` | +| [`Alpha0`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha0) | Parses zero or more alphabetical ASCII characters (case insensitive). | `Alpha0()` | +| [`Alpha1`](https://pkg.go.dev/github.com/oleiade/gomme#Alpha1) | Parses one or more alphabetical ASCII characters (case insensitive). | `Alpha1()` | +| [`Alphanumeric0`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric0) | Parses zero or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric0()` | +| [`Alphanumeric1`](https://pkg.go.dev/github.com/oleiade/gomme#Alphanumeric1) | Parses one or more alphabetical and numerical ASCII characters (case insensitive). | `Alphanumeric1()` | +| [`Digit0`](https://pkg.go.dev/github.com/oleiade/gomme#Digit0) | Parses zero or more numerical ASCII characters: 0-9. | `Digit0()` | +| [`Digit1`](https://pkg.go.dev/github.com/oleiade/gomme#Digit1) | Parses one or more numerical ASCII characters: 0-9.
| `Digit1()` | +| [`HexDigit0`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit0) | Parses zero or more hexadecimal ASCII characters (case insensitive). | `HexDigit0()` | +| [`HexDigit1`](https://pkg.go.dev/github.com/oleiade/gomme#HexDigit1) | Parses one or more hexadecimal ASCII characters (case insensitive). | `HexDigit1()` | +| [`Whitespace0`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace0) | Parses zero or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace0()` | +| [`Whitespace1`](https://pkg.go.dev/github.com/oleiade/gomme#Whitespace1) | Parses one or more whitespace ASCII characters: space, tab, carriage return, line feed. | `Whitespace1()` | +| [`LF`](https://pkg.go.dev/github.com/oleiade/gomme#LF) | Parses a single new line character '\n'. | `LF()` | +| [`CRLF`](https://pkg.go.dev/github.com/oleiade/gomme#CRLF) | Parses a '\r\n' string. | `CRLF()` | +| [`OneOf`](https://pkg.go.dev/github.com/oleiade/gomme#OneOf) | Parses one of the provided characters. Equivalent to using `Alternative` over a series of `Char` parsers. | `OneOf('a', 'b' , 'c')` | +| [`Satisfy`](https://pkg.go.dev/github.com/oleiade/gomme#Satisfy) | Parses a single character, asserting that it matches the provided predicate. The predicate function takes a `rune` as input and returns a `bool`. `Satisfy` is useful for building custom character matchers. | `Satisfy(func(c rune)bool { return c == '{' || c == '[' })` | +| [`Space`](https://pkg.go.dev/github.com/oleiade/gomme#Space) | Parses a single space character ' '. | `Space()` | +| [`Tab`](https://pkg.go.dev/github.com/oleiade/gomme#Tab) | Parses a single tab character '\t'. | `Tab()` | +| [`Int64`](https://pkg.go.dev/github.com/oleiade/gomme#Int64) | Parses an `int64` from its textual representation. | `Int64()` | +| [`Int8`](https://pkg.go.dev/github.com/oleiade/gomme#Int8) | Parses an `int8` from its textual representation. 
| `Int8()` | +| [`UInt8`](https://pkg.go.dev/github.com/oleiade/gomme#UInt8) | Parses a `uint8` from its textual representation. | `UInt8()` | + +#### Combinators for Sequences + +| Combinator | Description | Example | +| :--- | :--- | :--- | +| [`Preceded`](https://pkg.go.dev/github.com/oleiade/gomme#Preceded) | Applies the prefix parser and discards its result. It then applies the main parser and returns its result. It discards the prefix value. It proves useful when looking for data prefixed with a pattern. For instance, when parsing a value, prefixed with its name. | `Preceded(Token("name:"), Alpha1())` | +| [`Terminated`](https://pkg.go.dev/github.com/oleiade/gomme#Terminated) | Applies the main parser, followed by the suffix parser whom it discards the result of, and returns the result of the main parser. Note that if the suffix parser fails, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for suffixed data while not interested in retaining the suffix value itself. For instance, when parsing a value followed by a control character. | `Terminated(Digit1(), LF())` | +| [`Delimited`](https://pkg.go.dev/github.com/oleiade/gomme#Delimited) | Applies the prefix parser, the main parser, followed by the suffix parser, discards the result of both the prefix and suffix parsers, and returns the result of the main parser. Note that if any of the prefix or suffix parsers fail, the whole operation fails, regardless of the result of the main parser. It proves useful when looking for data surrounded by patterns helping them identify it without retaining its value. For instance, when parsing a value, prefixed by its name and followed by a control character. | `Delimited(Tag("name:"), Digit1(), LF())` | +| [`Pair`](https://pkg.go.dev/github.com/oleiade/gomme#Pair) | Applies two parsers in a row and returns a pair container holding both their result values. 
| `Pair(Alpha1(), Tag("cm"))` | +| [`SeparatedPair`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedPair) | Applies a left parser, a separator parser, and a right parser, discards the result of the separator parser, and returns the result of the left and right parsers as a pair container holding the result values. | `SeparatedPair(Alpha1(), Tag(":"), Alpha1())` | +| [`Sequence`](https://pkg.go.dev/github.com/oleiade/gomme#Sequence) | Applies a sequence of parsers sharing the same signature. If any of the provided parsers fail, the whole operation fails. | `Sequence(SeparatedPair(Tag("name"), Char(':'), Alpha1()), SeparatedPair(Tag("height"), Char(':'), Digit1()))` | + +#### Combinators for Applying Parsers Many Times + +| Combinator | Description | Example | +| :--- | :--- | :--- | +| [`Count`](https://pkg.go.dev/github.com/oleiade/gomme#Count) | Applies the provided parser `count` times. If the parser fails before it can be applied `count` times, the operation fails. It proves useful whenever one needs to parse the same pattern many times in a row. | `Count(OneOf('a', 'b', 'c'), 3)` | +| [`Many0`](https://pkg.go.dev/github.com/oleiade/gomme#Many0) | Keeps applying the provided parser until it fails and returns a slice of all the results. Specifically, if the parser fails to match, `Many0` still succeeds, returning an empty slice of results. It proves useful when trying to consume a repeated pattern, regardless of whether there's any match, like when trying to parse any number of whitespaces in a row. | `Many0(Char(' '))` | +| [`Many1`](https://pkg.go.dev/github.com/oleiade/gomme#Many1) | Keeps applying the provided parser until it fails and returns a slice of all the results. If the parser fails to match at least once, `Many1` fails. It proves useful when trying to consume a repeated pattern, like any number of whitespaces in a row, ensuring that it appears at least once.
| `Many1(LF())` | +| [`SeparatedList0`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList0) | | | +| [`SeparatedList1`](https://pkg.go.dev/github.com/oleiade/gomme#SeparatedList1) | | | + +#### Combinators for Choices + +| Combinator | Description | Example | +| :--- | :--- | :--- | +| [`Alternative`](https://pkg.go.dev/github.com/oleiade/gomme#Alternative) | Tests a list of parsers, one by one, until one succeeds. Note that all parsers must share the same signature (`Parser[I, O]`). | `Alternative(Token("abc"), Token("123"))` | ## Installation @@ -100,30 +215,28 @@ Add the library to your Go project with the following command: go get github.com/oleiade/gomme@latest ``` -## FAQ +## Frequently asked questions -#### What are parser combinators? +### Q: What are parser combinators? -Parser combinators are a programming paradigm for building parsers. As opposed to hand-written or generated parser, they adopt a functional programming approach to parsing, and are based on the idea of composing parsers together to build more complex parsers. What that means in practice, is that instead of writing a parser that parses a whole format, by analyzing and branching based on each characters of your input, you write a set of parsers that parse the smallest possible unit of the format, and then compose them together to build more complex parsers. +**A**: Parser combinators offer a new way of building parsers. Instead of writing a complex parser that analyzes an entire format, you create small, simple parsers that handle the smallest units of the format. These small parsers can then be combined to build more complex parsers. It's a bit like using building blocks to construct whatever structure you want. -A key concept to understand is that parser combinators are not parsers themselves, but rather a toolkit that allows you to build parsers. This is why parser combinators are often referred to as a "parser building toolkit". 
Parser combinator generally are functions producing other functions ingesting some input byte by byte based on some predicate, and returning a result. The result is a structure containing the output of the parser, the remaining part (once the combinator's predicate is not matched anymore, it stops and returns both what it "consumed", and what was left of the input), and an error if the parser failed to parse the input. The output of the parser is the result of the parsing process, and can be of any type. The error is a Go error, and can be used to provide more information about the parsing failure. +### Q: Why would I use parser combinators instead of a specific parser? -#### Why would I want to use parser combinators, and not write my own specific parser? +**A**: Parser combinators are incredibly flexible and intuitive. Once you're familiar with them, they enable you to quickly create, maintain, and modify parsers. They offer you a high degree of freedom in designing your parser and how it's used. -Parser combinators are very flexible, and once you get a good hang on them, they'll allow you to write parsers that are very easy to maintain, modify and extend very easily, and very fast. They are also allegedly quite intuitive, and descriptive of what the underlying data format they parse looks like. Because they're essentially a bunch of functions, generating other functions, composed in various ways depending on the need, they afford you a lot of freedom in how you want to build your specific parser, and how you want to use it. -#### Where can I read/watch about Parser Combinators? +### Q: Where can I learn more about parser combinators? 
-We recommend the following resources: +**A**: Here are some resources we recommend: - [You could have invented parser combinators](https://theorangeduck.com/page/you-could-have-invented-parser-combinators) - [Functional Parsing](https://www.youtube.com/watch?v=dDtZLm7HIJs) - [Building a Mapping Language in Go with Parser Combinators](https://www.youtube.com/watch?v=JiViND-bpmw) ## Acknowledgements -We can frankly take close to zero credit for this library, apart for the work put into assembling the already existing elements of theory and implementation into a single autonomous project. - -This library relies heavily on the whole theorical work done in the parser combinators space. From the implementation side of things, it was specifically started with the intention to have something similar to Rust's incredible [nom](https://github.com/Geal/nom) library in Go. This project was made possible by the pre-existing implementation of some parser combinators in [benthos'](https://github.com/benthosdev/benthos) blob lang implementation. Although the end-result is somewhat different from it, this project wouldn't have been possible without this pre-existing resource as a guiding example. +We can frankly take close to zero credit for this library, apart from the work put into assembling the already existing elements of theory and implementation into a single autonomous project. +We've stood on the shoulders of giants to create Gomme. The library draws heavily on the extensive theoretical work done in the parser combinators space, and we owe a huge debt to Rust's [nom](https://github.com/Geal/nom) and [benthos'](https://github.com/benthosdev/benthos) blob lang implementation. Our goal was to consolidate these diverse elements into a single, easy-to-use Go library.
## Authors - [@oleiade](https://github.com/oleiade) diff --git a/TODO.md b/TODO.md index daafd1d..3702f22 100644 --- a/TODO.md +++ b/TODO.md @@ -4,11 +4,14 @@ - [ ] Create `Bytes` interface type for bytes file's content - [ ] Create `String` or `Characters` interface type for characters file's content -- [ ] Add Examples -- [ ] Document Recognize as explicitly as possible -- [ ] Add an `ErrInfiniteLoop` (`Many0`) - [ ] Sort Out Fatal/Non-Fatal errors (distinguish whether a parser failed in an expected manner, or if the whole parsing should be interrupted) - [ ] Reduce Int8/Int64 allocations (their parsers could be somewhat simplified?) +- [ ] Add combinator to parse whitespace (+ helper for multispace0/1?) +- [ ] Refactor TakeWhileOneOf to be "just" TakeWhile +- [ ] Refactor space to be of the form space0 and space1 +- [ ] Rename `LF` to `Newline` +- [X] Document Recognize as explicitly as possible +- [X] Add Examples - [x] Add Benchmarks - [x] Make sure the Failure messages are properly cased - [x] Rename `p` parser arguments to `parse` for clearer code @@ -20,9 +23,13 @@ - [ ] Rename `Preceded` to `Prefixed` - [ ] Rename `Terminated` to `Suffixed` - [ ] Rename `Sequence` to `List`? -- [ ] Introduce `SeparatedList` as a result of previous? - [ ] Rename `Satisfy` to `Satisfies`? +- [X] Introduce `SeparatedList` as a result of previous? +- [X] Create `bytes.go` file to distinguish from characters ## Track - [ ] Chase allocations, document them, and reduce their amount as much as possible + +## NoNos +- [X] Add an `ErrInfiniteLoop` (`Many0`) \ No newline at end of file diff --git a/bytes.go b/bytes.go new file mode 100644 index 0000000..86be43e --- /dev/null +++ b/bytes.go @@ -0,0 +1,99 @@ +package gomme + +import ( + "fmt" + "strings" +) + +// Take returns a subset of the input of size `count`. 
+func Take[I Bytes](count uint) Parser[I, I] { + return func(input I) Result[I, I] { + if len(input) == 0 && count > 0 { + return Failure[I, I](NewError(input, "TakeUntil"), input) + } + + if uint(len(input)) < count { + return Failure[I, I](NewError(input, "Take"), input) + } + + return Success(input[:count], input[count:]) + } +} + +// TakeUntil parses any number of characters until the provided parser is successful. +// If the provided parser is not successful, the parser fails, and the entire input is +// returned as the Result's Remaining. +func TakeUntil[I Bytes, O any](parse Parser[I, O]) Parser[I, I] { + return func(input I) Result[I, I] { + if len(input) == 0 { + return Failure[I, I](NewError(input, "TakeUntil"), input) + } + + pos := 0 + for ; pos < len(input); pos++ { + current := input[pos:] + res := parse(current) + if res.Err == nil { + return Success(input[:pos], input[pos:]) + } + + continue + } + + return Failure[I, I](NewError(input, "TakeUntil"), input) + } +} + +// TakeWhileMN returns the longest input subset that matches the predicates, within +// the boundaries of `atLeast` <= len(input) <= `atMost`. +// +// If the provided parser is not successful or the pattern is out of the +// `atLeast` <= len(input) <= `atMost` range, the parser fails, and the entire +// input is returned as the Result's Remaining. +func TakeWhileMN[I Bytes](atLeast, atMost uint, predicate func(rune) bool) Parser[I, I] { + return func(input I) Result[I, I] { + if len(input) == 0 { + return Failure[I, I](NewError(input, "TakeWhileMN"), input) + } + + // Input is shorter than the minimum expected matching length, + // it is thus not possible to match it within the established + // constraints. 
+ if uint(len(input)) < atLeast { + return Failure[I, I](NewError(input, "TakeWhileMN"), input) + } + + lastValidPos := 0 + for idx := 0; idx < len(input); idx++ { + if uint(idx) == atMost { + break + } + + matched := predicate(rune(input[idx])) + if !matched { + if uint(idx) < atLeast { + return Failure[I, I](NewError(input, "TakeWhileMN"), input) + } + + return Success(input[:idx], input[idx:]) + } + + lastValidPos++ + } + + return Success(input[:lastValidPos], input[lastValidPos:]) + } +} + +// Token parses a token from the input, and returns the part of the input that +// matched the token. +// If the token could not be found, the parser returns an error result. +func Token[I Bytes](token string) Parser[I, I] { + return func(input I) Result[I, I] { + if !strings.HasPrefix(string(input), token) { + return Failure[I, I](NewError(input, fmt.Sprintf("Token(%s)", token)), input) + } + + return Success(input[:len(token)], input[len(token):]) + } +} diff --git a/bytes_test.go b/bytes_test.go new file mode 100644 index 0000000..4d63c66 --- /dev/null +++ b/bytes_test.go @@ -0,0 +1,449 @@ +package gomme + +import ( + "fmt" + "testing" +) + +func TestTake(t *testing.T) { + t.Parallel() + + type args struct { + p Parser[string, string] + } + testCases := []struct { + name string + args args + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "taking less than input size should succeed", + input: "1234567", + args: args{ + p: Take[string](6), + }, + wantErr: false, + wantOutput: "123456", + wantRemaining: "7", + }, + { + name: "taking exact input size should succeed", + input: "123456", + args: args{ + p: Take[string](6), + }, + wantErr: false, + wantOutput: "123456", + wantRemaining: "", + }, + { + name: "taking more than input size should fail", + input: "123", + args: args{ + p: Take[string](6), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "123", + }, + { + name: "taking from empty input should fail", + input: "", + args: 
args{ + p: Take[string](6), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "", + }, + } + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.args.p(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkTake(b *testing.B) { + p := Take[string](6) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p("123456") + } +} + +func TestTakeUntil(t *testing.T) { + t.Parallel() + + type args struct { + p Parser[string, string] + } + testCases := []struct { + name string + args args + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "matching parser should succeed", + input: "abc123", + args: args{ + p: TakeUntil(Digit1[string]()), + }, + wantErr: false, + wantOutput: "abc", + wantRemaining: "123", + }, + { + name: "immediately matching parser should succeed", + input: "123", + args: args{ + p: TakeUntil(Digit1[string]()), + }, + wantErr: false, + wantOutput: "", + wantRemaining: "123", + }, + { + name: "no match should fail", + input: "abcdef", + args: args{ + p: TakeUntil(Digit1[string]()), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "abcdef", + }, + { + name: "empty input should fail", + input: "", + args: args{ + p: TakeUntil(Digit1[string]()), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "", + }, + } + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.args.p(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != 
tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkTakeUntil(b *testing.B) { + p := TakeUntil(Digit1[string]()) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p("abc123") + } +} + +func TestTakeWhileMN(t *testing.T) { + t.Parallel() + + type args struct { + p Parser[string, string] + } + testCases := []struct { + name string + args args + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "parsing input with enough characters and partially matching predicated should succeed", + input: "latin123", + args: args{ + p: TakeWhileMN[string](3, 6, IsAlpha), + }, + wantErr: false, + wantOutput: "latin", + wantRemaining: "123", + }, + { + name: "parsing input longer than atLeast and atMost should succeed", + input: "lengthy", + args: args{ + p: TakeWhileMN[string](3, 6, IsAlpha), + }, + wantErr: false, + wantOutput: "length", + wantRemaining: "y", + }, + { + name: "parsing input longer than atLeast and shorter than atMost should succeed", + input: "latin", + args: args{ + p: TakeWhileMN[string](3, 6, IsAlpha), + }, + wantErr: false, + wantOutput: "latin", + wantRemaining: "", + }, + { + name: "parsing too short input should fail", + input: "ed", + args: args{ + p: TakeWhileMN[string](3, 6, IsAlpha), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "ed", + }, + { + name: "parsing with non-matching predicate should fail", + input: "12345", + args: args{ + p: TakeWhileMN[string](3, 6, IsAlpha), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "12345", + }, + } + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.args.p(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } 
+ + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkTakeWhileMN(b *testing.B) { + p := TakeWhileMN[string](3, 6, IsAlpha) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p("latin") + } +} + +// TakeWhileOneOf parses any number of characters present in the +// provided collection of runes. +func TakeWhileOneOf[I Bytes](collection ...rune) Parser[I, I] { + index := make(map[rune]struct{}, len(collection)) + + for _, r := range collection { + index[r] = struct{}{} + } + + expected := fmt.Sprintf("chars(%v)", string(collection)) + + return func(input I) Result[I, I] { + if len(input) == 0 { + return Failure[I, I](NewError(input, expected), input) + } + + pos := 0 + for ; pos < len(input); pos++ { + _, exists := index[rune(input[pos])] + if !exists { + if pos == 0 { + return Failure[I, I](NewError(input, expected), input) + } + + break + } + } + + return Success(input[:pos], input[pos:]) + } +} + +func TestTakeWhileOneOf(t *testing.T) { + t.Parallel() + + type args struct { + p Parser[string, string] + } + testCases := []struct { + name string + args args + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "matching parser should succeed", + input: "abc123", + args: args{ + p: TakeWhileOneOf[string]('a', 'b', 'c'), + }, + wantErr: false, + wantOutput: "abc", + wantRemaining: "123", + }, + { + name: "no match should fail", + input: "123", + args: args{ + p: TakeWhileOneOf[string]('a', 'b', 'c'), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "123", + }, + { + name: "empty input should fail", + input: "", + args: args{ + p: TakeWhileOneOf[string]('a', 'b', 'c'), + }, + wantErr: true, + wantOutput: "", + wantRemaining: "", + }, + } + for _, tc := range testCases { + tc := tc + + 
t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.args.p(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkTakeWhileOneOf(b *testing.B) { + p := TakeWhileOneOf[string]('a', 'b', 'c') + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p("abc123") + } +} + +func TestToken(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + parser Parser[string, string] + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "parsing a token from an input starting with it should succeed", + parser: Token[string]("Bonjour"), + input: "Bonjour tout le monde", + wantErr: false, + wantOutput: "Bonjour", + wantRemaining: " tout le monde", + }, + { + name: "parsing a token from an non-matching input should fail", + parser: Token[string]("Bonjour"), + input: "Hello tout le monde", + wantErr: true, + wantOutput: "", + wantRemaining: "Hello tout le monde", + }, + { + name: "parsing a token from an empty input should fail", + parser: Token[string]("Bonjour"), + input: "", + wantErr: true, + wantOutput: "", + wantRemaining: "", + }, + } + + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.parser(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + 
} +} + +func BenchmarkToken(b *testing.B) { + parser := Token[string]("Bonjour") + + for i := 0; i < b.N; i++ { + parser("Bonjour tout le monde") + } +} diff --git a/characters.go b/characters.go index 0428e58..6094d55 100644 --- a/characters.go +++ b/characters.go @@ -1,9 +1,7 @@ package gomme import ( - "fmt" "strconv" - "strings" ) // Char parses a single character and matches it with @@ -221,6 +219,54 @@ func HexDigit1[I Bytes]() Parser[I, I] { } } +// WhiteSpace0 parses zero or more whitespace characters: ' ', '\t', '\n', '\r'. +// In the cases where the input is empty, or no terminating character is found, the parser +// returns the input as is. +func Whitespace0[I Bytes]() Parser[I, I] { + return func(input I) Result[I, I] { + if len(input) == 0 { + return Success(input, input) + } + + lastPos := 0 + for idx := 0; idx < len(input); idx++ { + if !IsWhitespace(rune(input[idx])) { + return Success(input[:idx], input[idx:]) + } + + lastPos++ + } + + return Success(input[:lastPos], input[lastPos:]) + } +} + +// Whitespace1 parses one or more whitespace characters: ' ', '\t', '\n', '\r'. +// In the cases where the input doesn't hold enough data, or a terminating character +// is found before any matching ones were, the parser returns an error result. +func Whitespace1[I Bytes]() Parser[I, I] { + return func(input I) Result[I, I] { + if len(input) == 0 { + return Failure[I, I](NewError(input, "WhiteSpace1"), input) + } + + if !IsWhitespace(rune(input[0])) { + return Failure[I, I](NewError(input, "WhiteSpace1"), input) + } + + lastPos := 1 + for idx := 1; idx < len(input); idx++ { + if !IsWhitespace(rune(input[idx])) { + return Success(input[:idx], input[idx:]) + } + + lastPos++ + } + + return Success(input[:lastPos], input[lastPos:]) + } +} + // LF parses a line feed `\n` character. 
func LF[I Bytes]() Parser[I, rune] { return func(input I) Result[rune, I] { @@ -308,19 +354,6 @@ func Tab[I Bytes]() Parser[I, rune] { } } -// Token parses a token from the input, and returns the part of the input that -// matched the token. -// If the token could not be found, the parser returns an error result. -func Token[I Bytes](token string) Parser[I, I] { - return func(input I) Result[I, I] { - if !strings.HasPrefix(string(input), token) { - return Failure[I, I](NewError(input, fmt.Sprintf("Token(%s)", token)), input) - } - - return Success(input[:len(token)], input[len(token):]) - } -} - // Int64 parses an integer from the input, and returns the part of the input that // matched the integer. func Int64[I Bytes]() Parser[I, int64] { @@ -413,3 +446,7 @@ func IsHexDigit(c rune) bool { func IsControl(c rune) bool { return c < 32 || c == 127 } + +func IsWhitespace(c rune) bool { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' +} diff --git a/characters_test.go b/characters_test.go index 97605b2..5c5bad1 100644 --- a/characters_test.go +++ b/characters_test.go @@ -668,6 +668,257 @@ func BenchmarkHexDigit1(b *testing.B) { } } +func TestWhitespace0(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + parser Parser[string, string] + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "parsing single whitespace from single ' ' input should succeed", + parser: Whitespace0[string](), + input: " ", + wantErr: false, + wantOutput: " ", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\t' input should succeed", + parser: Whitespace0[string](), + input: "\t", + wantErr: false, + wantOutput: "\t", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\n' input should succeed", + parser: Whitespace0[string](), + input: "\n", + wantErr: false, + wantOutput: "\n", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\r' input should 
succeed", + parser: Whitespace0[string](), + input: "\r", + wantErr: false, + wantOutput: "\r", + wantRemaining: "", + }, + { + name: "parsing multiple whitespace chars from multiple whitespace chars input should succeed", + parser: Whitespace0[string](), + input: " \t\n\r", + wantErr: false, + wantOutput: " \t\n\r", + wantRemaining: "", + }, + { + name: "parsing multiple whitespace chars from multiple whitespace chars with suffix input should succeed", + parser: Whitespace0[string](), + input: " \t\n\rabc", + wantErr: false, + wantOutput: " \t\n\r", + wantRemaining: "abc", + }, + { + name: "parsing an empty input should succeed", + parser: Whitespace0[string](), + input: "", + wantErr: false, + wantOutput: "", + wantRemaining: "", + }, + { + name: "parsing a single non-whitespace char input should succeed", + parser: Whitespace0[string](), + input: "a", + wantErr: false, + wantOutput: "", + wantRemaining: "a", + }, + { + name: "parsing input starting with a non-whitespace char should succeed", + parser: Whitespace0[string](), + input: "a \t\n\r", + wantErr: false, + wantOutput: "", + wantRemaining: "a \t\n\r", + }, + { + name: "parsing non-whitespace chars should succeed", + parser: Whitespace0[string](), + input: "ghi", + wantErr: false, + wantOutput: "", + wantRemaining: "ghi", + }, + } + + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.parser(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkWhitespace0(b *testing.B) { + b.ReportAllocs() + parser := Whitespace0[string]() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + 
parser(" \t\n\r") + } +} + +func TestWhitespace1(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + parser Parser[string, string] + input string + wantErr bool + wantOutput string + wantRemaining string + }{ + { + name: "parsing single whitespace from single ' ' input should succeed", + parser: Whitespace1[string](), + input: " ", + wantErr: false, + wantOutput: " ", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\t' input should succeed", + parser: Whitespace1[string](), + input: "\t", + wantErr: false, + wantOutput: "\t", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\n' input should succeed", + parser: Whitespace1[string](), + input: "\n", + wantErr: false, + wantOutput: "\n", + wantRemaining: "", + }, + { + name: "parsing single whitespace from single '\r' input should succeed", + parser: Whitespace1[string](), + input: "\r", + wantErr: false, + wantOutput: "\r", + wantRemaining: "", + }, + { + name: "parsing multiple whitespace chars from multiple whitespace chars input should succeed", + parser: Whitespace1[string](), + input: " \t\n\r", + wantErr: false, + wantOutput: " \t\n\r", + wantRemaining: "", + }, + { + name: "parsing multiple whitespace chars from multiple whitespace chars with suffix input should succeed", + parser: Whitespace1[string](), + input: " \t\n\rabc", + wantErr: false, + wantOutput: " \t\n\r", + wantRemaining: "abc", + }, + { + name: "parsing an empty input should fail", + parser: Whitespace1[string](), + input: "", + wantErr: true, + wantOutput: "", + wantRemaining: "", + }, + { + name: "parsing a single non-whitespace char input should fail", + parser: Whitespace1[string](), + input: "a", + wantErr: true, + wantOutput: "", + wantRemaining: "a", + }, + { + name: "parsing input starting with a non-whitespace char should fail", + parser: Whitespace1[string](), + input: "a \t\n\r", + wantErr: true, + wantOutput: "", + wantRemaining: "a \t\n\r", + }, + { + 
name: "parsing non-whitespace chars should fail", + parser: Whitespace1[string](), + input: "ghi", + wantErr: true, + wantOutput: "", + wantRemaining: "ghi", + }, + } + + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotResult := tc.parser(tc.input) + if (gotResult.Err != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) + } + + if gotResult.Output != tc.wantOutput { + t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) + } + + if gotResult.Remaining != tc.wantRemaining { + t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) + } + }) + } +} + +func BenchmarkWhitespace1(b *testing.B) { + b.ReportAllocs() + + parser := Whitespace1[string]() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + parser(" \t\n\r") + } +} + func TestAlphanumeric0(t *testing.T) { t.Parallel() @@ -1487,73 +1738,6 @@ func BenchmarkTab(b *testing.B) { } } -func TestToken(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - parser Parser[string, string] - input string - wantErr bool - wantOutput string - wantRemaining string - }{ - { - name: "parsing a token from an input starting with it should succeed", - parser: Token[string]("Bonjour"), - input: "Bonjour tout le monde", - wantErr: false, - wantOutput: "Bonjour", - wantRemaining: " tout le monde", - }, - { - name: "parsing a token from an non-matching input should fail", - parser: Token[string]("Bonjour"), - input: "Hello tout le monde", - wantErr: true, - wantOutput: "", - wantRemaining: "Hello tout le monde", - }, - { - name: "parsing a token from an empty input should fail", - parser: Token[string]("Bonjour"), - input: "", - wantErr: true, - wantOutput: "", - wantRemaining: "", - }, - } - - for _, tc := range testCases { - tc := tc - - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - gotResult := tc.parser(tc.input) - if (gotResult.Err != nil) != tc.wantErr { - 
t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) - } - - if gotResult.Output != tc.wantOutput { - t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) - } - - if gotResult.Remaining != tc.wantRemaining { - t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) - } - }) - } -} - -func BenchmarkToken(b *testing.B) { - parser := Token[string]("Bonjour") - - for i := 0; i < b.N; i++ { - parser("Bonjour tout le monde") - } -} - func TestInt64(t *testing.T) { t.Parallel() diff --git a/combinators.go b/combinators.go index b20b37c..0fca6c4 100644 --- a/combinators.go +++ b/combinators.go @@ -5,8 +5,6 @@ // providing as much compile-time type safety as possible. package gomme -import "fmt" - // FIXME: Ideally, I would want the combinators working with sequences // to produce somewhat detailed errors, and tell me which of the combinators failed @@ -44,118 +42,6 @@ func Failure[I Bytes, O any](err *Error[I], input I) Result[O, I] { return Result[O, I]{output, err, input} } -// TakeWhileOneOf parses any number of characters present in the -// provided collection of runes. -func TakeWhileOneOf[I Bytes](collection ...rune) Parser[I, I] { - index := make(map[rune]struct{}, len(collection)) - - for _, r := range collection { - index[r] = struct{}{} - } - - expected := fmt.Sprintf("chars(%v)", string(collection)) - - return func(input I) Result[I, I] { - if len(input) == 0 { - return Failure[I, I](NewError(input, expected), input) - } - - pos := 0 - for ; pos < len(input); pos++ { - _, exists := index[rune(input[pos])] - if !exists { - if pos == 0 { - return Failure[I, I](NewError(input, expected), input) - } - - break - } - } - - return Success(input[:pos], input[pos:]) - } -} - -// TakeUntil parses any number of characters until the provided parser is successful. -// If the provided parser is not successful, the parser fails, and the entire input is -// returned as the Result's Remaining. 
-func TakeUntil[I Bytes, O any](parse Parser[I, O]) Parser[I, I] { - return func(input I) Result[I, I] { - if len(input) == 0 { - return Failure[I, I](NewError(input, "TakeUntil"), input) - } - - pos := 0 - for ; pos < len(input); pos++ { - current := input[pos:] - res := parse(current) - if res.Err == nil { - return Success(input[:pos], input[pos:]) - } - - continue - } - - return Failure[I, I](NewError(input, "TakeUntil"), input) - } -} - -// Take returns a subset of the input of size `count`. -func Take[I Bytes](count uint) Parser[I, I] { - return func(input I) Result[I, I] { - if len(input) == 0 && count > 0 { - return Failure[I, I](NewError(input, "TakeUntil"), input) - } - - if uint(len(input)) < count { - return Failure[I, I](NewError(input, "Take"), input) - } - - return Success(input[:count], input[count:]) - } -} - -// TakeWhileMN returns the longest input subset that matches the predicates, within -// the boundaries of `atLeast` <= len(input) <= `atMost`. -// -// If the provided parser is not successful or the pattern is out of the -// `atLeast` <= len(input) <= `atMost` range, the parser fails, and the entire -// input is returned as the Result's Remaining. -func TakeWhileMN[I Bytes](atLeast, atMost uint, predicate func(rune) bool) Parser[I, I] { - return func(input I) Result[I, I] { - if len(input) == 0 { - return Failure[I, I](NewError(input, "TakeWhileMN"), input) - } - - // Input is shorter than the minimum expected matching length, - // it is thus not possible to match it within the established - // constraints. 
- if uint(len(input)) < atLeast { - return Failure[I, I](NewError(input, "TakeWhileMN"), input) - } - - lastValidPos := 0 - for idx := 0; idx < len(input); idx++ { - if uint(idx) == atMost { - break - } - - matched := predicate(rune(input[idx])) - if !matched { - if uint(idx) < atLeast { - return Failure[I, I](NewError(input, "TakeWhileMN"), input) - } - - return Success(input[:idx], input[idx:]) - } - - lastValidPos++ - } - - return Success(input[:lastValidPos], input[lastValidPos:]) - } -} - // Map applies a function to the result of a parser. func Map[I Bytes, PO any, MO any](parse Parser[I, PO], fn func(PO) (MO, error)) Parser[I, MO] { return func(input I) Result[MO, I] { diff --git a/combinators_test.go b/combinators_test.go index 38d1fa0..5daf68d 100644 --- a/combinators_test.go +++ b/combinators_test.go @@ -5,350 +5,6 @@ import ( "testing" ) -func TestTakeWhileOneOf(t *testing.T) { - t.Parallel() - - type args struct { - p Parser[string, string] - } - testCases := []struct { - name string - args args - input string - wantErr bool - wantOutput string - wantRemaining string - }{ - { - name: "matching parser should succeed", - input: "abc123", - args: args{ - p: TakeWhileOneOf[string]('a', 'b', 'c'), - }, - wantErr: false, - wantOutput: "abc", - wantRemaining: "123", - }, - { - name: "no match should fail", - input: "123", - args: args{ - p: TakeWhileOneOf[string]('a', 'b', 'c'), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "123", - }, - { - name: "empty input should fail", - input: "", - args: args{ - p: TakeWhileOneOf[string]('a', 'b', 'c'), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "", - }, - } - for _, tc := range testCases { - tc := tc - - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - gotResult := tc.args.p(tc.input) - if (gotResult.Err != nil) != tc.wantErr { - t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) - } - - if gotResult.Output != tc.wantOutput { - t.Errorf("got output %v, want output %v", 
gotResult.Output, tc.wantOutput) - } - - if gotResult.Remaining != tc.wantRemaining { - t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) - } - }) - } -} - -func BenchmarkTakeWhileOneOf(b *testing.B) { - p := TakeWhileOneOf[string]('a', 'b', 'c') - - b.ResetTimer() - for i := 0; i < b.N; i++ { - p("abc123") - } -} - -func TestTakeUntil(t *testing.T) { - t.Parallel() - - type args struct { - p Parser[string, string] - } - testCases := []struct { - name string - args args - input string - wantErr bool - wantOutput string - wantRemaining string - }{ - { - name: "matching parser should succeed", - input: "abc123", - args: args{ - p: TakeUntil(Digit1[string]()), - }, - wantErr: false, - wantOutput: "abc", - wantRemaining: "123", - }, - { - name: "immediately matching parser should succeed", - input: "123", - args: args{ - p: TakeUntil(Digit1[string]()), - }, - wantErr: false, - wantOutput: "", - wantRemaining: "123", - }, - { - name: "no match should fail", - input: "abcdef", - args: args{ - p: TakeUntil(Digit1[string]()), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "abcdef", - }, - { - name: "empty input should fail", - input: "", - args: args{ - p: TakeUntil(Digit1[string]()), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "", - }, - } - for _, tc := range testCases { - tc := tc - - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - gotResult := tc.args.p(tc.input) - if (gotResult.Err != nil) != tc.wantErr { - t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) - } - - if gotResult.Output != tc.wantOutput { - t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) - } - - if gotResult.Remaining != tc.wantRemaining { - t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) - } - }) - } -} - -func BenchmarkTakeUntil(b *testing.B) { - p := TakeUntil(Digit1[string]()) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - p("abc123") - } -} - -func 
TestTake(t *testing.T) { - t.Parallel() - - type args struct { - p Parser[string, string] - } - testCases := []struct { - name string - args args - input string - wantErr bool - wantOutput string - wantRemaining string - }{ - { - name: "taking less than input size should succeed", - input: "1234567", - args: args{ - p: Take[string](6), - }, - wantErr: false, - wantOutput: "123456", - wantRemaining: "7", - }, - { - name: "taking exact input size should succeed", - input: "123456", - args: args{ - p: Take[string](6), - }, - wantErr: false, - wantOutput: "123456", - wantRemaining: "", - }, - { - name: "taking more than input size should fail", - input: "123", - args: args{ - p: Take[string](6), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "123", - }, - { - name: "taking from empty input should fail", - input: "", - args: args{ - p: Take[string](6), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "", - }, - } - for _, tc := range testCases { - tc := tc - - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - gotResult := tc.args.p(tc.input) - if (gotResult.Err != nil) != tc.wantErr { - t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) - } - - if gotResult.Output != tc.wantOutput { - t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) - } - - if gotResult.Remaining != tc.wantRemaining { - t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) - } - }) - } -} - -func BenchmarkTake(b *testing.B) { - p := Take[string](6) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - p("123456") - } -} - -func TestTakeWhileMN(t *testing.T) { - t.Parallel() - - type args struct { - p Parser[string, string] - } - testCases := []struct { - name string - args args - input string - wantErr bool - wantOutput string - wantRemaining string - }{ - { - name: "parsing input with enough characters and partially matching predicated should succeed", - input: "latin123", - args: args{ - p: 
TakeWhileMN[string](3, 6, IsAlpha), - }, - wantErr: false, - wantOutput: "latin", - wantRemaining: "123", - }, - { - name: "parsing input longer than atLeast and atMost should succeed", - input: "lengthy", - args: args{ - p: TakeWhileMN[string](3, 6, IsAlpha), - }, - wantErr: false, - wantOutput: "length", - wantRemaining: "y", - }, - { - name: "parsing input longer than atLeast and shorter than atMost should succeed", - input: "latin", - args: args{ - p: TakeWhileMN[string](3, 6, IsAlpha), - }, - wantErr: false, - wantOutput: "latin", - wantRemaining: "", - }, - { - name: "parsing too short input should fail", - input: "ed", - args: args{ - p: TakeWhileMN[string](3, 6, IsAlpha), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "ed", - }, - { - name: "parsing with non-matching predicate should fail", - input: "12345", - args: args{ - p: TakeWhileMN[string](3, 6, IsAlpha), - }, - wantErr: true, - wantOutput: "", - wantRemaining: "12345", - }, - } - for _, tc := range testCases { - tc := tc - - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - gotResult := tc.args.p(tc.input) - if (gotResult.Err != nil) != tc.wantErr { - t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr) - } - - if gotResult.Output != tc.wantOutput { - t.Errorf("got output %v, want output %v", gotResult.Output, tc.wantOutput) - } - - if gotResult.Remaining != tc.wantRemaining { - t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining) - } - }) - } -} - -func BenchmarkTakeWhileMN(b *testing.B) { - p := TakeWhileMN[string](3, 6, IsAlpha) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - p("latin") - } -} - func TestMap(t *testing.T) { t.Parallel() diff --git a/examples/csv/csv.go b/examples/csv/csv.go new file mode 100644 index 0000000..d11a785 --- /dev/null +++ b/examples/csv/csv.go @@ -0,0 +1,30 @@ +// Package csv implements a parser for CSV files. 
+// +// It is a simple, incomplete, example of how to use the gomme +// parser combinator library to build a parser targetting the +// format described in [RFC4180]. +// +// [RFC4180]: https://tools.ietf.org/html/rfc4180 +package csv + +import "github.com/oleiade/gomme" + +func ParseCSV(input string) ([][]string, error) { + parser := gomme.SeparatedList1( + gomme.SeparatedList1( + gomme.Alternative( + gomme.Alphanumeric1[string](), + gomme.Delimited(gomme.Char[string]('"'), gomme.Alphanumeric1[string](), gomme.Char[string]('"')), + ), + gomme.Char[string](','), + ), + gomme.CRLF[string](), + ) + + result := parser(input) + if result.Err != nil { + return nil, result.Err + } + + return result.Output, nil +} diff --git a/examples/csv/csv_test.go b/examples/csv/csv_test.go new file mode 100644 index 0000000..38559bd --- /dev/null +++ b/examples/csv/csv_test.go @@ -0,0 +1,58 @@ +package csv + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseRGBColor(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + input string + wantErr bool + wantOutput [][]string + }{ + { + name: "parsing a single csv line should succeed", + input: "abc,def,ghi\r\n", + wantErr: false, + wantOutput: [][]string{{"abc", "def", "ghi"}}, + }, + { + name: "parsing multie csv lines should succeed", + input: "abc,def,ghi\r\njkl,mno,pqr\r\n", + wantErr: false, + wantOutput: [][]string{ + {"abc", "def", "ghi"}, + {"jkl", "mno", "pqr"}, + }, + }, + { + name: "parsing a single csv line of escaped strings should succeed", + input: "\"abc\",\"def\",\"ghi\"\r\n", + wantErr: false, + wantOutput: [][]string{{"abc", "def", "ghi"}}, + }, + } + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + gotOutput, gotErr := ParseCSV(tc.input) + if (gotErr != nil) != tc.wantErr { + t.Errorf("got error %v, want error %v", gotErr, tc.wantErr) + } + + assert.Equal(t, + tc.wantOutput, + gotOutput, + "got output %v, want 
output %v", gotOutput, tc.wantOutput, + ) + }) + } +} diff --git a/multi.go b/multi.go index 28fa721..675142c 100644 --- a/multi.go +++ b/multi.go @@ -1,6 +1,9 @@ package gomme // Count runs the provided parser `count` times. +// +// If the provided parser cannot be successfully applied `count` times, the operation +// fails and the Result will contain an error. func Count[I Bytes, O any](parse Parser[I, O], count uint) Parser[I, []O] { return func(input I) Result[[]O, I] { if len(input) == 0 || count == 0 { @@ -91,3 +94,106 @@ func Many1[I Bytes, O any](parse Parser[I, O]) Parser[I, []O] { } } } + +// SeparatedList0 applies an element parser and a separator parser repeatedly in order +// to produce a list of elements. +// +// Note that SeparatedList0 will succeed even if the element parser fails to match at all. +// It will however fail if the provided element parser accepts empty inputs (such as +// `Digit0`, or `Alpha0`) in order to prevent infinite loops. +// +// Because the `SeparatedList0` is really looking to produce a list of elements resulting +// from the provided main parser, it will succeed even if the separator parser fails to +// match at all. It will however fail if the provided separator parser accepts empty +// inputs in order to prevent infinite loops. +func SeparatedList0[I Bytes, O any, S Separator](parse Parser[I, O], separator Parser[I, S]) Parser[I, []O] { + return func(input I) Result[[]O, I] { + results := []O{} + + res := parse(input) + if res.Err != nil { + return Success(results, input) + } + + // Checking for infinite loops, if nothing was consumed, + // the provided parser would make us go around in circles. 
+ if len(res.Remaining) == len(input) { + return Failure[I, []O](NewError(input, "SeparatedList0"), input) + } + + results = append(results, res.Output) + remaining := res.Remaining + + for { + separatorResult := separator(remaining) + if separatorResult.Err != nil { + return Success(results, remaining) + } + + // Checking for infinite loops, if nothing was consumed, + // the provided parser would make us go around in circles. + if len(separatorResult.Remaining) == len(remaining) { + return Failure[I, []O](NewError(input, "SeparatedList0"), input) + } + + parserResult := parse(separatorResult.Remaining) + if parserResult.Err != nil { + return Success(results, remaining) + } + + results = append(results, parserResult.Output) + + remaining = parserResult.Remaining + } + } +} + +// SeparatedList1 applies an element parser and a separator parser repeatedly in order +// to produce a list of elements. +// +// Note that SeparatedList1 will fail if the element parser fails to match at all. +// +// Because the `SeparatedList1` is really looking to produce a list of elements resulting +// from the provided main parser, it will succeed even if the separator parser fails to +// match at all. +func SeparatedList1[I Bytes, O any, S Separator](parse Parser[I, O], separator Parser[I, S]) Parser[I, []O] { + return func(input I) Result[[]O, I] { + results := []O{} + + res := parse(input) + if res.Err != nil { + return Failure[I, []O](res.Err, input) + } + + // Checking for infinite loops, if nothing was consumed, + // the provided parser would make us go around in circles. 
+		if len(res.Remaining) == len(input) {
+			return Failure[I, []O](NewError(input, "SeparatedList0"), input)
+		}
+
+		results = append(results, res.Output)
+		remaining := res.Remaining
+
+		for {
+			separatorResult := separator(remaining)
+			if separatorResult.Err != nil {
+				return Success(results, remaining)
+			}
+
+			// Checking for infinite loops, if nothing was consumed,
+			// the provided parser would make us go around in circles.
+			if len(separatorResult.Remaining) == len(remaining) {
+				return Failure[I, []O](NewError(input, "SeparatedList0"), input)
+			}
+
+			parserResult := parse(separatorResult.Remaining)
+			if parserResult.Err != nil {
+				return Success(results, remaining)
+			}
+
+			results = append(results, parserResult.Output)
+
+			remaining = parserResult.Remaining
+		}
+	}
+}
diff --git a/multi_test.go b/multi_test.go
index d29cf6b..4771b3d 100644
--- a/multi_test.go
+++ b/multi_test.go
@@ -295,3 +295,223 @@ func BenchmarkMany1(b *testing.B) {
 		parser("###")
 	}
 }
+
+// TestSeparatedList0 checks the error, output and remaining input produced by SeparatedList0 parsers.
+func TestSeparatedList0(t *testing.T) {
+	t.Parallel()
+
+	type args struct {
+		p Parser[string, []string]
+	}
+	testCases := []struct {
+		name          string
+		args          args
+		input         string
+		wantErr       bool
+		wantOutput    []string
+		wantRemaining string
+	}{
+		{
+			name:  "matching parser should succeed",
+			input: "abc,abc,abc",
+			args: args{
+				p: SeparatedList0(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc", "abc", "abc"},
+			wantRemaining: "",
+		},
+		{
+			name:  "matching parser and missing separator should succeed",
+			input: "abc123abc",
+			args: args{
+				p: SeparatedList0(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc"},
+			wantRemaining: "123abc",
+		},
+		{
+			name:  "parser with separator but non-matching right side should succeed",
+			input: "abc,def",
+			args: args{
+				p: SeparatedList0(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc"},
+			wantRemaining: ",def",
+		},
+		{
+			name:  "parser matching on the right of the separator should succeed",
+			input: "def,abc",
+			args: args{
+				p: SeparatedList0(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{},
+			wantRemaining: "def,abc",
+		},
+		{
+			name:  "empty input should succeed",
+			input: "",
+			args: args{
+				p: SeparatedList0(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{},
+			wantRemaining: "",
+		},
+		{
+			name:  "parsing input without separator should succeed",
+			input: "123",
+			args: args{
+				p: SeparatedList0(Digit0[string](), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"123"},
+			wantRemaining: "",
+		},
+		{
+			name:  "using a parser accepting empty input should fail",
+			input: "",
+			args: args{
+				p: SeparatedList0(Digit0[string](), Char[string](',')),
+			},
+			wantErr:       true,
+			wantOutput:    nil,
+			wantRemaining: "",
+		},
+	}
+	for _, tc := range testCases {
+		tc := tc
+
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			gotResult := tc.args.p(tc.input)
+			if (gotResult.Err != nil) != tc.wantErr {
+				t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
+			}
+
+			// Slices are compared through testify's assert.Equal rather than by hand.
+			assert.Equal(t,
+				tc.wantOutput, gotResult.Output,
+				"got output %v, want output %v", gotResult.Output, tc.wantOutput,
+			)
+
+			if gotResult.Remaining != tc.wantRemaining {
+				t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
+			}
+		})
+	}
+}
+
+// BenchmarkSeparatedList0 repeatedly applies a SeparatedList0 parser to the input "#,#,#".
+func BenchmarkSeparatedList0(b *testing.B) {
+	parser := SeparatedList0(Char[string]('#'), Char[string](','))
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		parser("#,#,#")
+	}
+}
+
+// TestSeparatedList1 checks the error, output and remaining input produced by SeparatedList1 parsers.
+func TestSeparatedList1(t *testing.T) {
+	t.Parallel()
+
+	type args struct {
+		p Parser[string, []string]
+	}
+	testCases := []struct {
+		name          string
+		args          args
+		input         string
+		wantErr       bool
+		wantOutput    []string
+		wantRemaining string
+	}{
+		{
+			name:  "matching parser should succeed",
+			input: "abc,abc,abc",
+			args: args{
+				p: SeparatedList1(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc", "abc", "abc"},
+			wantRemaining: "",
+		},
+		{
+			name:  "matching parser and missing separator should succeed",
+			input: "abc123abc",
+			args: args{
+				p: SeparatedList1(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc"},
+			wantRemaining: "123abc",
+		},
+		{
+			name:  "parser with separator but non-matching right side should succeed",
+			input: "abc,def",
+			args: args{
+				p: SeparatedList1(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       false,
+			wantOutput:    []string{"abc"},
+			wantRemaining: ",def",
+		},
+		{
+			name:  "parser matching on the right of the separator should fail",
+			input: "def,abc",
+			args: args{
+				p: SeparatedList1(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       true,
+			wantOutput:    nil,
+			wantRemaining: "def,abc",
+		},
+		{
+			name:  "empty input should fail",
+			input: "",
+			args: args{
+				p: SeparatedList1(Token[string]("abc"), Char[string](',')),
+			},
+			wantErr:       true,
+			wantOutput:    nil,
+			wantRemaining: "",
+		},
+	}
+	for _, tc := range testCases {
+		tc := tc
+
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			gotResult := tc.args.p(tc.input)
+			if (gotResult.Err != nil) != tc.wantErr {
+				t.Errorf("got error %v, want error %v", gotResult.Err, tc.wantErr)
+			}
+
+			// Slices are compared through testify's assert.Equal rather than by hand.
+			assert.Equal(t,
+				tc.wantOutput, gotResult.Output,
+				"got output %v, want output %v", gotResult.Output, tc.wantOutput,
+			)
+
+			if gotResult.Remaining != tc.wantRemaining {
+				t.Errorf("got remaining %v, want remaining %v", gotResult.Remaining, tc.wantRemaining)
+			}
+		})
+	}
+}
+
+// BenchmarkSeparatedList1 repeatedly applies a SeparatedList1 parser to the input "#,#,#".
+func BenchmarkSeparatedList1(b *testing.B) {
+	parser := SeparatedList1(Char[string]('#'), Char[string](','))
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		parser("#,#,#")
+	}
+}
diff --git a/sequence.go b/sequence.go
index b373a1e..00f3135 100644
--- a/sequence.go
+++ b/sequence.go
@@ -9,8 +9,8 @@ func Delimited[I Bytes, OP, O, OS any](prefix Parser[I, OP], parser Parser[I, O]
 	}
 }
 
-// Pair applies two parsers and returns a Result containing a slice of
-// size 2 as its output.
+// Pair applies two parsers and returns a Result containing a pair container holding
+// the resulting values.
 func Pair[I Bytes, LO, RO any, LP Parser[I, LO], RP Parser[I, RO]](
 	leftParser LP, rightParser RP,
 ) Parser[I, PairContainer[LO, RO]] {