separate completion chapter
faldor20 committed May 2, 2024
1 parent 8389031 commit 8b4ff8d
Showing 10 changed files with 1,125 additions and 1 deletion.
40 changes: 40 additions & 0 deletions completion/completion/Completion.roc
@@ -0,0 +1,40 @@
## Implements basic LSP types for
## Init, Hover, DidOpen, DidChange, Completion
app "completion"
    packages { pf: "https://github.com/roc-lang/basic-cli/releases/download/0.9.0/oKWkaruh2zXxin_xfsYsCJobH1tO8_JvNkFzDwwzNUQ.tar.br" }
    imports [
        pf.Stdout,
        ParserWithLoc.{ Paragraph },
    ]
    provides [main] to pf

## Checks if pos is within the range, inclusive of start and end
isInRange = \range, pos ->
    pos >= range.start && pos <= range.end
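
# The range is inclusive at both ends, for example:
expect isInRange { start: 3, end: 10 } 3
expect isInRange { start: 3, end: 10 } 10
expect Bool.not (isInRange { start: 3, end: 10 } 11)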

# TODO: completionsFromParagraph : Paragraph -> List CompletionItem
completionsFromParagraph = \paragraph ->
    word <- paragraph.val |> List.map
    wordStr = word.val
    # TODO: use actual LSP types
    { label: wordStr, kind: (Some Text) }
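
# For reference, a real LSP CompletionItem is (roughly) a record along the lines of
#   { label : Str, kind : U64, detail : Str, documentation : Str }
# where kind is a numeric CompletionItemKind (1 = Text, 3 = Function, ...);
# the record above is just a stand-in until we define proper LSP types.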

getCompletion = \position, document ->
    # This should be a binary search, or we could store ranges in a Dict
    paragraph <-
        document
        |> List.findFirst \p -> p.region |> isInRange position
        |> Result.mapErr NoPara
        |> Result.map

    paragraph |> completionsFromParagraph
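
# A possible alternative to the linear List.findFirst scan above: paragraphs come out
# of the parser already sorted by region, so we could binary search for the one that
# contains pos. This is only a sketch; findParagraphAt and binarySearch are
# hypothetical helpers and are not wired into getCompletion yet.
findParagraphAt = \paragraphs, pos ->
    binarySearch paragraphs pos 0 (List.len paragraphs)

binarySearch = \paragraphs, pos, low, high ->
    if low >= high then
        Err NotFound
    else
        mid = (low + high) // 2
        when List.get paragraphs mid is
            Err OutOfBounds -> Err NotFound
            Ok p ->
                if pos < p.region.start then
                    binarySearch paragraphs pos low mid
                else if pos > p.region.end then
                    binarySearch paragraphs pos (mid + 1) high
                else
                    Ok p
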
handleCompletion = \state, completionRequest ->
    state

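# state is assumed to be a record along the lines of { docs : Dict Str Document }
# (a hypothetical shape; the real State type is not defined in this file yet).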
handleDocumentUpdate = \state, docPath, documentBytes ->
    parsedDoc = documentBytes |> ParserWithLoc.parse
    docs = state.docs |> Dict.insert docPath parsedDoc
    { state & docs }


# Placeholder main so the app compiles
main = Stdout.line "completion"
80 changes: 80 additions & 0 deletions completion/completion/Parser.norg
@@ -0,0 +1,80 @@
@document.meta
tangle: Parser.roc
@end
* Parser
This is a short precursor to the completion chapter, in which we will build an extremely basic parser to provide data for our completion system. You are welcome to skip this chapter and move straight on to completion.
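As a rough illustration of where we are headed: a document like `Hello world`, then a blank line, then `Good bye` should parse into `[["Hello", "world"], ["Good", "bye"]]`: a list of paragraphs, each of which is a list of words.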
@code roc
interface Parser
exposes [
parse,
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.

## The smallest unit of our "language" will be a word
Word : Str
## Words will be in paragraphs:
Paragraph : List Word
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word ->
when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first)

rest ->
{ word: (word |> Str.fromUtf8), rest }

parseParagraph = \bytes, paragraph ->
# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] is
{ word: Ok str, rest } ->
parseParagraph rest (paragraph |> List.append str)


{ word: _, rest } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
parseParagraph rest paragraph

[_, .. as rest] -> parseParagraph rest paragraph
[] -> { paragraph, rest: [] }

parseDocument = \bytes, document ->
when bytes |> parseParagraph [] is
{ paragraph: [], rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: [], rest } -> parseDocument rest document
{ paragraph, rest } -> parseDocument rest (document |> List.append paragraph)

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes []

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse
expected =
[["Hi", "I'm", "Eli", "You", "are", "reading", "my", "tutorial"], ["This", "is", "the", "second", "paragraph"]]
parsed == expected
@end
72 changes: 72 additions & 0 deletions completion/completion/Parser.roc
@@ -0,0 +1,72 @@
interface Parser
exposes [
parse,
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.

## The smallest unit of our "language" will be a word
Word : Str
## Words will be in paragraphs:
Paragraph : List Word
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word ->
when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first)

rest ->
{ word: (word |> Str.fromUtf8), rest }

parseParagraph = \bytes, paragraph ->
# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] is
{ word: Ok str, rest } ->
parseParagraph rest (paragraph |> List.append str)

{ word: _, rest } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
parseParagraph rest paragraph

[_, .. as rest] -> parseParagraph rest paragraph
[] -> { paragraph, rest: [] }

parseDocument = \bytes, document ->
when bytes |> parseParagraph [] is
{ paragraph: [], rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: [], rest } -> parseDocument rest document
{ paragraph, rest } -> parseDocument rest (document |> List.append paragraph)

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes []

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse
expected =
[["Hi", "I'm", "Eli", "You", "are", "reading", "my", "tutorial"], ["This", "is", "the", "second", "paragraph"]]
parsed == expected
120 changes: 120 additions & 0 deletions completion/completion/ParserWithLoc.roc
@@ -0,0 +1,120 @@
interface ParserWithLoc
exposes [
parse,
Paragraph
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.
## The smallest unit of our "language" will be a word
Word : Token Str
## Words will be in paragraphs:
Paragraph : Token (List Word)
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

## Offset into our document
Pos : U64
## Range that a symbol is within
Range : { start : Pos, end : Pos }

Token a : { region : Range, val : a }
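
# For example, the word "Hi" at the very start of a document is represented as
# { val: "Hi", region: { start: 0, end: 2 } }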

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word, start ->
dbg "parsing word"

when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first) start

rest ->
{ word: (word |> Str.fromUtf8), rest, region: { start, end: (word |> List.len) + start } }

parseParagraph = \bytesP, startCount ->
dbg "parsing paragraph"

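# startCount is the total length of the document, so this difference is the byte offset at which this paragraph starts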
offset = (startCount - (bytesP |> List.len))
loop = \bytes, paragraph ->
dbg "parsing paragraph loop"

# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph: { val: paragraph, region: { start: offset, end: startCount } }, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] (startCount - (bytes |> List.len)) is
{ word: Ok str, rest, region } ->
loop rest (paragraph |> List.append { val: str, region })

{ word: _, rest, region: _ } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
loop rest paragraph

[_, .. as rest] -> loop rest paragraph
[] -> { paragraph: { val: paragraph, region: { start: offset, end: startCount } }, rest: [] }
loop bytesP []

parseDocument = \bytesD ->
startCount = (bytesD |> List.len)
loop = \bytes, document ->
dbg "parsing doc loop"

when bytes |> parseParagraph startCount is
{ paragraph: { val: [] }, rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: { val: [] }, rest } -> loop rest document
{ paragraph, rest } -> loop rest (document |> List.append paragraph)
loop bytesD []

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse

dbg "parsed"

expected = [
{
region: { end: 72, start: 0 },
val: [
{ region: { end: 2, start: 0 }, val: "Hi" },
{ region: { end: 6, start: 3 }, val: "I'm" },
{ region: { end: 10, start: 7 }, val: "Eli" },
{ region: { end: 15, start: 12 }, val: "You" },
{ region: { end: 19, start: 16 }, val: "are" },
{ region: { end: 27, start: 20 }, val: "reading" },
{ region: { end: 30, start: 28 }, val: "my" },
{ region: { end: 39, start: 31 }, val: "tutorial" },
],
},
{
region: { end: 72, start: 43 },
val: [
{ region: { end: 47, start: 43 }, val: "This" },
{ region: { end: 50, start: 48 }, val: "is" },
{ region: { end: 54, start: 51 }, val: "the" },
{ region: { end: 61, start: 55 }, val: "second" },
{ region: { end: 71, start: 62 }, val: "paragraph" },
],
},
]

parsed == expected
5 changes: 5 additions & 0 deletions completion/completion/parser.roc
@@ -0,0 +1,5 @@
interface Parser
exposes [
]
imports [
]
File renamed without changes.
2 changes: 1 addition & 1 deletion completion/Core.roc → completion/lsp/Core.roc
@@ -119,7 +119,7 @@ FieldNameMapping : [
Custom (Str -> Str), # provide a custom formatting
]

# TODO encode as JSON numbers as base 10 decimal digits
# TODO encode as JSON kkkkers as base 10 decimal digits
# e.g. the REPL `Num.toStr 12e42f64` gives
# "12000000000000000000000000000000000000000000" : Str
# which should be encoded as "12e42" : Str
File renamed without changes.
