separate completion chapter
faldor20 committed May 2, 2024
1 parent 8389031 commit 8b4ff8d
Showing 10 changed files with 1,125 additions and 1 deletion.
40 changes: 40 additions & 0 deletions completion/completion/Completion.roc
@@ -0,0 +1,40 @@
## Implements basic LSP types for
## Init, Hover, DidOpen, DidChange, Completion
app "completion"
    packages { pf: "https://github.com/roc-lang/basic-cli/releases/download/0.9.0/oKWkaruh2zXxin_xfsYsCJobH1tO8_JvNkFzDwwzNUQ.tar.br" }
    imports [
        pf.Stdout,
        ParserWithLoc.{ Paragraph },
    ]
    provides [main] to pf

## Checks if pos is within the range, inclusive of start and end
isInRange = \range, pos ->
    pos >= range.start && pos <= range.end
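
# The range is inclusive at both ends, for example:
expect isInRange { start: 3, end: 10 } 3
expect isInRange { start: 3, end: 10 } 10
expect Bool.not (isInRange { start: 3, end: 10 } 11)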

# TODO: completionsFromParagraph : Paragraph -> List CompletionItem
completionsFromParagraph = \paragraph ->
    word <- paragraph.val |> List.map
    wordStr = word.val
    # TODO: use actual LSP types
    { label: wordStr, kind: (Some Text) }
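
# For reference, a real LSP CompletionItem is (roughly) a record along the lines of
#   { label : Str, kind : U64, detail : Str, documentation : Str }
# where kind is a numeric CompletionItemKind (1 = Text, 3 = Function, ...);
# the record above is just a stand-in until we define proper LSP types.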

getCompletion = \position, document ->
    # This should be a binary search, or we could store ranges in a Dict
    paragraph <-
        document
        |> List.findFirst \p -> p.region |> isInRange position
        |> Result.mapErr NoPara
        |> Result.map

    paragraph |> completionsFromParagraph
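
# A possible alternative to the linear List.findFirst scan above: paragraphs come out
# of the parser already sorted by region, so we could binary search for the one that
# contains pos. This is only a sketch; findParagraphAt and binarySearch are
# hypothetical helpers and are not wired into getCompletion yet.
findParagraphAt = \paragraphs, pos ->
    binarySearch paragraphs pos 0 (List.len paragraphs)

binarySearch = \paragraphs, pos, low, high ->
    if low >= high then
        Err NotFound
    else
        mid = (low + high) // 2
        when List.get paragraphs mid is
            Err OutOfBounds -> Err NotFound
            Ok p ->
                if pos < p.region.start then
                    binarySearch paragraphs pos low mid
                else if pos > p.region.end then
                    binarySearch paragraphs pos (mid + 1) high
                else
                    Ok p
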
handleCompletion = \state, completionRequest ->
    state

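# state is assumed to be a record along the lines of { docs : Dict Str Document }
# (a hypothetical shape; the real State type is not defined in this file yet).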
handleDocumentUpdate = \state, docPath, documentBytes ->
    parsedDoc = documentBytes |> ParserWithLoc.parse
    docs = state.docs |> Dict.insert docPath parsedDoc
    { state & docs }


# Placeholder main so the app compiles
main = Stdout.line "completion"
80 changes: 80 additions & 0 deletions completion/completion/Parser.norg
@@ -0,0 +1,80 @@
@document.meta
tangle: Parser.roc
@end
* Parser
This is a short precursor to the completion chapter, in which we will build an extremely basic parser to provide data for our completion system. You are welcome to skip this chapter and move straight on to completion.
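As a rough illustration of where we are headed: a document like `Hello world`, then a blank line, then `Good bye` should parse into `[["Hello", "world"], ["Good", "bye"]]`: a list of paragraphs, each of which is a list of words.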
@code roc
interface Parser
exposes [
parse,
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.

## The smallest unit of our "language" will be a word
Word : Str
## Words will be in paragraphs:
Paragraph : List Word
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word ->
when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first)

rest ->
{ word: (word |> Str.fromUtf8), rest }

parseParagraph = \bytes, paragraph ->
# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] is
{ word: Ok str, rest } ->
parseParagraph rest (paragraph |> List.append str)


{ word: _, rest } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
parseParagraph rest paragraph

[_, .. as rest] -> parseParagraph rest paragraph
[] -> { paragraph, rest: [] }

parseDocument = \bytes, document ->
when bytes |> parseParagraph [] is
{ paragraph: [], rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: [], rest } -> parseDocument rest document
{ paragraph, rest } -> parseDocument rest (document |> List.append paragraph)

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes []

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse
expected =
[["Hi", "I'm", "Eli", "You", "are", "reading", "my", "tutorial"], ["This", "is", "the", "second", "paragraph"]]
parsed == expected
@end
72 changes: 72 additions & 0 deletions completion/completion/Parser.roc
@@ -0,0 +1,72 @@
interface Parser
exposes [
parse,
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.

## The smallest unit of our "language" will be a word
Word : Str
## Words will be in paragraphs:
Paragraph : List Word
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word ->
when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first)

rest ->
{ word: (word |> Str.fromUtf8), rest }

parseParagraph = \bytes, paragraph ->
# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] is
{ word: Ok str, rest } ->
parseParagraph rest (paragraph |> List.append str)

{ word: _, rest } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
parseParagraph rest paragraph

[_, .. as rest] -> parseParagraph rest paragraph
[] -> { paragraph, rest: [] }

parseDocument = \bytes, document ->
when bytes |> parseParagraph [] is
{ paragraph: [], rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: [], rest } -> parseDocument rest document
{ paragraph, rest } -> parseDocument rest (document |> List.append paragraph)

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes []

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse
expected =
[["Hi", "I'm", "Eli", "You", "are", "reading", "my", "tutorial"], ["This", "is", "the", "second", "paragraph"]]
parsed == expected
120 changes: 120 additions & 0 deletions completion/completion/ParserWithLoc.roc
@@ -0,0 +1,120 @@
interface ParserWithLoc
exposes [
parse,
Paragraph
]
imports [
]

## This is going to be the stand-in for our compiler.
## In a typical LS implementation you would call the language's compiler and have it parse and then typecheck your files. Here we will simulate that for our text format.
## The smallest unit of our "language" will be a word
Word : Token Str
## Words will be in paragraphs:
Paragraph : Token (List Word)
## many Paragraphs will be in a document
Document : List Paragraph
## Now let's go about parsing this whole thing.

## Offset into our document
Pos : U64
## Range that a symbol is within
Range : { start : Pos, end : Pos }

Token a : { region : Range, val : a }
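
# For example, the word "Hi" at the very start of a document is represented as
# { val: "Hi", region: { start: 0, end: 2 } }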

# First let's define what a word is. In this case we'll say a word can contain letters, numbers, hyphens, or apostrophes
isWordChar = \byte ->
(byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || (byte >= '0' && byte <= '9') || byte == '-' || byte == '\''

parseWord = \bytes, word, start ->
dbg "parsing word"

when bytes is
[first, .. as rest] if isWordChar first ->
parseWord rest (word |> List.append first) start

rest ->
{ word: (word |> Str.fromUtf8), rest, region: { start, end: (word |> List.len) + start } }

parseParagraph = \bytesP, startCount ->
dbg "parsing paragraph"

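# startCount is the total length of the document, so this difference is the byte offset at which this paragraph starts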
offset = (startCount - (bytesP |> List.len))
loop = \bytes, paragraph ->
dbg "parsing paragraph loop"

# We have to do this separately because of a compiler bug; once it's fixed we can use a single `when` expression
when bytes is
['\n', '\n', .. as rest] ->
{ paragraph: { val: paragraph, region: { start: offset, end: startCount } }, rest }

_ ->
when bytes is
[first, .. as wordRest] if isWordChar first ->
when parseWord wordRest [first] (startCount - (bytes |> List.len)) is
{ word: Ok str, rest, region } ->
loop rest (paragraph |> List.append { val: str, region })

{ word: _, rest, region: _ } ->
# If the word is invalid we just don't add it. This is actually impossible because we already said words should only contain certain chars
loop rest paragraph

[_, .. as rest] -> loop rest paragraph
[] -> { paragraph: { val: paragraph, region: { start: offset, end: startCount } }, rest: [] }
loop bytesP []

parseDocument = \bytesD ->
startCount = (bytesD |> List.len)
loop = \bytes, document ->
dbg "parsing doc loop"

when bytes |> parseParagraph startCount is
{ paragraph: { val: [] }, rest: [] } -> document
{ paragraph, rest: [] } -> document |> List.append paragraph
{ paragraph: { val: [] }, rest } -> loop rest document
{ paragraph, rest } -> loop rest (document |> List.append paragraph)
loop bytesD []

parse : List U8 -> Document
parse = \bytes -> parseDocument bytes

expect
parsed =
"""
Hi I'm Eli. You are reading my tutorial.

This is the second paragraph.
"""
|> Str.toUtf8
|> parse

dbg "parsed"

expected = [
{
region: { end: 72, start: 0 },
val: [
{ region: { end: 2, start: 0 }, val: "Hi" },
{ region: { end: 6, start: 3 }, val: "I'm" },
{ region: { end: 10, start: 7 }, val: "Eli" },
{ region: { end: 15, start: 12 }, val: "You" },
{ region: { end: 19, start: 16 }, val: "are" },
{ region: { end: 27, start: 20 }, val: "reading" },
{ region: { end: 30, start: 28 }, val: "my" },
{ region: { end: 39, start: 31 }, val: "tutorial" },
],
},
{
region: { end: 72, start: 43 },
val: [
{ region: { end: 47, start: 43 }, val: "This" },
{ region: { end: 50, start: 48 }, val: "is" },
{ region: { end: 54, start: 51 }, val: "the" },
{ region: { end: 61, start: 55 }, val: "second" },
{ region: { end: 71, start: 62 }, val: "paragraph" },
],
},
]

parsed == expected
5 changes: 5 additions & 0 deletions completion/completion/parser.roc
@@ -0,0 +1,5 @@
interface Parser
exposes [
]
imports [
]
File renamed without changes.
2 changes: 1 addition & 1 deletion completion/Core.roc → completion/lsp/Core.roc
@@ -119,7 +119,7 @@ FieldNameMapping : [
Custom (Str -> Str), # provide a custom formatting
]

# TODO encode as JSON numbers as base 10 decimal digits
# TODO encode as JSON kkkkers as base 10 decimal digits
# e.g. the REPL `Num.toStr 12e42f64` gives
# "12000000000000000000000000000000000000000000" : Str
# which should be encoded as "12e42" : Str
File renamed without changes.
