diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 0000000..cb9dfc1
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1 @@
+build --javacopt="--release 8"
diff --git a/.travis.yml b/.travis.yml
index c128646..4f6fc5c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,4 @@
-dist: xenial
+dist: bionic
addons:
apt:
@@ -13,7 +13,9 @@ script:
- cd examples
# build examples as a means of testing
- bazel build --jobs 2 //antlr2/Cpp/... //antlr2/Calc/... //antlr2/Python/... //antlr3/Cpp/... //antlr3/Java/... //antlr3/Python2/... //antlr3/Python3/... //antlr4/Cpp/... //antlr4/Go/... //antlr4/Java/... //antlr4/Python2/... //antlr4/Python3/...
- - cd ..
+ - cd antlr4-opt
+ - bazel build --jobs 2 //...
+ - cd ../..
- bazel test --jobs 2 --test_output=errors //...
- bazel shutdown
diff --git a/README.md b/README.md
index 459b2a7..f79cb7b 100644
--- a/README.md
+++ b/README.md
@@ -6,13 +6,31 @@
# ANTLR Rules for Bazel
These build rules are used for processing [ANTLR](https://www.antlr.org)
-grammars with [Bazel](https://bazel.build/). Currently C/C++, Go, Java and Python targets are supported.
+grammars with [Bazel](https://bazel.build/).
+ * [Support Matrix](#matrix)
* [Workspace Setup](#setup)
+ [Details](docs/setup.md#setup)
* [Build Rules](#build-rules)
- [Java Example](#java-example)
+
+## Support Matrix
+
+| | antlr4 | antlr3 | antlr2
+|---------|:-------------:|:-------------:|:----:|
+| C | | Gen | Gen
+| C++ | Gen + Runtime | Gen + Runtime | Gen + Runtime
+| Go | Gen + Runtime | |
+| Java | Gen + Runtime | Gen + Runtime | Gen + Runtime
+| ObjC | | Gen |
+| Python2 | Gen + Runtime | Gen + Runtime | Gen + Runtime
+| Python3 | Gen + Runtime | Gen + Runtime |
+
+Gen: Code Generation\
+Runtime: Runtime Library bundled
+
+
## Setup
@@ -25,13 +43,13 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "rules_antlr",
- sha256 = "f7c73e1fe3d3b1be3b65172da756a326d12100f6a8d1ef8327498705c0d52efc",
- strip_prefix = "rules_antlr-0.4.0",
- urls = ["https://github.com/marcohu/rules_antlr/archive/0.4.0.tar.gz"],
+ sha256 = "",
+ strip_prefix = "rules_antlr-0.5.0",
+ urls = ["https://github.com/marcohu/rules_antlr/archive/0.5.0.tar.gz"],
)
load("@rules_antlr//antlr:repositories.bzl", "rules_antlr_dependencies")
-rules_antlr_dependencies("4.7.2")
+rules_antlr_dependencies("4.8")
```
More detailed instructions can be found in the [Setup](docs/setup.md#setup) document.
diff --git a/antlr/impl.bzl b/antlr/impl.bzl
index bd69173..326f02f 100644
--- a/antlr/impl.bzl
+++ b/antlr/impl.bzl
@@ -1,12 +1,12 @@
"""The common ANTLR rule implementation."""
-load(":lang.bzl", "C", "CPP", "GO", "PYTHON", "PYTHON2", "PYTHON3")
+load(":lang.bzl", "C", "CPP", "GO", "OBJC", "PYTHON", "PYTHON2", "PYTHON3")
AntlrInfo = provider(
fields = {
"sources": "The generated source files.",
- "headers": "For C/C++ the generated header files.",
- "data": "Additional ANTLR data files",
+ "headers": "The generated header files (for C/C++/ObjC).",
+ "data": "Additional ANTLR data files.",
},
doc = "A provider containing information about ANTLR code generation.",
)
@@ -31,7 +31,7 @@ def antlr(version, ctx, args):
data = []
sources = []
headers = []
- cc = ctx.attr.language == CPP or ctx.attr.language == C
+ cc = ctx.attr.language == CPP or ctx.attr.language == C or ctx.attr.language == OBJC
output_type = "dir" if ctx.attr.language and ctx.attr.language != "Java" else "srcjar"
if output_type == "srcjar":
@@ -71,6 +71,7 @@ def antlr(version, ctx, args):
"OUTPUT_DIRECTORY": output_dir,
"PACKAGE_NAME": ctx.attr.package,
"SRC_JAR": srcjar.path if srcjar else "",
+ "TARGET": ctx.attr.name,
"TARGET_LANGUAGE": ctx.attr.language,
"TOOL_CLASSPATH": ",".join([f.path for f in tool_inputs]),
},
@@ -89,9 +90,6 @@ def antlr(version, ctx, args):
headers = headers,
data = [ctx.attr.name + ".antlr"],
),
- platform_common.TemplateVariableInfo({
- "INCLUDES": ctx.attr.name + ".inc/" + ctx.attr.package,
- }),
CcInfo(compilation_context = compilation_context) if cc else _NullInfo(),
DefaultInfo(files = depset(outputs)),
]
@@ -106,10 +104,12 @@ def extension(language):
"""
if language == CPP or language == C:
return ".cc"
- if language == PYTHON or language == PYTHON2 or language == PYTHON3:
- return ".py"
if language == GO:
return ".go"
+ if language == OBJC:
+ return ".objc"
+ if language == PYTHON or language == PYTHON2 or language == PYTHON3:
+ return ".py"
return ""
def lib_dir(imports):
@@ -122,7 +122,10 @@ def lib_dir(imports):
"""
lib = {}
for resource in imports:
- lib[resource.path.replace("/" + resource.basename, "")] = None
+ if resource.path.endswith(".srcjar"):
+ lib[resource.path] = None
+ else:
+ lib[resource.path.replace("/" + resource.basename, "")] = None
count = len(lib)
# the lib directory does not allow nested directories
diff --git a/antlr/lang.bzl b/antlr/lang.bzl
index 2913bdb..d48c260 100644
--- a/antlr/lang.bzl
+++ b/antlr/lang.bzl
@@ -4,6 +4,7 @@ CSHARP = "CSharp"
GO = "Go"
JAVA = "Java"
JAVASCRIPT = "JavaScript"
+OBJC = "ObjC"
PYTHON = "Python" # synonym for PYTHON3
PYTHON2 = "Python2"
PYTHON3 = "Python3"
@@ -16,4 +17,4 @@ def supported():
Returns:
the list of supported languages.
"""
- return [C, CPP, GO, JAVA, PYTHON, PYTHON2, PYTHON3]
+ return [C, CPP, GO, JAVA, OBJC, PYTHON, PYTHON2, PYTHON3]
diff --git a/antlr/repositories.bzl b/antlr/repositories.bzl
index 602929c..e2525ab 100644
--- a/antlr/repositories.bzl
+++ b/antlr/repositories.bzl
@@ -1,12 +1,140 @@
"""Loads ANTLR dependencies."""
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_jar")
-load(":lang.bzl", "C", "CPP", "GO", "JAVA", "PYTHON", "PYTHON2", "PYTHON3", supportedLanguages = "supported")
+load(":lang.bzl", "C", "CPP", "GO", "JAVA", "OBJC", "PYTHON", "PYTHON2", "PYTHON3", supportedLanguages = "supported")
-v4 = [4, "4.7.1", "4.7.2"]
+v4 = [4, "4.7.1", "4.7.2", "4.8"]
+v4_opt = [4, "4.7.1", "4.7.2", "4.7.3", "4.7.4"]
v3 = [3, "3.5.2"]
v2 = [2, "2.7.7"]
+PACKAGES = {
+ "antlr": {
+ "4.8": {
+ "url": "https://github.com/antlr/antlr4/archive/4.8.tar.gz",
+ "prefix": "antlr4-4.8",
+ "sha256": "992d52444b81ed75e52ea62f9f38ecb7652d5ce2a2130af143912b3042a6d77e",
+ },
+ "4.7.2": {
+ "url": "https://github.com/antlr/antlr4/archive/4.7.2.tar.gz",
+ "prefix": "antlr4-4.7.2",
+ "sha256": "46f5e1af5f4bd28ade55cb632f9a069656b31fc8c2408f9aa045f9b5f5caad64",
+ },
+ "4.7.1": {
+ "url": "https://github.com/antlr/antlr4/archive/4.7.1.tar.gz",
+ "prefix": "antlr4-4.7.1",
+ "sha256": "4d0714f441333a63e50031c9e8e4890c78f3d21e053d46416949803e122a6574",
+ },
+ "3.5.2": {
+ "url": "https://github.com/marcohu/antlr3/archive/master.tar.gz",
+ "prefix": "antlr3-master",
+ "sha256": "53cd6c8e41995efa0b7d01c53047ad8a0e2c74e56fe03f6e938d2f0493ee7ace",
+ },
+ "2.7.7": {
+ "url": "https://www.antlr2.org/download/antlr-2.7.7.tar.gz",
+ "prefix": "antlr-2.7.7",
+ "sha256": "853aeb021aef7586bda29e74a6b03006bcb565a755c86b66032d8ec31b67dbb9",
+ "patches": ["@rules_antlr//third_party:antlr2_strings.patch"],
+ },
+ },
+ "antlr4_runtime": {
+ "4.8": {
+ "path": "org/antlr/antlr4-runtime/4.8/antlr4-runtime-4.8.jar",
+ "sha256": "2337df5d81e715b39aeea07aac46ad47e4f1f9e9cd7c899f124f425913efdcf8",
+ },
+ "4.7.2": {
+ "path": "org/antlr/antlr4-runtime/4.7.2/antlr4-runtime-4.7.2.jar",
+ "sha256": "4c518b87d4bdff8b44cd8cbc1af816e944b62a3fe5b80b781501cf1f4759bbc4",
+ },
+ "4.7.1": {
+ "path": "org/antlr/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1.jar",
+ "sha256": "43516d19beae35909e04d06af6c0c58c17bc94e0070c85e8dc9929ca640dc91d",
+ },
+ "4.7.4-opt": {
+ "path": "com/tunnelvisionlabs/antlr4-runtime/4.7.4/antlr4-runtime-4.7.4.jar",
+ "sha256": "c0616e1eb3b7aa6b4de9a304ea458d50cac279f78b0b65bf7a8176701f8402ee",
+ },
+ "4.7.3-opt": {
+ "path": "com/tunnelvisionlabs/antlr4-runtime/4.7.3/antlr4-runtime-4.7.3.jar",
+ "sha256": "5f4f0c4031e4b83cb369ef00f4909cdb6f62b11e3d253f83a6184d80c5eb3157",
+ },
+ "4.7.2-opt": {
+ "path": "com/tunnelvisionlabs/antlr4-runtime/4.7.2/antlr4-runtime-4.7.2.jar",
+ "sha256": "fdec73953ba059034336a8e0b0ea5204f6897900bf0b0fa35347ce8a8bb88816",
+ },
+ "4.7.1-opt": {
+ "path": "com/tunnelvisionlabs/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1.jar",
+ "sha256": "ce4f77ff9dc014feb9a8e700de5c77101d203acb6a1e8fa3446905c391ac72b9",
+ },
+ },
+ "antlr4_tool": {
+ "4.8": {
+ "path": "org/antlr/antlr4/4.8/antlr4-4.8.jar",
+ "sha256": "6e4477689371f237d4d8aa40642badbb209d4628ccdd81234d90f829a743bac8",
+ },
+ "4.7.2": {
+ "path": "org/antlr/antlr4/4.7.2/antlr4-4.7.2.jar",
+ "sha256": "a3811fad1e4cb6dde62c189c204cf931c5fa40e06e43839ead4a9f2e188f2fe5",
+ },
+ "4.7.1": {
+ "path": "org/antlr/antlr4/4.7.1/antlr4-4.7.1.jar",
+ "sha256": "a2cdc2f2f8eb893728832568dc54d080eb5a1495edb3b66e51b97122a60a0d87",
+ },
+ "4.7.4-opt": {
+ "path": "com/tunnelvisionlabs/antlr4/4.7.4/antlr4-4.7.4.jar",
+ "sha256": "f84d71d130f17b13f0934af7575626890a4dab0c588a95b80572a66f7deacca4",
+ },
+ "4.7.3-opt": {
+ "path": "com/tunnelvisionlabs/antlr4/4.7.3/antlr4-4.7.3.jar",
+ "sha256": "06cd5f3a9488b32cb1022360df054bbe7aebe8e817c0aa58c8feec05879e0c63",
+ },
+ "4.7.2-opt": {
+ "path": "com/tunnelvisionlabs/antlr4/4.7.2/antlr4-4.7.2.jar",
+ "sha256": "fcc2a0365de371d8676ab9b45c49aa2e784036a77b76383892887c89c5725ca3",
+ },
+ "4.7.1-opt": {
+ "path": "com/tunnelvisionlabs/antlr4/4.7.1/antlr4-4.7.1.jar",
+ "sha256": "de9a7b94b48ea7c8100663cbb1a54465c37671841c0aefdf4c53a72212555ae8",
+ },
+ },
+ "antlr3_runtime": {
+ "3.5.2": {
+ "path": "org/antlr/antlr-runtime/3.5.2/antlr-runtime-3.5.2.jar",
+ "sha256": "ce3fc8ecb10f39e9a3cddcbb2ce350d272d9cd3d0b1e18e6fe73c3b9389c8734",
+ },
+ },
+ "antlr3_tool": {
+ "3.5.2": {
+ # the official release generates problematic C++ code, we therefore use a
+ # custom build forked from https://github.com/ibre5041/antlr3.git
+ "path": "https://github.com/marcohu/antlr3/raw/master/antlr-3.5.3.jar",
+ "sha256": "897d0b914adf2e63899ada179c5f4aeb606d59fdfbb6ccaff5bc87aec300e2ce",
+ },
+ },
+ "antlr2": {
+ "2.7.7": {
+ "path": "antlr/antlr/2.7.7/antlr-2.7.7.jar",
+ "sha256": "88fbda4b912596b9f56e8e12e580cc954bacfb51776ecfddd3e18fc1cf56dc4c",
+ },
+ },
+ "stringtemplate4": {
+ "4.3": {
+ "path": "org/antlr/ST4/4.3/ST4-4.3.jar",
+ "sha256": "28547dba48cfceb77b6efbfe069aebe9ed3324ae60dbd52093d13a1d636ed069",
+ },
+ "4.0.8": {
+ "path": "org/antlr/ST4/4.0.8/ST4-4.0.8.jar",
+ "sha256": "58caabc40c9f74b0b5993fd868e0f64a50c0759094e6a251aaafad98edfc7a3b",
+ },
+ },
+ "javax_json": {
+ "1.0.4": {
+ "path": "org/glassfish/javax.json/1.0.4/javax.json-1.0.4.jar",
+ "sha256": "0e1dec40a1ede965941251eda968aeee052cc4f50378bc316cc48e8159bdbeb4",
+ },
+ },
+}
+
def rules_antlr_dependencies(*versionsAndLanguages):
"""Loads the dependencies for the specified ANTLR releases.
@@ -51,7 +179,9 @@ def rules_antlr_dependencies(*versionsAndLanguages):
languages = [JAVA]
for version in sorted(versions, key = _toString):
- if version == 4 or version == "4.7.2":
+ if version == 4 or version == "4.8":
+ _antlr48_dependencies(languages)
+ elif version == "4.7.2":
_antlr472_dependencies(languages)
elif version == "4.7.1":
_antlr471_dependencies(languages)
@@ -74,99 +204,97 @@ def rules_antlr_optimized_dependencies(version):
Args:
version: the ANTLR release version to make available.
"""
- if version == 4 or version == "4.7.2":
+ if version == 4 or version == "4.7.4":
+ _antlr474_optimized_dependencies()
+ elif version == "4.7.3":
+ _antlr473_optimized_dependencies()
+ elif version == "4.7.2":
_antlr472_optimized_dependencies()
elif version == "4.7.1":
_antlr471_optimized_dependencies()
elif type(version) == "int" or str(version).isdigit():
fail('Integer version \'{}\' no longer valid. Use semantic version "{}" instead.'.format(version, ".".join(str(version).elems())), attr = "version")
else:
- fail('Unsupported ANTLR version provided: "{0}". Currently supported are: {1}'.format(version, v4), attr = "version")
+ fail('Unsupported ANTLR version provided: "{0}". Currently supported are: {1}'.format(version, v4_opt), attr = "version")
+
+def _antlr48_dependencies(languages):
+ _antlr4_dependencies(
+ "4.8",
+ languages,
+ {
+ "antlr4_runtime": "4.8",
+ "antlr4_tool": "4.8",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.3",
+ "javax_json": "1.0.4",
+ },
+ )
def _antlr472_dependencies(languages):
_antlr4_dependencies(
+ "4.7.2",
languages,
{
- "url": "https://github.com/antlr/antlr4/archive/4.7.2.tar.gz",
- "prefix": "antlr4-4.7.2",
- "sha256": "46f5e1af5f4bd28ade55cb632f9a069656b31fc8c2408f9aa045f9b5f5caad64",
+ "antlr4_runtime": "4.7.2",
+ "antlr4_tool": "4.7.2",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
},
- _merge(
- {
- "antlr4_runtime": {
- "name": "antlr4_runtime",
- "path": "org/antlr/antlr4-runtime/4.7.2/antlr4-runtime-4.7.2.jar",
- "sha256": "4c518b87d4bdff8b44cd8cbc1af816e944b62a3fe5b80b781501cf1f4759bbc4",
- },
- "antlr4_tool": {
- "name": "antlr4_tool",
- "path": "org/antlr/antlr4/4.7.2/antlr4-4.7.2.jar",
- "sha256": "a3811fad1e4cb6dde62c189c204cf931c5fa40e06e43839ead4a9f2e188f2fe5",
- },
- },
- _antlr4_transitive_dependencies(),
- ),
)
def _antlr471_dependencies(languages):
_antlr4_dependencies(
+ "4.7.1",
languages,
{
- "url": "https://github.com/antlr/antlr4/archive/4.7.2.tar.gz",
- "prefix": "antlr4-4.7.2",
- "sha256": "46f5e1af5f4bd28ade55cb632f9a069656b31fc8c2408f9aa045f9b5f5caad64",
+ "antlr4_runtime": "4.7.1",
+ "antlr4_tool": "4.7.1",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
},
- _merge(
- {
- "antlr4_runtime": {
- "name": "antlr4_runtime",
- "path": "org/antlr/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1.jar",
- "sha256": "43516d19beae35909e04d06af6c0c58c17bc94e0070c85e8dc9929ca640dc91d",
- },
- "antlr4_tool": {
- "name": "antlr4_tool",
- "path": "org/antlr/antlr4/4.7.1/antlr4-4.7.1.jar",
- "sha256": "a2cdc2f2f8eb893728832568dc54d080eb5a1495edb3b66e51b97122a60a0d87",
- },
- },
- _antlr4_transitive_dependencies(),
- ),
)
+def _antlr474_optimized_dependencies():
+ _dependencies({
+ "antlr4_runtime": "4.7.4-opt",
+ "antlr4_tool": "4.7.4-opt",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
+ })
+
+def _antlr473_optimized_dependencies():
+ _dependencies({
+ "antlr4_runtime": "4.7.3-opt",
+ "antlr4_tool": "4.7.3-opt",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
+ })
+
def _antlr472_optimized_dependencies():
- _download(
- name = "antlr4_runtime",
- path = "com/tunnelvisionlabs/antlr4-runtime/4.7.2/antlr4-runtime-4.7.2.jar",
- sha256 = "fdec73953ba059034336a8e0b0ea5204f6897900bf0b0fa35347ce8a8bb88816",
- )
- _download(
- name = "antlr4_tool",
- path = "com/tunnelvisionlabs/antlr4/4.7.2/antlr4-4.7.2.jar",
- sha256 = "fcc2a0365de371d8676ab9b45c49aa2e784036a77b76383892887c89c5725ca3",
- )
- _antlr4_transitive_dependencies(False)
+ _dependencies({
+ "antlr4_runtime": "4.7.2-opt",
+ "antlr4_tool": "4.7.2-opt",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
+ })
def _antlr471_optimized_dependencies():
- _download(
- name = "antlr4_runtime",
- path = "com/tunnelvisionlabs/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1.jar",
- sha256 = "ce4f77ff9dc014feb9a8e700de5c77101d203acb6a1e8fa3446905c391ac72b9",
- )
- _download(
- name = "antlr4_tool",
- path = "com/tunnelvisionlabs/antlr4/4.7.1/antlr4-4.7.1.jar",
- sha256 = "de9a7b94b48ea7c8100663cbb1a54465c37671841c0aefdf4c53a72212555ae8",
- )
- _antlr4_transitive_dependencies()
-
-def _antlr4_dependencies(languages, archive, dependencies):
- for name in dependencies:
- _download(
- name = name,
- path = dependencies[name]["path"],
- sha256 = dependencies[name]["sha256"],
- )
-
+ _dependencies({
+ "antlr4_runtime": "4.7.1-opt",
+ "antlr4_tool": "4.7.1-opt",
+ "antlr3_runtime": "3.5.2",
+ "stringtemplate4": "4.0.8",
+ "javax_json": "1.0.4",
+ })
+
+def _antlr4_dependencies(version, languages, dependencies):
+ _dependencies(dependencies)
+ archive = PACKAGES["antlr"][version]
build_script, workspace = _antlr4_build_script(languages)
if build_script:
@@ -263,56 +391,20 @@ py_repositories()
def _load_rules_python_defs(script):
return "" if script.find('load("@rules_python//python:defs.bzl"') > -1 else 'load("@rules_python//python:defs.bzl", "py_library")'
-def _antlr4_transitive_dependencies():
- return {
- "antlr3_runtime": {
- "path": "org/antlr/antlr-runtime/3.5.2/antlr-runtime-3.5.2.jar",
- "sha256": "ce3fc8ecb10f39e9a3cddcbb2ce350d272d9cd3d0b1e18e6fe73c3b9389c8734",
- },
- "stringtemplate4": {
- "path": "org/antlr/ST4/4.0.8/ST4-4.0.8.jar",
- "sha256": "58caabc40c9f74b0b5993fd868e0f64a50c0759094e6a251aaafad98edfc7a3b",
- },
- "javax_json": {
- "path": "org/glassfish/javax.json/1.0.4/javax.json-1.0.4.jar",
- "sha256": "0e1dec40a1ede965941251eda968aeee052cc4f50378bc316cc48e8159bdbeb4",
- },
- }
-
def _antlr352_dependencies(languages):
_antlr3_dependencies(
+ "3.5.2",
languages,
{
- "url": "https://github.com/marcohu/antlr3/archive/master.tar.gz",
- "prefix": "antlr3-master",
- "sha256": "53cd6c8e41995efa0b7d01c53047ad8a0e2c74e56fe03f6e938d2f0493ee7ace",
- },
- {
- "antlr3_runtime": {
- "path": "org/antlr/antlr-runtime/3.5.2/antlr-runtime-3.5.2.jar",
- "sha256": "ce3fc8ecb10f39e9a3cddcbb2ce350d272d9cd3d0b1e18e6fe73c3b9389c8734",
- },
- # the official release generates problematic C++ code, we therefore use a
- # custom build forked from https://github.com/ibre5041/antlr3.git
- "antlr3_tool": {
- "path": "https://github.com/marcohu/antlr3/raw/master/antlr-3.5.3.jar",
- "sha256": "897d0b914adf2e63899ada179c5f4aeb606d59fdfbb6ccaff5bc87aec300e2ce",
- },
- "stringtemplate4": {
- "path": "org/antlr/ST4/4.0.8/ST4-4.0.8.jar",
- "sha256": "58caabc40c9f74b0b5993fd868e0f64a50c0759094e6a251aaafad98edfc7a3b",
- },
+ "antlr3_runtime": "3.5.2",
+ "antlr3_tool": "3.5.2",
+ "stringtemplate4": "4.0.8",
},
)
-def _antlr3_dependencies(languages, archive, dependencies):
- for name in dependencies:
- _download(
- name = name,
- path = dependencies[name]["path"],
- sha256 = dependencies[name]["sha256"],
- )
-
+def _antlr3_dependencies(version, languages, dependencies):
+ _dependencies(dependencies)
+ archive = PACKAGES["antlr"][version]
build_script = _antlr3_build_script(languages)
if build_script:
@@ -356,41 +448,30 @@ py_library(
visibility = ["//visibility:public"],
)
"""
+
return script
def _antlr277_dependencies(languages):
_antlr2_dependencies(
+ "2.7.7",
languages,
{
- "url": "https://www.antlr2.org/download/antlr-2.7.7.tar.gz",
- "prefix": "antlr-2.7.7",
- "sha256": "853aeb021aef7586bda29e74a6b03006bcb565a755c86b66032d8ec31b67dbb9",
- },
- {
- "antlr2": {
- "path": "antlr/antlr/2.7.7/antlr-2.7.7.jar",
- "sha256": "88fbda4b912596b9f56e8e12e580cc954bacfb51776ecfddd3e18fc1cf56dc4c",
- },
+ "antlr2": "2.7.7",
},
)
-def _antlr2_dependencies(languages, archive, dependencies):
- for name in dependencies:
- _download(
- name = name,
- path = dependencies[name]["path"],
- sha256 = dependencies[name]["sha256"],
- )
-
+def _antlr2_dependencies(version, languages, dependencies):
+ _dependencies(dependencies)
+ archive = PACKAGES["antlr"][version]
build_script = _antlr2_build_script(languages)
if build_script:
http_archive(
name = "antlr2_runtimes",
- sha256 = "853aeb021aef7586bda29e74a6b03006bcb565a755c86b66032d8ec31b67dbb9",
+ sha256 = archive["sha256"],
strip_prefix = "antlr-2.7.7",
- urls = ["https://www.antlr2.org/download/antlr-2.7.7.tar.gz"],
- patches = ["@rules_antlr//third_party:antlr2_strings.patch"],
+ url = archive["url"],
+ patches = archive["patches"] if "patches" in archive else [],
build_file_content = build_script,
)
@@ -423,6 +504,15 @@ py_library(
return script
+def _dependencies(dependencies):
+ for key in dependencies:
+ version = dependencies[key]
+ _download(
+ name = key,
+ path = PACKAGES[key][version]["path"],
+ sha256 = PACKAGES[key][version]["sha256"],
+ )
+
def _download(name, path, sha256):
http_jar(
name = name,
@@ -448,7 +538,3 @@ def _validateVersions(versions):
def _toString(x):
return str(x)
-
-def _merge(x, y):
- x.update(y)
- return x
diff --git a/docs/setup.md b/docs/setup.md
index 2543f1b..59f52d0 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -8,9 +8,9 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "rules_antlr",
- sha256 = "f7c73e1fe3d3b1be3b65172da756a326d12100f6a8d1ef8327498705c0d52efc",
- strip_prefix = "rules_antlr-0.4.0",
- urls = ["https://github.com/marcohu/rules_antlr/archive/0.4.0.tar.gz"],
+ sha256 = "",
+ strip_prefix = "rules_antlr-0.5.0",
+ urls = ["https://github.com/marcohu/rules_antlr/archive/0.5.0.tar.gz"],
)
```
@@ -74,7 +74,7 @@ The currently supported releases are:
| Release Stream | Supported Versions| Bundled Runtimes
|-----------------|-------------------|---
-| 4 | 4.7.1, 4.7.2 | C++, Go, Java, Python2, Python3
+| 4 | 4.7.1, 4.7.2, 4.8 | C++, Go, Java, Python2, Python3
| 3 | 3.5.2 | C++, Java, Python2, Python3
| 2 | 2.7.7 | C++, Java, Python2
diff --git a/examples/WORKSPACE b/examples/WORKSPACE
index b0f45eb..8634864 100644
--- a/examples/WORKSPACE
+++ b/examples/WORKSPACE
@@ -32,7 +32,7 @@ load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()
-load("@rules_antlr//antlr:lang.bzl", "C", "CPP", "GO", "JAVA", "PYTHON", "PYTHON2")
+load("@rules_antlr//antlr:lang.bzl", "C", "CPP", "GO", "JAVA", "OBJC", "PYTHON", "PYTHON2")
load("@rules_antlr//antlr:repositories.bzl", "rules_antlr_dependencies")
-rules_antlr_dependencies(2, 3, "4.7.2", C, CPP, GO, PYTHON, PYTHON2)
+rules_antlr_dependencies("2.7.7", 3, "4.8", C, CPP, GO, OBJC, PYTHON, PYTHON2)
diff --git a/examples/antlr3/BUILD b/examples/antlr3/BUILD
index 6e3ae64..8b9f424 100644
--- a/examples/antlr3/BUILD
+++ b/examples/antlr3/BUILD
@@ -7,6 +7,7 @@ filegroup(
"InheritSameFolder",
"Java",
"LanguageByAttribute",
+ "ObjC",
"Python2",
"Python3",
],
diff --git a/examples/antlr3/ImportGenerated/src/codegen/BUILD b/examples/antlr3/ImportGenerated/src/codegen/BUILD
new file mode 100644
index 0000000..229fc03
--- /dev/null
+++ b/examples/antlr3/ImportGenerated/src/codegen/BUILD
@@ -0,0 +1,7 @@
+load("@rules_antlr//antlr:antlr3.bzl", "antlr")
+
+antlr(
+ name = "codegen",
+ srcs = glob(["*.g"]),
+ imports = ["//antlr3/ImportGenerated/src/parse:parser"],
+)
diff --git a/examples/antlr3/ImportGenerated/src/codegen/SourceGenTriggers.g b/examples/antlr3/ImportGenerated/src/codegen/SourceGenTriggers.g
new file mode 100644
index 0000000..85e3dc4
--- /dev/null
+++ b/examples/antlr3/ImportGenerated/src/codegen/SourceGenTriggers.g
@@ -0,0 +1,198 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012-2016 Terence Parr
+ * Copyright (c) 2012-2016 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+tree grammar SourceGenTriggers;
+options {
+ language = Java;
+ tokenVocab = ANTLRParser;
+ ASTLabelType = GrammarAST;
+}
+
+@header {
+package org.antlr.v4.codegen;
+import org.antlr.v4.misc.Utils;
+import org.antlr.v4.codegen.model.*;
+import org.antlr.v4.codegen.model.decl.*;
+import org.antlr.v4.tool.*;
+import org.antlr.v4.tool.ast.*;
+import java.util.Collections;
+import java.util.Map;
+import java.util.HashMap;
+}
+
+@members {
+ public OutputModelController controller;
+ public boolean hasLookaheadBlock;
+ public SourceGenTriggers(TreeNodeStream input, OutputModelController controller) {
+ this(input);
+ this.controller = controller;
+ }
+}
+
+dummy : block[null, null] ;
+
+block[GrammarAST label, GrammarAST ebnfRoot] returns [List extends SrcOp> omos]
+ : ^( blk=BLOCK (^(OPTIONS .+))?
+ {List alts = new ArrayList();}
+ ( alternative {alts.add($alternative.altCodeBlock);} )+
+ )
+ {
+ if ( alts.size()==1 && ebnfRoot==null) return alts;
+ if ( ebnfRoot==null ) {
+ $omos = DefaultOutputModelFactory.list(controller.getChoiceBlock((BlockAST)$blk, alts, $label));
+ }
+ else {
+ Choice choice = controller.getEBNFBlock($ebnfRoot, alts);
+ hasLookaheadBlock |= choice instanceof PlusBlock || choice instanceof StarBlock;
+ $omos = DefaultOutputModelFactory.list(choice);
+ }
+ }
+ ;
+
+alternative returns [CodeBlockForAlt altCodeBlock, List ops]
+@init {
+ boolean outerMost = inContext("RULE BLOCK");
+}
+@after {
+ controller.finishAlternative($altCodeBlock, $ops, outerMost);
+}
+ : a=alt[outerMost] {$altCodeBlock=$a.altCodeBlock; $ops=$a.ops;}
+ ;
+
+alt[boolean outerMost] returns [CodeBlockForAlt altCodeBlock, List ops]
+@init {
+ // set alt if outer ALT only (the only ones with alt field set to Alternative object)
+ AltAST altAST = (AltAST)retval.start;
+ if ( outerMost ) controller.setCurrentOuterMostAlt(altAST.alt);
+}
+ : {
+ List elems = new ArrayList();
+ // TODO: shouldn't we pass $start to controller.alternative()?
+ $altCodeBlock = controller.alternative(controller.getCurrentOuterMostAlt(), outerMost);
+ $altCodeBlock.ops = $ops = elems;
+ controller.setCurrentBlock($altCodeBlock);
+ }
+ ^( ALT elementOptions? ( element {if ($element.omos!=null) elems.addAll($element.omos);} )+ )
+
+ | ^(ALT elementOptions? EPSILON)
+ {$altCodeBlock = controller.epsilon(controller.getCurrentOuterMostAlt(), outerMost);}
+ ;
+
+element returns [List extends SrcOp> omos]
+ : labeledElement {$omos = $labeledElement.omos;}
+ | atom[null,false] {$omos = $atom.omos;}
+ | subrule {$omos = $subrule.omos;}
+ | ACTION {$omos = controller.action((ActionAST)$ACTION);}
+ | SEMPRED {$omos = controller.sempred((ActionAST)$SEMPRED);}
+ | ^(ACTION elementOptions) {$omos = controller.action((ActionAST)$ACTION);}
+ | ^(SEMPRED elementOptions) {$omos = controller.sempred((ActionAST)$SEMPRED);}
+ ;
+
+labeledElement returns [List extends SrcOp> omos]
+ : ^(ASSIGN ID atom[$ID,false] ) {$omos = $atom.omos;}
+ | ^(PLUS_ASSIGN ID atom[$ID,false]) {$omos = $atom.omos;}
+ | ^(ASSIGN ID block[$ID,null] ) {$omos = $block.omos;}
+ | ^(PLUS_ASSIGN ID block[$ID,null]) {$omos = $block.omos;}
+ ;
+
+subrule returns [List extends SrcOp> omos]
+ : ^(OPTIONAL b=block[null,$OPTIONAL])
+ {
+ $omos = $block.omos;
+ }
+ | ( ^(op=CLOSURE b=block[null,null])
+ | ^(op=POSITIVE_CLOSURE b=block[null,null])
+ )
+ {
+ List alts = new ArrayList();
+ SrcOp blk = $b.omos.get(0);
+ CodeBlockForAlt alt = new CodeBlockForAlt(controller.delegate);
+ alt.addOp(blk);
+ alts.add(alt);
+ SrcOp loop = controller.getEBNFBlock($op, alts); // "star it"
+ hasLookaheadBlock |= loop instanceof PlusBlock || loop instanceof StarBlock;
+ $omos = DefaultOutputModelFactory.list(loop);
+ }
+ | block[null, null] {$omos = $block.omos;}
+ ;
+
+blockSet[GrammarAST label, boolean invert] returns [List omos]
+ : ^(SET atom[label,invert]+) {$omos = controller.set($SET, $label, invert);}
+ ;
+
+/*
+setElement
+ : STRING_LITERAL
+ | TOKEN_REF
+ | ^(RANGE STRING_LITERAL STRING_LITERAL)
+ ;
+*/
+
+// TODO: combine ROOT/BANG into one then just make new op ref'ing return value of atom/terminal...
+// TODO: same for NOT
+atom[GrammarAST label, boolean invert] returns [List omos]
+ : ^(NOT a=atom[$label, true]) {$omos = $a.omos;}
+ | range[label] {$omos = $range.omos;}
+ | ^(DOT ID terminal[$label])
+ | ^(DOT ID ruleref[$label])
+ | ^(WILDCARD .) {$omos = controller.wildcard($WILDCARD, $label);}
+ | WILDCARD {$omos = controller.wildcard($WILDCARD, $label);}
+ | terminal[label] {$omos = $terminal.omos;}
+ | ruleref[label] {$omos = $ruleref.omos;}
+ | blockSet[$label, invert] {$omos = $blockSet.omos;}
+ ;
+
+ruleref[GrammarAST label] returns [List omos]
+ : ^(RULE_REF ARG_ACTION? elementOptions?) {$omos = controller.ruleRef($RULE_REF, $label, $ARG_ACTION);}
+ ;
+
+range[GrammarAST label] returns [List omos]
+ : ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
+ ;
+
+terminal[GrammarAST label] returns [List omos]
+ : ^(STRING_LITERAL .) {$omos = controller.stringRef($STRING_LITERAL, $label);}
+ | STRING_LITERAL {$omos = controller.stringRef($STRING_LITERAL, $label);}
+ | ^(TOKEN_REF ARG_ACTION .) {$omos = controller.tokenRef($TOKEN_REF, $label, $ARG_ACTION);}
+ | ^(TOKEN_REF .) {$omos = controller.tokenRef($TOKEN_REF, $label, null);}
+ | TOKEN_REF {$omos = controller.tokenRef($TOKEN_REF, $label, null);}
+ ;
+
+elementOptions
+ : ^(ELEMENT_OPTIONS elementOption+)
+ ;
+
+elementOption
+ : ID
+ | ^(ASSIGN ID ID)
+ | ^(ASSIGN ID STRING_LITERAL)
+ | ^(ASSIGN ID ACTION)
+ | ^(ASSIGN ID INT)
+ ;
diff --git a/examples/antlr3/ImportGenerated/src/parse/ANTLRLexer.g b/examples/antlr3/ImportGenerated/src/parse/ANTLRLexer.g
new file mode 100644
index 0000000..4f113a7
--- /dev/null
+++ b/examples/antlr3/ImportGenerated/src/parse/ANTLRLexer.g
@@ -0,0 +1,824 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012-2016 Terence Parr
+ * Copyright (c) 2012-2016 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// File : A3Lexer.g
+// Author : Jim Idle (jimi@temporal-wave.com)
+// Copyright : Free BSD - See @header clause below
+// Version : First implemented as part of ANTLR 3.2 this is the self
+// hosting ANTLR 3 Lexer.
+//
+// Description
+// -----------
+// This is the definitive lexer grammar for parsing ANTLR V3.x.x grammars. All other
+// grammars are derived from this grammar via source code control integration (perforce)
+// or by the gdiff tool.
+//
+// This grammar and its associated grammars A3Parser.g and A3Walker.g exhibit the following
+// traits, which are recommended for all production quality grammars:
+//
+// 1) They are separate grammars, not composite grammars;
+//    2) They implement all supporting methods in a superclass (at least this is recommended
+//       for language targets that support inheritance);
+// 3) All errors are pushed as far down the parsing chain as possible, which means
+// that the lexer tries to defer error reporting to the parser, and the parser
+// tries to defer error reporting to a semantic phase consisting of a single
+// walk of the AST. The reason for this is that the error messages produced
+// from later phases of the parse will generally have better context and so
+// be more useful to the end user. Consider the message: "Syntax error at 'options'"
+// vs: "You cannot specify two options{} sections in a single grammar file".
+// 4) The lexer is 'programmed' to catch common mistakes such as unterminated literals
+// and report them specifically and not just issue confusing lexer mismatch errors.
+//
+
+/** Read in an ANTLR grammar and build an AST. Try not to do
+ * any actions, just build the tree.
+ *
+ * The phases are:
+ *
+ * A3Lexer.g (this file)
+ * A3Parser.g
+ * A3Verify.g (derived from A3Walker.g)
+ * assign.types.g
+ * define.g
+ * buildnfa.g
+ * antlr.print.g (optional)
+ * codegen.g
+ *
+ * Terence Parr
+ * University of San Francisco
+ * 2005
+ * Jim Idle (this v3 grammar)
+ * Temporal Wave LLC
+ * 2009
+ */
+lexer grammar ANTLRLexer;
+
+// ==============================================================================
+// Note that while this grammar does not care about order of constructs
+// that don't really matter, such as options before @header etc, it must first
+// be parsed by the original v2 parser, before it replaces it. That parser does
+// care about order of structures. Hence we are constrained by the v2 parser
+// for at least the first bootstrap release that causes this parser to replace
+// the v2 version.
+// ==============================================================================
+
+// -------
+// Options
+//
+// V3 option directives to tell the tool what we are asking of it for this
+// grammar.
+//
+options {
+
+ // Target language is Java, which is the default but being specific
+ // here as this grammar is also meant as a good example grammar for
+ // for users.
+ //
+ language = Java;
+
+ // The super class that this lexer should expect to inherit from, and
+ // which contains any and all support routines for the lexer. This is
+ // commented out in this baseline (definitive or normative grammar)
+ // - see the ANTLR tool implementation for hints on how to use the super
+ // class
+ //
+ //superclass = AbstractA3Lexer;
+}
+
+tokens { SEMPRED; TOKEN_REF; RULE_REF; LEXER_CHAR_SET; ARG_ACTION; }
+
+// Include the copyright in this source and also the generated source
+//
+@lexer::header {
+/*
+ [The "BSD licence"]
+ Copyright (c) 2005-2009 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package org.antlr.v4.parse;
+import org.antlr.v4.tool.*;
+import org.antlr.v4.runtime.misc.Interval;
+}
+
+
+@members {
+ public static final int COMMENTS_CHANNEL = 2;
+
+ public CommonTokenStream tokens; // track stream we push to; need for context info
+ public boolean isLexerRule = false;
+
+ public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { }
+
+ /** scan backwards from current point in this.tokens list
+ * looking for the start of the rule or subrule.
+ * Return token or null if for some reason we can't find the start.
+ */
+ public Token getRuleOrSubruleStartToken() {
+ if ( tokens==null ) return null;
+ int i = tokens.index();
+ int n = tokens.size();
+ if ( i>=n ) i = n-1; // seems index == n as we lex
+ while ( i>=0 && i ' $ANTLR' SRC
+ | ~(NLCHARS)*
+ )
+
+ | // Multi-line comment, which may be a documentation comment
+      // if it starts /** (note that we protect against accidentally
+      // recognizing a comment /**/ as a documentation comment)
+ //
+ '*' (
+ { input.LA(2) != '/'}?=> '*' { $type = DOC_COMMENT; }
+ | { true }?=> // Required to cover all alts with predicates
+ )
+
+ // Should we support embedded multiline comments here?
+ //
+ (
+ // Pick out end of multiline comment and exit the loop
+ // if we find it.
+ //
+ { !(input.LA(1) == '*' && input.LA(2) == '/') }?
+
+ // Anything else other than the non-greedy match of
+ // the comment close sequence
+ //
+ .
+ )*
+ (
+ // Look for the comment terminator, but if it is accidentally
+ // unterminated, then we will hit EOF, which will trigger the
+ // epsilon alt and hence we can issue an error message relative
+ // to the start of the unterminated multi-line comment
+ //
+ '*/'
+
+ | // Unterminated comment!
+ //
+ {
+ // ErrorManager.msg(Msg.UNTERMINATED_DOC_COMMENT, startLine, offset, $pos, startLine, offset, $pos, (Object)null);
+ }
+ )
+
+ | // There was nothing that made sense following the opening '/' and so
+ // we issue an error regarding the malformed comment
+ //
+ {
+ // TODO: Insert error message relative to comment start
+ //
+ }
+ )
+ {
+ // We do not wish to pass the comments in to the parser. If you are
+ // writing a formatter then you will want to preserve the comments off
+ // channel, but could just skip and save token space if not.
+ //
+ $channel=COMMENTS_CHANNEL;
+ }
+ ;
+
+ARG_OR_CHARSET
+options {k=1;}
+ : {isLexerRule}?=> LEXER_CHAR_SET {$type=LEXER_CHAR_SET;}
+ | {!isLexerRule}?=> ARG_ACTION
+ {
+ $type=ARG_ACTION;
+ // Set the token text to our gathered string minus outer [ ]
+ String t = $text;
+ t = t.substring(1,t.length()-1);
+ setText(t);
+ }
+ ;
+
+fragment
+LEXER_CHAR_SET
+ : '['
+ ( '\\' ~('\r'|'\n')
+ | ~('\r'|'\n'|'\\'|']')
+ )*
+ ']'
+ ;
+
+// --------------
+// Argument specs
+//
+// Certain argument lists, such as those specifying call parameters
+// to a rule invocation, or input parameters to a rule specification
+// are contained within square brackets. In the lexer we consume them
+// all at once and sort them out later in the grammar analysis.
+//
+fragment
+ARG_ACTION
+ : '['
+ (
+ ARG_ACTION
+
+ | ('"')=>ACTION_STRING_LITERAL
+
+ | ('\'')=>ACTION_CHAR_LITERAL
+
+ | ~('['|']')
+ )*
+
+ ']'
+ ;
+
+// -------
+// Actions
+//
+// Other than making sure to distinguish between { and } embedded
+// within what we have assumed to be literals in the action code, the
+// job of the lexer is merely to gather the code within the action
+// (delimited by {}) and pass it to the parser as a single token.
+// We know that this token will be asked for its text somewhere
+// in the upcoming parse, so setting the text here to exclude
+// the delimiting {} is no additional overhead.
+//
+ACTION
+ : NESTED_ACTION
+ ( '?' {$type = SEMPRED;}
+ ( (WSNLCHARS* '=>') => WSNLCHARS* '=>' // v3 gated sempred
+ {
+ Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
+ t.setLine(state.tokenStartLine);
+ t.setText(state.text);
+ t.setCharPositionInLine(state.tokenStartCharPositionInLine);
+ grammarError(ErrorType.V3_GATED_SEMPRED, t);
+ }
+ )?
+ )?
+ ;
+
+// ----------------
+// Action structure
+//
+// Many language targets use {} as block delimiters and so we
+// must recursively match {} delimited blocks to balance the
+// braces. Additionally, we must make some assumptions about
+// literal string representation in the target language. We assume
+// that they are delimited by ' or " and so consume these
+// in their own alts so as not to inadvertently match {}.
+// This rule calls itself on matching a {
+//
+fragment
+NESTED_ACTION
+@init {
+
+ // Record the start line and offsets as if we need to report an
+ // unterminated block, then we want to show the start of the comment
+ // we think is broken, not the end, where people will have to try and work
+ // it out themselves.
+ //
+ int startLine = getLine();
+ int offset = getCharPositionInLine();
+}
+
+ : // Action and other blocks start with opening {
+ //
+ '{'
+ (
+ // And now we can match one of a number of embedded
+ // elements within the action until we find a
+ // } that balances the opening {. If we do not find
+ // the balanced } then we will hit EOF and can issue
+         // an error message about the brace that we believe to
+ // be mismatched. This won't be foolproof but we will
+ // be able to at least report an error against the
+ // opening brace that we feel is in error and this will
+ // guide the user to the correction as best we can.
+ //
+
+
+ // An embedded {} block
+ //
+ NESTED_ACTION
+
+ | // What appears to be a literal
+ //
+ ACTION_CHAR_LITERAL
+
+ | // We have assumed that the target language has C/Java
+ // type comments.
+ //
+ COMMENT
+
+ | // What appears to be a literal
+ //
+ ACTION_STRING_LITERAL
+
+ | // What appears to be an escape sequence
+ //
+ ACTION_ESC
+
+ | // Some other single character that is not
+ // handled above
+ //
+ ~('\\'|'"'|'\''|'/'|'{'|'}')
+
+ )*
+
+ (
+ // Correctly balanced closing brace
+ //
+ '}'
+
+	     | // Looks like we have an imbalanced {} block, report
+ // with respect to the opening brace.
+ //
+ {
+ // TODO: Report imbalanced {}
+ System.out.println("Block starting at line " + startLine + " offset " + (offset+1) + " contains imbalanced {} or is missing a }");
+ }
+ )
+ ;
+
+
+// Keywords
+// --------
+// keywords used to specify ANTLR v3 grammars. Keywords may not be used as
+// labels for rules or in any other context where they would be ambiguous
+// with the keyword vs some other identifier
+// OPTIONS, TOKENS, and CHANNELS must also consume the opening brace that captures
+// their option block, as this is the easiest way to parse it separate
+// to an ACTION block, despite it using the same {} delimiters.
+//
+OPTIONS : 'options' WSNLCHARS* '{' ;
+TOKENS_SPEC : 'tokens' WSNLCHARS* '{' ;
+CHANNELS : 'channels' WSNLCHARS* '{' ;
+
+IMPORT : 'import' ;
+FRAGMENT : 'fragment' ;
+LEXER : 'lexer' ;
+PARSER : 'parser' ;
+GRAMMAR : 'grammar' ;
+TREE_GRAMMAR : 'tree' WSNLCHARS* 'grammar' ;
+PROTECTED : 'protected' ;
+PUBLIC : 'public' ;
+PRIVATE : 'private' ;
+RETURNS : 'returns' ;
+LOCALS : 'locals' ;
+THROWS : 'throws' ;
+CATCH : 'catch' ;
+FINALLY : 'finally' ;
+MODE : 'mode' ;
+
+// -----------
+// Punctuation
+//
+// Character sequences used as separators, delimiters, operators, etc
+//
+COLON : ':'
+ {
+ // scan backwards, looking for a RULE_REF or TOKEN_REF.
+ // which would indicate the start of a rule definition.
+ // If we see a LPAREN, then it's the start of the subrule.
+ // this.tokens is the token string we are pushing into, so
+ // just loop backwards looking for a rule definition. Then
+ // we set isLexerRule.
+ Token t = getRuleOrSubruleStartToken();
+ if ( t!=null ) {
+ if ( t.getType()==RULE_REF ) isLexerRule = false;
+ else if ( t.getType()==TOKEN_REF ) isLexerRule = true;
+ // else must be subrule; don't alter context
+ }
+ }
+ ;
+COLONCOLON : '::' ;
+COMMA : ',' ;
+SEMI : ';' ;
+LPAREN : '(' ;
+RPAREN : ')' ;
+RARROW : '->' ;
+LT : '<' ;
+GT : '>' ;
+ASSIGN : '=' ;
+QUESTION : '?' ;
+SYNPRED : '=>'
+ {
+ Token t = new CommonToken(input, state.type, state.channel,
+ state.tokenStartCharIndex, getCharIndex()-1);
+ t.setLine(state.tokenStartLine);
+ t.setText(state.text);
+ t.setCharPositionInLine(state.tokenStartCharPositionInLine);
+ grammarError(ErrorType.V3_SYNPRED, t);
+ $channel=HIDDEN;
+ }
+ ;
+STAR : '*' ;
+PLUS : '+' ;
+PLUS_ASSIGN : '+=' ;
+OR : '|' ;
+DOLLAR : '$' ;
+DOT : '.' ; // can be WILDCARD or DOT in qid or imported rule ref
+RANGE : '..' ;
+AT : '@' ;
+POUND : '#' ;
+NOT : '~' ;
+RBRACE : '}' ;
+
+/** Allow unicode rule/token names */
+ID : a=NameStartChar NameChar*
+ {
+ if ( Grammar.isTokenName($a.text) ) $type = TOKEN_REF;
+ else $type = RULE_REF;
+ }
+ ;
+
+fragment
+NameChar : NameStartChar
+ | '0'..'9'
+ | '_'
+ | '\u00B7'
+ | '\u0300'..'\u036F'
+ | '\u203F'..'\u2040'
+ ;
+
+fragment
+NameStartChar
+ : 'A'..'Z' | 'a'..'z'
+ | '\u00C0'..'\u00D6'
+ | '\u00D8'..'\u00F6'
+ | '\u00F8'..'\u02FF'
+ | '\u0370'..'\u037D'
+ | '\u037F'..'\u1FFF'
+ | '\u200C'..'\u200D'
+ | '\u2070'..'\u218F'
+ | '\u2C00'..'\u2FEF'
+ | '\u3001'..'\uD7FF'
+ | '\uF900'..'\uFDCF'
+ | '\uFDF0'..'\uFEFE'
+ | '\uFF00'..'\uFFFD'
+ ; // ignores | ['\u10000-'\uEFFFF] ;
+
+// ----------------------------
+// Literals embedded in actions
+//
+// Note that we have made the assumption that the language used within
+// actions uses the fairly standard " and ' delimiters for literals and
+// that within these literals, characters are escaped using the \ character.
+// There are some languages which do not conform to this in all cases, such
+// as by using /string/ and so on. We will have to deal with such cases if
+// if they come up in targets.
+//
+
+// Within actions, or other structures that are not part of the ANTLR
+// syntax, we may encounter literal characters. Within these, we do
+// not want to inadvertently match things like '}' and so we eat them
+// specifically. While this rule is called CHAR it allows for the fact that
+// some languages may use/allow ' as the string delimiter.
+//
+fragment
+ACTION_CHAR_LITERAL
+ : '\'' (('\\')=>ACTION_ESC | ~'\'' )* '\''
+ ;
+
+// Within actions, or other structures that are not part of the ANTLR
+// syntax, we may encounter literal strings. Within these, we do
+// not want to inadvertently match things like '}' and so we eat them
+// specifically.
+//
+fragment
+ACTION_STRING_LITERAL
+ : '"' (('\\')=>ACTION_ESC | ~'"')* '"'
+ ;
+
+// Within literal strings and characters that are not part of the ANTLR
+// syntax, we must allow for escaped character sequences so that we do not
+// inadvertantly recognize the end of a string or character when the terminating
+// delimiter has been escaped.
+//
+fragment
+ACTION_ESC
+ : '\\' .
+ ;
+
+// -------
+// Integer
+//
+// Obviously (I hope) match an arbitrarily long sequence of digits.
+//
+INT : ('0'..'9')+
+ ;
+
+// -----------
+// Source spec
+//
+// A fragment rule for picking up information about an originating
+// file from which the grammar we are parsing has been generated. This allows
+// ANTLR to report errors against the originating file and not the generated
+// file.
+//
+fragment
+SRC : 'src' WSCHARS+ file=ACTION_STRING_LITERAL WSCHARS+ line=INT
+ {
+ // TODO: Add target specific code to change the source file name and current line number
+ //
+ }
+ ;
+
+// --------------
+// Literal string
+//
+// ANTLR makes no distinction between a single character literal and a
+// multi-character string. All literals are single quote delimited and
+// may contain unicode escape sequences of the form \uxxxx or \u{xxxxxx},
+// where x is a valid hexadecimal number.
+STRING_LITERAL
+ : '\'' ( ( ESC_SEQ | ~('\\'|'\''|'\r'|'\n') ) )*
+ ( '\''
+ | // Unterminated string literal
+ {
+ Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
+ t.setLine(state.tokenStartLine);
+ t.setText(state.text);
+ t.setCharPositionInLine(state.tokenStartCharPositionInLine);
+ grammarError(ErrorType.UNTERMINATED_STRING_LITERAL, t);
+ }
+ )
+ ;
+
+// A valid hex digit specification
+//
+fragment
+HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
+
+// Any kind of escaped character that we can embed within ANTLR
+// literal strings.
+//
+fragment
+ESC_SEQ
+ : '\\'
+ (
+ // The standard escaped character set such as tab, newline, etc...
+ 'b'|'t'|'n'|'f'|'r'|'\''|'\\'
+
+ | // A Java style Unicode escape sequence
+ UNICODE_ESC
+
+ | // A Swift/Hack style Unicode escape sequence
+ UNICODE_EXTENDED_ESC
+
+        | // An illegal escape sequence
+ ~('b'|'t'|'n'|'f'|'r'|'\''|'\\'|'u') // \x for any invalid x (make sure to match char here)
+ {
+ Token t = new CommonToken(input, state.type, state.channel, getCharIndex()-2, getCharIndex()-1);
+ t.setText(t.getText());
+ t.setLine(input.getLine());
+ t.setCharPositionInLine(input.getCharPositionInLine()-2);
+ grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(getCharIndex()-2,getCharIndex()-1));
+ }
+ )
+ ;
+
+fragment
+UNICODE_ESC
+@init {
+
+ // Flag to tell us whether we have a valid number of
+ // hex digits in the escape sequence
+ //
+ int hCount = 0;
+}
+ : 'u' // Leadin for unicode escape sequence
+
+ // We now require 4 hex digits. Note though
+ // that we accept any number of characters
+ // and issue an error if we do not get 4. We cannot
+      // use an infinite count such as + because this
+ // might consume too many, so we lay out the lexical
+ // options and issue an error at the invalid paths.
+ //
+ (
+ (
+ HEX_DIGIT { hCount++; }
+ (
+ HEX_DIGIT { hCount++; }
+ (
+ HEX_DIGIT { hCount++; }
+ (
+ // Four valid hex digits, we are good
+ //
+ HEX_DIGIT { hCount++; }
+
+ | // Three valid digits
+ )
+
+ | // Two valid digits
+ )
+
+ | // One valid digit
+ )
+ )
+ | // No valid hex digits at all
+ )
+
+ // Now check the digit count and issue an error if we need to
+ //
+ {
+ if (hCount < 4) {
+ Interval badRange = Interval.of(getCharIndex()-2-hCount, getCharIndex());
+ String lastChar = input.substring(badRange.b, badRange.b);
+ if ( lastChar.codePointAt(0)=='\'' ) {
+ badRange.b--;
+ }
+ String bad = input.substring(badRange.a, badRange.b);
+ Token t = new CommonToken(input, state.type, state.channel, badRange.a, badRange.b);
+ t.setLine(input.getLine());
+ t.setCharPositionInLine(input.getCharPositionInLine()-hCount-2);
+ grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, bad);
+ }
+ }
+ ;
+
+fragment
+UNICODE_EXTENDED_ESC
+ : 'u{' // Leadin for unicode extended escape sequence
+
+ HEX_DIGIT+ // One or more hexadecimal digits
+
+ '}' // Leadout for unicode extended escape sequence
+
+ // Now check the digit count and issue an error if we need to
+ {
+ int numDigits = getCharIndex()-state.tokenStartCharIndex-6;
+ if (numDigits > 6) {
+ Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
+ t.setText(t.getText());
+ t.setLine(input.getLine());
+ t.setCharPositionInLine(input.getCharPositionInLine()-numDigits);
+ grammarError(ErrorType.INVALID_ESCAPE_SEQUENCE, t, input.substring(state.tokenStartCharIndex,getCharIndex()-1));
+ }
+ }
+ ;
+
+// ----------
+// Whitespace
+//
+// Characters and character constructs that are of no import
+// to the parser and are used to make the grammar easier to read
+// for humans.
+//
+WS
+ : (
+ ' '
+ | '\t'
+ | '\r'
+ | '\n'
+ | '\f'
+ )+
+ {$channel=HIDDEN;}
+ ;
+
+// A fragment rule for use in recognizing end of line in
+// rules like COMMENT.
+//
+fragment
+NLCHARS
+ : '\n' | '\r'
+ ;
+
+// A fragment rule for recognizing traditional whitespace
+// characters within lexer rules.
+//
+fragment
+WSCHARS
+ : ' ' | '\t' | '\f'
+ ;
+
+// A fragment rule for recognizing both traditional whitespace and
+// end of line markers, when we don't care to distinguish but don't
+// want any action code going on.
+//
+fragment
+WSNLCHARS
+ : ' ' | '\t' | '\f' | '\n' | '\r'
+ ;
+
+// This rule allows ANTLR 4 to parse grammars using the UTF-8 encoding with a
+// byte order mark. Since this Unicode character doesn't appear as a token
+// anywhere else in the grammar, we can simply skip all instances of it without
+// problem. This rule will not break usage of \uFEFF inside a LEXER_CHAR_SET or
+// STRING_LITERAL.
+UnicodeBOM
+ : '\uFEFF' {skip();}
+ ;
+
+// -----------------
+// Illegal Character
+//
+// This is an illegal character trap which is always the last rule in the
+// lexer specification. It matches a single character of any value and being
+// the last rule in the file will match when no other rule knows what to do
+// about the character. It is reported as an error but is not passed on to the
+// parser. This means that the parser can deal with the grammar file anyway
+// but we will not try to analyse or code generate from a file with lexical
+// errors.
+//
+ERRCHAR
+ : .
+ {
+ Token t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, getCharIndex()-1);
+ t.setLine(state.tokenStartLine);
+ t.setText(state.text);
+ t.setCharPositionInLine(state.tokenStartCharPositionInLine);
+ String msg = getTokenErrorDisplay(t) + " came as a complete surprise to me";
+ grammarError(ErrorType.SYNTAX_ERROR, t, msg);
+ state.syntaxErrors++;
+ skip();
+ }
+ ;
diff --git a/examples/antlr3/ImportGenerated/src/parse/ANTLRParser.g b/examples/antlr3/ImportGenerated/src/parse/ANTLRParser.g
new file mode 100644
index 0000000..3c10460
--- /dev/null
+++ b/examples/antlr3/ImportGenerated/src/parse/ANTLRParser.g
@@ -0,0 +1,922 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012-2016 Terence Parr
+ * Copyright (c) 2012-2016 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars.
+ * The grammar builds ASTs that are sniffed by subsequent stages.
+ */
+parser grammar ANTLRParser;
+
+options {
+ // Target language is Java, which is the default but being specific
+ // here as this grammar is also meant as a good example grammar for
+ // users.
+ language = Java;
+
+ // The output of this grammar is going to be an AST upon which
+ // we run a semantic checking phase, then the rest of the analysis
+ // including final code generation.
+ output = AST;
+
+ // The vocabulary (tokens and their int token types) we are using
+ // for the parser. This is generated by the lexer. The vocab will be extended
+ // to include the imaginary tokens below.
+ tokenVocab = ANTLRLexer;
+
+ ASTLabelType = GrammarAST;
+}
+
+// Imaginary Tokens
+//
+// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot
+// generate them. However we sometimes need additional 'tokens' to use as root
+// nodes for the AST we are generating. The tokens section is where we
+// specify any such tokens
+tokens {
+ RULE;
+ PREC_RULE; // flip to this if we find that it's left-recursive
+ RULES;
+ RULEMODIFIERS;
+ RULEACTIONS;
+ BLOCK;
+ OPTIONAL;
+ CLOSURE;
+ POSITIVE_CLOSURE;
+ RANGE;
+ SET;
+ CHAR_RANGE;
+ EPSILON;
+ ALT;
+ ALTLIST;
+ ID;
+ ARG;
+ ARGLIST;
+ RET;
+ COMBINED;
+ INITACTION;
+ LABEL; // $x used in rewrite rules
+ TEMPLATE;
+ WILDCARD;
+ // A generic node indicating a list of something when we don't
+ // really need to distinguish what we have a list of as the AST
+ // will 'know' by context.
+ //
+ LIST;
+ ELEMENT_OPTIONS; // TOKEN
+ RESULT;
+
+ // lexer action stuff
+ LEXER_ALT_ACTION;
+ LEXER_ACTION_CALL; // ID(foo)
+}
+
+// Include the copyright in this source and also the generated source
+//
+@header {
+/*
+ [The "BSD licence"]
+ Copyright (c) 2005-2012 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package org.antlr.v4.parse;
+
+import org.antlr.v4.tool.*;
+import org.antlr.v4.tool.ast.*;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+}
+
+@members {
+Deque paraphrases = new ArrayDeque();
+public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { }
+}
+
+// The main entry point for parsing a V3 grammar from top to toe. This is
+// the method call from whence to obtain the AST for the parse.
+//
+grammarSpec
+@after {
+GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
+if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+}
+}
+ : // First we should see the type and name of the grammar file that
+ // we are about to parse.
+ //
+ grammarType id SEMI
+
+ // There now follows zero or more declaration sections that should
+ // be given to us before the rules are declared
+ //
+// A number of things can be declared/stated before the grammar rules
+// 'proper' are parsed. These include grammar imports (delegate), grammar
+// options, imaginary token declarations, global scope declarations,
+// and actions such as @header. In this rule we allow any number of
+// these constructs in any order so that the grammar author is not
+// constrained by some arbitrary order of declarations that nobody
+// can remember. In the next phase of the parse, we verify that these
+// constructs are valid, not repeated and so on.
+ sync ( prequelConstruct sync )*
+
+ // We should now see at least one ANTLR EBNF style rule
+ // declaration. If the rules are missing we will let the
+ // semantic verification phase tell the user about it.
+ //
+ rules
+
+ modeSpec*
+
+ // And we force ANTLR to process everything it finds in the input
+ // stream by specifying the need to match End Of File before the
+ // parse is complete.
+ //
+ EOF
+
+ // Having parsed everything in the file and accumulated the relevant
+ // subtrees, we can now rewrite everything into the main AST form
+ // that our tree walkers are expecting.
+ //
+
+ -> ^(grammarType // The grammar type is our root AST node
+ id // We need to identify the grammar of course
+ prequelConstruct* // The set of declarations we accumulated
+ rules // And of course, we need the set of rules we discovered
+ modeSpec*
+ )
+ ;
+
+grammarType
+@after {
+ if ( $tg!=null ) throw new v3TreeGrammarException(tg);
+ if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type;
+ else ((GrammarRootAST)$tree).grammarType=COMBINED;
+}
+ : ( t=LEXER g=GRAMMAR -> GRAMMAR[$g, "LEXER_GRAMMAR", getTokenStream()]
+ | // A standalone parser specification
+ t=PARSER g=GRAMMAR -> GRAMMAR[$g, "PARSER_GRAMMAR", getTokenStream()]
+
+ // A combined lexer and parser specification
+ | g=GRAMMAR -> GRAMMAR[$g, "COMBINED_GRAMMAR", getTokenStream()]
+ | tg=TREE_GRAMMAR
+
+ )
+ ;
+
+// This is the list of all constructs that can be declared before
+// the set of rules that compose the grammar, and is invoked 0..n
+// times by the grammarPrequel rule.
+prequelConstruct
+ : // A list of options that affect analysis and/or code generation
+ optionsSpec
+
+ | // A list of grammars to which this grammar will delegate certain
+ // parts of the parsing sequence - a set of imported grammars
+ delegateGrammars
+
+ | // The declaration of any token types we need that are not already
+ // specified by a preceding grammar, such as when a parser declares
+ // imaginary tokens with which to construct the AST, or a rewriting
+ // tree parser adds further imaginary tokens to ones defined in a prior
+ // {tree} parser.
+ tokensSpec
+
+ | // A list of custom channels used by the grammar
+ channelsSpec
+
+ | // A declaration of language target implemented constructs. All such
+ // action sections start with '@' and are given to the language target's
+ // StringTemplate group. For instance @parser::header and @lexer::header
+ // are gathered here.
+ action
+ ;
+
+// A list of options that affect analysis and/or code generation
+optionsSpec
+ : OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option*)
+ ;
+
+option
+ : id ASSIGN^ optionValue
+ ;
+
+// ------------
+// Option Value
+//
+// The actual value of an option - Doh!
+//
+optionValue
+ : // If the option value is a single word that conforms to the
+ // lexical rules of token or rule names, then the user may skip quotes
+ // and so on. Many option values meet this description
+ qid
+ | STRING_LITERAL
+ | ACTION
+ | INT
+ ;
+
+// A list of grammars to which this grammar will delegate certain
+// parts of the parsing sequence - a set of imported grammars
+delegateGrammars
+ : IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+)
+ ;
+
+// A possibly named grammar file that should be imported to this grammar
+// and delegated to for the rules it specifies
+delegateGrammar
+ : id ASSIGN^ id
+ | id
+ ;
+
+tokensSpec
+ : TOKENS_SPEC id (COMMA id)* RBRACE -> ^(TOKENS_SPEC id+)
+ | TOKENS_SPEC RBRACE ->
+ | TOKENS_SPEC^ v3tokenSpec+ RBRACE!
+ {grammarError(ErrorType.V3_TOKENS_SYNTAX, $TOKENS_SPEC);}
+ ;
+
+v3tokenSpec
+ : id
+ ( ASSIGN lit=STRING_LITERAL
+ {
+ grammarError(ErrorType.V3_ASSIGN_IN_TOKENS, $id.start,
+ $id.text, $lit.getText());
+ }
+ -> id // ignore assignment
+ | -> id
+ )
+ SEMI
+ ;
+
+channelsSpec
+ : CHANNELS^ id (COMMA! id)* RBRACE!
+ ;
+
+// A declaration of a language target specific section,
+// such as @header, @includes and so on. We do not verify these
+// sections, they are just passed on to the language target.
+/** Match stuff like @parser::members {int i;} */
+action
+ : AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION)
+ ;
+
+/** Sometimes the scope names will collide with keywords; allow them as
+ * ids for action scopes.
+ */
+actionScopeName
+ : id
+ | LEXER -> ID[$LEXER]
+ | PARSER -> ID[$PARSER]
+ ;
+
+modeSpec
+ : MODE id SEMI sync (lexerRule sync)* -> ^(MODE id lexerRule*)
+ ;
+
+rules
+ : sync (rule sync)*
+ // Rewrite with an enclosing node as this is good for counting
+ // the number of rules and an easy marker for the walker to detect
+ // that there are no rules.
+ ->^(RULES rule*)
+ ;
+
+sync
+@init {
+ BitSet followSet = computeErrorRecoverySet();
+ if ( input.LA(1)!=Token.EOF && !followSet.member(input.LA(1)) ) {
+ reportError(new NoViableAltException("",0,0,input));
+ beginResync();
+ consumeUntil(input, followSet);
+ endResync();
+ }
+} :
+ ;
+
+rule: parserRule
+ | lexerRule
+ ;
+
+// The specification of an EBNF rule in ANTLR style, with all the
+// rule level parameters, declarations, actions, rewrite specs and so
+// on.
+//
+// Note that here we allow any number of rule declaration sections (such
+// as scope, returns, etc) in any order and we let the upcoming semantic
+// verification of the AST determine if things are repeated or if a
+// particular functional element is not valid in the context of the
+// grammar type, such as using returns in lexer rules and so on.
+parserRule
+@init { paraphrases.push("matching a rule"); }
+@after {
+ paraphrases.pop();
+ GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
+ if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+ }
+}
+ : // Start with the rule name. Here we do not distinguish between
+ // parser or lexer rules, the semantic verification phase will
+ // reject any rules that make no sense, such as lexer rules in
+ // a pure parser or tree parser.
+ RULE_REF
+
+ // Immediately following the rulename, there may be a specification
+ // of input parameters for the rule. We do not do anything with the
+ // parameters here except gather them for future phases such as
+ // semantic verification, type assignment etc. We require that
+ // the input parameters are the next syntactically significant element
+ // following the rule id.
+ ARG_ACTION?
+
+ ruleReturns?
+
+ throwsSpec?
+
+ localsSpec?
+
+ // Now, before the rule specification itself, which is introduced
+ // with a COLON, we may have zero or more configuration sections.
+ // As usual we just accept anything that is syntactically valid for
+ // one form of the rule or another and let the semantic verification
+ // phase throw out anything that is invalid.
+// At the rule level, a programmer may specify a number of sections, such
+// as scope declarations, rule return elements, @ sections (which may be
+// language target specific) and so on. We allow any number of these in any
+// order here and as usual rely on the semantic verification phase to reject
+// anything invalid using its additional context information. Here we are
+// context free and just accept anything that is a syntactically correct
+// construct.
+//
+ rulePrequels
+
+ COLON
+
+ // The rule is, at the top level, just a list of alts, with
+ // finer grained structure defined within the alts.
+ ruleBlock
+
+ SEMI
+
+ exceptionGroup
+
+ -> ^( RULE RULE_REF ARG_ACTION?
+ ruleReturns? throwsSpec? localsSpec? rulePrequels? ruleBlock exceptionGroup*
+ )
+ ;
+
+// Many language targets support exceptions and the rule will
+// generally be able to throw the language target equivalent
+// of a recognition exception. The grammar programmer can
+// specify a list of exceptions to catch or a generic catch all
+// and the target language code generation template is
+// responsible for generating code that makes sense.
+exceptionGroup
+ : exceptionHandler* finallyClause?
+ ;
+
+// Specifies a handler for a particular type of exception
+// thrown by a rule
+exceptionHandler
+ : CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION ACTION)
+ ;
+
+finallyClause
+ : FINALLY ACTION -> ^(FINALLY ACTION)
+ ;
+
+rulePrequels
+@init { paraphrases.push("matching rule preamble"); }
+@after { paraphrases.pop(); }
+ : sync (rulePrequel sync)* -> rulePrequel*
+ ;
+
+// An individual rule level configuration as referenced by the ruleActions
+// rule above.
+//
+rulePrequel
+ : optionsSpec
+ | ruleAction
+ ;
+
+// A rule can return elements that it constructs as it executes.
+// The return values are specified in a 'returns' prequel element,
+// which contains COMMA separated declarations, where the declaration
+// is target language specific. Here we see the returns declaration
+// as a single lexical action element, to be processed later.
+//
+ruleReturns
+ : RETURNS^ ARG_ACTION
+ ;
+
+// --------------
+// Exception spec
+//
+// Some target languages, such as Java and C# support exceptions
+// and they are specified as a prequel element for each rule that
+// wishes to throw its own exception type. Note that the name of the
+// exception is just a single word, so the header section of the grammar
+// must specify the correct import statements (or language equivalent).
+// Target languages that do not support exceptions just safely ignore
+// them.
+//
+throwsSpec
+ : THROWS qid (COMMA qid)* -> ^(THROWS qid+)
+ ;
+
+// locals [Cat x, float g]
+localsSpec : LOCALS^ ARG_ACTION ;
+
+// @ Sections are generally target language specific things
+// such as local variable declarations, code to run before the
+// rule starts and so on. For instance most targets support the
+// @init {} section where declarations and code can be placed
+// to run before the rule is entered. The C target also has
+// an @declarations {} section, where local variables are declared
+// in order that the generated code is C89 compliant.
+//
+/** Match stuff like @init {int i;} */
+ruleAction
+ : AT id ACTION -> ^(AT id ACTION)
+ ;
+
+// A set of alts, rewritten as a BLOCK for generic processing
+// in tree walkers. Used by the rule 'rule' so that the list of
+// alts for a rule appears as a BLOCK containing the alts and
+// can be processed by the generic BLOCK rule. Note that we
+// use a separate rule so that the BLOCK node has start and stop
+// boundaries set correctly by rule post processing of rewrites.
+ruleBlock
+@init {Token colon = input.LT(-1);}
+ : ruleAltList -> ^(BLOCK[colon,"BLOCK"] ruleAltList)
+ ;
+ catch [ResyncToEndOfRuleBlock e] {
+ // just resyncing; ignore error
+ retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
+ }
+
+ruleAltList
+ : labeledAlt (OR labeledAlt)* -> labeledAlt+
+ ;
+
+labeledAlt
+ : alternative
+ ( POUND! id! {((AltAST)$alternative.tree).altLabel=$id.tree;}
+ )?
+ ;
+
+lexerRule
+@init { paraphrases.push("matching a lexer rule"); }
+@after {
+ paraphrases.pop();
+}
+ : FRAGMENT?
+ TOKEN_REF COLON lexerRuleBlock SEMI
+ -> ^( RULE TOKEN_REF
+ ^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
+ )
+ ;
+
+lexerRuleBlock
+@init {Token colon = input.LT(-1);}
+ : lexerAltList -> ^(BLOCK[colon,"BLOCK"] lexerAltList)
+ ;
+ catch [ResyncToEndOfRuleBlock e] {
+ // just resyncing; ignore error
+ retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
+ }
+
+lexerAltList
+ : lexerAlt (OR lexerAlt)* -> lexerAlt+
+ ;
+
+lexerAlt
+ : lexerElements
+ ( lexerCommands -> ^(LEXER_ALT_ACTION lexerElements lexerCommands)
+ | -> lexerElements
+ )
+ ;
+
+lexerElements
+ : lexerElement+ -> ^(ALT lexerElement+)
+ | -> ^(ALT EPSILON) // empty alt
+ ;
+
+lexerElement
+@init {
+ paraphrases.push("looking for lexer rule element");
+ int m = input.mark();
+}
+@after { paraphrases.pop(); }
+ : labeledLexerElement
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$labeledLexerElement.start,"BLOCK"] ^(ALT labeledLexerElement) ) )
+ | -> labeledLexerElement
+ )
+ | lexerAtom
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$lexerAtom.start,"BLOCK"] ^(ALT lexerAtom) ) )
+ | -> lexerAtom
+ )
+ | lexerBlock
+ ( ebnfSuffix -> ^(ebnfSuffix lexerBlock)
+ | -> lexerBlock
+ )
+ | actionElement // actions only allowed at end of outer alt actually,
+ // but preds can be anywhere
+ ;
+ catch [RecognitionException re] {
+ retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
+ int ttype = input.get(input.range()).getType(); // seems to be next token
+ // look for anything that really belongs at the start of the rule minus the initial ID
+ if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT || ttype==EOF ) {
+ RecognitionException missingSemi =
+ new v4ParserException("unterminated rule (missing ';') detected at '"+
+ input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
+ reportError(missingSemi);
+ if ( ttype==EOF ) {
+ input.seek(input.index()+1);
+ }
+ else if ( ttype==CATCH || ttype==FINALLY ) {
+ input.seek(input.range()); // ignore what's before rule trailer stuff
+ }
+ else if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
+ int p = input.index();
+ Token t = input.get(p);
+ while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
+ p--;
+ t = input.get(p);
+ }
+ input.seek(p);
+ }
+ throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
+ }
+ reportError(re);
+ recover(input,re);
+ }
+
+labeledLexerElement
+ : id (ass=ASSIGN|ass=PLUS_ASSIGN)
+ ( lexerAtom -> ^($ass id lexerAtom)
+ | lexerBlock -> ^($ass id lexerBlock)
+ )
+ ;
+
+
+lexerBlock
+@after {
+GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
+if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+}
+}
+ : LPAREN
+ ( optionsSpec COLON )?
+ lexerAltList
+ RPAREN
+ -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? lexerAltList )
+ ;
+
+// channel=HIDDEN, skip, more, mode(INSIDE), push(INSIDE), pop
+lexerCommands
+ : RARROW lexerCommand (COMMA lexerCommand)* -> lexerCommand+
+ ;
+
+lexerCommand
+ : lexerCommandName LPAREN lexerCommandExpr RPAREN -> ^(LEXER_ACTION_CALL lexerCommandName lexerCommandExpr)
+ | lexerCommandName
+ ;
+
+lexerCommandExpr
+ : id
+ | INT
+ ;
+
+lexerCommandName
+ : id
+ | MODE ->ID[$MODE]
+ ;
+
+altList
+ : alternative (OR alternative)* -> alternative+
+ ;
+
+// An individual alt with an optional alt option like
+alternative
+@init { paraphrases.push("matching alternative"); }
+@after {
+ paraphrases.pop();
+ Grammar.setNodeOptions($tree, $o.tree);
+}
+ : o=elementOptions?
+ ( e+=element+ -> ^(ALT elementOptions? $e+)
+ | -> ^(ALT elementOptions? EPSILON) // empty alt
+ )
+ ;
+
+element
+@init {
+ paraphrases.push("looking for rule element");
+ int m = input.mark();
+}
+@after { paraphrases.pop(); }
+ : labeledElement
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$labeledElement.start,"BLOCK"] ^(ALT labeledElement ) ))
+ | -> labeledElement
+ )
+ | atom
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK[$atom.start,"BLOCK"] ^(ALT atom) ) )
+ | -> atom
+ )
+ | ebnf
+ | actionElement
+ ;
+ catch [RecognitionException re] {
+ retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
+ int ttype = input.get(input.range()).getType();
+ // look for anything that really belongs at the start of the rule minus the initial ID
+ if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) {
+ RecognitionException missingSemi =
+ new v4ParserException("unterminated rule (missing ';') detected at '"+
+ input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
+ reportError(missingSemi);
+ if ( ttype==CATCH || ttype==FINALLY ) {
+ input.seek(input.range()); // ignore what's before rule trailer stuff
+ }
+ if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
+ int p = input.index();
+ Token t = input.get(p);
+ while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
+ p--;
+ t = input.get(p);
+ }
+ input.seek(p);
+ }
+ throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
+ }
+ reportError(re);
+ recover(input,re);
+ }
+
+actionElement
+@after {
+ GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
+ if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+ }
+}
+ : ACTION
+ | ACTION elementOptions -> ^(ACTION elementOptions)
+ | SEMPRED
+ | SEMPRED elementOptions -> ^(SEMPRED elementOptions)
+ ;
+
+labeledElement
+ : id (ass=ASSIGN|ass=PLUS_ASSIGN)
+ ( atom -> ^($ass id atom)
+ | block -> ^($ass id block)
+ )
+ ;
+
+// A block of grammar structure optionally followed by standard EBNF
+// notation, or ANTLR specific notation. I.E. ? + ^ and so on
+ebnf
+ : block
+ // And now we see if we have any of the optional suffixs and rewrite
+ // the AST for this rule accordingly
+ ( blockSuffix -> ^(blockSuffix block)
+ | -> block
+ )
+ ;
+
+// The standard EBNF suffixes with additional components that make
+// sense only to ANTLR, in the context of a grammar block.
+blockSuffix
+ : ebnfSuffix // Standard EBNF
+ ;
+
+ebnfSuffix
+ : QUESTION nongreedy=QUESTION? -> OPTIONAL[$start, $nongreedy]
+ | STAR nongreedy=QUESTION? -> CLOSURE[$start, $nongreedy]
+ | PLUS nongreedy=QUESTION? -> POSITIVE_CLOSURE[$start, $nongreedy]
+ ;
+
+lexerAtom
+ : range
+ | terminal
+ | RULE_REF
+ | notSet
+ | wildcard
+ | LEXER_CHAR_SET
+ ;
+
+atom
+ : // Qualified reference delegate.rule. This must be
+ // lexically contiguous (no spaces either side of the DOT)
+ // otherwise it is two references with a wildcard in between
+ // and not a qualified reference.
+ /*
+ {
+ input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()==
+ input.LT(2).getCharPositionInLine() &&
+ input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine()
+ }?
+ id DOT ruleref -> ^(DOT id ruleref)
+
+ |
+ */
+ range // Range x..y - only valid in lexers
+ | terminal
+ | ruleref
+ | notSet
+ | wildcard
+ ;
+ catch [RecognitionException re] { throw re; } // pass upwards to element
+
+wildcard
+@after {
+ GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
+ if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+ }
+}
+ : // Wildcard '.' means any character in a lexer, any
+ // token in parser and any node or subtree in a tree parser
+ // Because the terminal rule is allowed to be the node
+ // specification for the start of a tree rule, we must
+ // later check that wildcard was not used for that.
+ DOT elementOptions?
+ -> ^(WILDCARD[$DOT] elementOptions?)
+ ;
+
+// --------------------
+// Inverted element set
+//
+// A set of characters (in a lexer) or terminal tokens, if a parser,
+// that are then used to create the inverse set of them.
+notSet
+ : NOT setElement -> ^(NOT[$NOT] ^(SET[$setElement.start,"SET"] setElement))
+ | NOT blockSet -> ^(NOT[$NOT] blockSet)
+ ;
+
+blockSet
+@init {
+ Token t;
+ boolean ebnf = false;
+}
+ : LPAREN setElement (OR setElement)* RPAREN
+ -> ^(SET[$LPAREN,"SET"] setElement+ )
+ ;
+
+setElement
+ : TOKEN_REF^ elementOptions?
+ | STRING_LITERAL^ elementOptions?
+ | range
+ | LEXER_CHAR_SET
+ ;
+
+// -------------
+// Grammar Block
+//
+// Anywhere where an element is valid, the grammar may start a new block
+// of alts by surrounding that block with ( ). A new block may also have a set
+// of options, which apply only to that block.
+//
+block
+@after {
+GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
+if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+}
+}
+ : LPAREN
+ ( optionsSpec? ra+=ruleAction* COLON )?
+ altList
+ RPAREN
+ -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? $ra* altList )
+ ;
+
+// ----------------
+// Parser rule ref
+//
+// Reference to a parser rule with optional arguments and optional
+// directive to become the root node or ignore the tree produced
+//
+ruleref
+@after {
+GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
+if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+}
+}
+ : RULE_REF ARG_ACTION? elementOptions? -> ^(RULE_REF ARG_ACTION? elementOptions?)
+ ;
+ catch [RecognitionException re] { throw re; } // pass upwards to element
+
+// ---------------
+// Character Range
+//
+// Specifies a range of characters. Valid for lexer rules only, but
+// we do not check that here, the tree walkers should do that.
+// Note also that the parser also allows through more than just
+// character literals so that we can produce a much nicer semantic
+// error about any abuse of the .. operator.
+//
+range
+ : STRING_LITERAL RANGE^ STRING_LITERAL
+ ;
+
+terminal
+@after {
+GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
+if ( options!=null ) {
+ Grammar.setNodeOptions($tree, options);
+}
+}
+ : TOKEN_REF elementOptions? -> ^(TOKEN_REF elementOptions?)
+ | STRING_LITERAL elementOptions? -> ^(STRING_LITERAL elementOptions?)
+ ;
+
+// Terminals may be adorned with certain options when
+// reference in the grammar: TOK<,,,>
+elementOptions
+ : LT (elementOption (COMMA elementOption)*)? GT
+ -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*)
+ ;
+
+// When used with elements we can specify what the tree node type can
+// be and also assign settings of various options (which we do not check here)
+elementOption
+ : // This format indicates the default element option
+ qid
+ | id ASSIGN^ optionValue
+ ;
+
+// The name of the grammar, and indeed some other grammar elements may
+// come through to the parser looking like a rule reference or a token
+// reference, hence this rule is used to pick up whichever it is and rewrite
+// it as a generic ID token.
+id
+@init { paraphrases.push("looking for an identifier"); }
+@after { paraphrases.pop(); }
+ : RULE_REF ->ID[$RULE_REF]
+ | TOKEN_REF ->ID[$TOKEN_REF]
+ ;
+
+qid
+@init { paraphrases.push("looking for a qualified identifier"); }
+@after { paraphrases.pop(); }
+ : id (DOT id)* -> ID[$qid.start, $text]
+ ;
+
+alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards
+elementEntry : element EOF ;
+ruleEntry : rule EOF ;
+blockEntry : block EOF ;
diff --git a/examples/antlr3/ImportGenerated/src/parse/BUILD b/examples/antlr3/ImportGenerated/src/parse/BUILD
new file mode 100644
index 0000000..73c5066
--- /dev/null
+++ b/examples/antlr3/ImportGenerated/src/parse/BUILD
@@ -0,0 +1,7 @@
+load("@rules_antlr//antlr:antlr3.bzl", "antlr")
+
+antlr(
+ name = "parser",
+ srcs = glob(["*.g"]),
+ visibility = ["//visibility:public"],
+)
diff --git a/examples/antlr3/ObjC/src/BUILD b/examples/antlr3/ObjC/src/BUILD
new file mode 100644
index 0000000..9f15510
--- /dev/null
+++ b/examples/antlr3/ObjC/src/BUILD
@@ -0,0 +1,8 @@
+load("@rules_antlr//antlr:antlr3.bzl", "antlr")
+
+antlr(
+ name = "parser",
+ srcs = glob(["SimpleC.g"]),
+ language = "ObjC",
+)
+
diff --git a/examples/antlr3/ObjC/src/SimpleC.g b/examples/antlr3/ObjC/src/SimpleC.g
new file mode 100644
index 0000000..016ded7
--- /dev/null
+++ b/examples/antlr3/ObjC/src/SimpleC.g
@@ -0,0 +1,107 @@
+grammar SimpleC;
+
+options {
+ language=ObjC;
+
+}
+
+program
+ : declaration+
+ ;
+
+/** In this rule, the functionHeader left prefix on the last two
+ * alternatives is not LL(k) for a fixed k. However, it is
+ * LL(*). The LL(*) algorithm simply scans ahead until it sees
+ * either the ';' or the '{' of the block and then it picks
+ * the appropriate alternative. Lookahead can be arbitrarily
+ * long in theory, but is <=10 in most cases. Works great.
+ * Use ANTLRWorks to see the lookahead use (step by Location)
+ * and look for blue tokens in the input window pane. :)
+ */
+declaration
+ : variable
+ | functionHeader ';'
+ { NSLog(@"\%@ is a declaration\n", $functionHeader.name); }
+ | functionHeader block
+ { NSLog(@"\%@ is a definition\n", $functionHeader.name); }
+ ;
+
+variable
+ : type declarator ';'
+ ;
+
+declarator
+ : ID
+ ;
+
+functionHeader returns [NSString *name]
+@init {
+ name=nil; // for now you must init here rather than in 'returns'
+}
+ : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
+ {$name = $ID.text;}
+ ;
+
+formalParameter
+ : type declarator
+ ;
+
+type
+ : 'int'
+ | 'char'
+ | 'void'
+ | ID
+ ;
+
+block
+ : '{'
+ variable*
+ stat*
+ '}'
+ ;
+
+stat: forStat
+ | expr ';'
+ | block
+ | assignStat ';'
+ | ';'
+ ;
+
+forStat
+ : 'for' '(' assignStat ';' expr ';' assignStat ')' block
+ ;
+
+assignStat
+ : ID '=' expr
+ ;
+
+expr: condExpr
+ ;
+
+condExpr
+ : aexpr ( ('==' | '<') aexpr )?
+ ;
+
+aexpr
+ : atom ( '+' atom )*
+ ;
+
+atom
+ : ID
+ | INT
+ | '(' expr ')'
+ ;
+
+ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
+ ;
+
+INT : ('0'..'9')+
+ ;
+
+WS : ( ' '
+ | '\t'
+ | '\r'
+ | '\n'
+ )+
+ { $channel=HIDDEN; }
+ ;
diff --git a/examples/antlr4-opt/Hello/src/main/antlr4/BUILD b/examples/antlr4-opt/Hello/src/main/antlr4/BUILD
new file mode 100644
index 0000000..d3e305a
--- /dev/null
+++ b/examples/antlr4-opt/Hello/src/main/antlr4/BUILD
@@ -0,0 +1,17 @@
+load("@rules_java//java:defs.bzl", "java_library")
+
+package(default_visibility = ["//visibility:public"])
+
+load("@rules_antlr//antlr:antlr4.bzl", "antlr")
+
+antlr(
+ name = "generated",
+ srcs = ["Hello.g4"],
+ package = "hello.world",
+)
+
+java_library(
+ name = "HelloWorld",
+ srcs = [":generated"],
+ deps = ["@antlr4_runtime//jar"],
+)
diff --git a/examples/antlr4-opt/Hello/src/main/antlr4/Hello.g4 b/examples/antlr4-opt/Hello/src/main/antlr4/Hello.g4
new file mode 100644
index 0000000..4a9d629
--- /dev/null
+++ b/examples/antlr4-opt/Hello/src/main/antlr4/Hello.g4
@@ -0,0 +1,4 @@
+grammar Hello;
+r : 'hello' ID ; // match keyword hello followed by an identifier
+ID : [a-z]+ ; // match lower-case identifiers
+WS : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
diff --git a/examples/antlr4-opt/WORKSPACE b/examples/antlr4-opt/WORKSPACE
new file mode 100644
index 0000000..a25c000
--- /dev/null
+++ b/examples/antlr4-opt/WORKSPACE
@@ -0,0 +1,10 @@
+workspace(name = "antlr4_opt")
+
+local_repository(
+ name = "rules_antlr",
+ path = "../..",
+)
+
+load("@rules_antlr//antlr:repositories.bzl", "rules_antlr_optimized_dependencies")
+
+rules_antlr_optimized_dependencies("4.7.4")
diff --git a/examples/antlr4-opt/groovy/BUILD b/examples/antlr4-opt/groovy/BUILD
new file mode 100644
index 0000000..13967a9
--- /dev/null
+++ b/examples/antlr4-opt/groovy/BUILD
@@ -0,0 +1,6 @@
+load("@rules_antlr//antlr:antlr4.bzl", "antlr")
+
+antlr(
+ name = "parser",
+ srcs = glob(["*.g4"]),
+)
diff --git a/examples/antlr4-opt/groovy/GroovyLexer.g4 b/examples/antlr4-opt/groovy/GroovyLexer.g4
new file mode 100644
index 0000000..016824f
--- /dev/null
+++ b/examples/antlr4-opt/groovy/GroovyLexer.g4
@@ -0,0 +1,955 @@
+/*
+ * This file is adapted from the Antlr4 Java grammar which has the following license
+ *
+ * Copyright (c) 2013 Terence Parr, Sam Harwell
+ * All rights reserved.
+ * [The "BSD licence"]
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * Subsequent modifications by the Groovy community have been done under the Apache License v2:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The Groovy grammar is based on the official grammar for Java:
+ * https://github.com/antlr/grammars-v4/blob/master/java/Java.g4
+ */
+lexer grammar GroovyLexer;
+
+options {
+ superClass = AbstractLexer;
+}
+
+@header {
+ import java.util.Deque;
+ import java.util.ArrayDeque;
+ import java.util.Map;
+ import java.util.HashMap;
+ import java.util.Set;
+ import java.util.HashSet;
+ import java.util.Collections;
+ import java.util.Arrays;
+ import java.util.stream.IntStream;
+ import org.apache.groovy.util.Maps;
+ import static org.apache.groovy.parser.antlr4.SemanticPredicates.*;
+}
+
+@members {
+ private long tokenIndex = 0;
+ private int lastTokenType = 0;
+ private int invalidDigitCount = 0;
+
+ /**
+ * Record the index and token type of the current token while emitting tokens.
+ */
+ @Override
+ public void emit(Token token) {
+ this.tokenIndex++;
+
+ int tokenType = token.getType();
+ if (Token.DEFAULT_CHANNEL == token.getChannel()) {
+ this.lastTokenType = tokenType;
+ }
+
+ if (RollBackOne == tokenType) {
+ this.rollbackOneChar();
+ }
+
+ super.emit(token);
+ }
+
+ private static final int[] REGEX_CHECK_ARRAY =
+ IntStream.of(
+ Identifier, CapitalizedIdentifier, NullLiteral, BooleanLiteral, THIS, RPAREN, RBRACK, RBRACE,
+ IntegerLiteral, FloatingPointLiteral, StringLiteral, GStringEnd, INC, DEC
+ ).sorted().toArray();
+
+ private boolean isRegexAllowed() {
+ if (Arrays.binarySearch(REGEX_CHECK_ARRAY, this.lastTokenType) >= 0) {
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+     * just a hook, which will be overridden by GroovyLangLexer
+ */
+ protected void rollbackOneChar() {}
+
+ private static class Paren {
+ private String text;
+ private int lastTokenType;
+ private int line;
+ private int column;
+
+ public Paren(String text, int lastTokenType, int line, int column) {
+ this.text = text;
+ this.lastTokenType = lastTokenType;
+ this.line = line;
+ this.column = column;
+ }
+
+ public String getText() {
+ return this.text;
+ }
+
+ public int getLastTokenType() {
+ return this.lastTokenType;
+ }
+
+ public int getLine() {
+ return line;
+ }
+
+ public int getColumn() {
+ return column;
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) (text.hashCode() * line + column);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Paren)) {
+ return false;
+ }
+
+ Paren other = (Paren) obj;
+
+ return this.text.equals(other.text) && (this.line == other.line && this.column == other.column);
+ }
+ }
+
+ private static final Map PAREN_MAP =
+ Maps.of(
+ "(", ")",
+ "[", "]",
+ "{", "}"
+ );
+
+ protected void enterParenCallback(String text) {}
+
+ protected void exitParenCallback(String text) {}
+
+ private final Deque parenStack = new ArrayDeque<>(32);
+
+ private void enterParen() {
+ String text = getText();
+ enterParenCallback(text);
+ parenStack.push(new Paren(text, this.lastTokenType, getLine(), getCharPositionInLine()));
+ }
+
+ private void exitParen() {
+ Paren paren = parenStack.peek();
+ String text = getText();
+
+ require(null != paren, "Too many '" + text + "'");
+ require(text.equals(PAREN_MAP.get(paren.getText())),
+ "'" + paren.getText() + "'" + new PositionInfo(paren.getLine(), paren.getColumn()) + " can not match '" + text + "'", -1);
+
+ exitParenCallback(text);
+ parenStack.pop();
+ }
+ private boolean isInsideParens() {
+ Paren paren = parenStack.peek();
+
+ // We just care about "(" and "[", inside which the new lines will be ignored.
+ // Notice: the new lines between "{" and "}" can not be ignored.
+ if (null == paren) {
+ return false;
+ }
+ return ("(".equals(paren.getText()) && TRY != paren.getLastTokenType()) // we don't treat try-paren(i.e. try (....)) as parenthesis
+ || "[".equals(paren.getText());
+ }
+ private void ignoreTokenInsideParens() {
+ if (!this.isInsideParens()) {
+ return;
+ }
+
+ this.setChannel(Token.HIDDEN_CHANNEL);
+ }
+ private void ignoreMultiLineCommentConditionally() {
+ if (!this.isInsideParens() && isFollowedByWhiteSpaces(_input)) {
+ return;
+ }
+
+ this.setChannel(Token.HIDDEN_CHANNEL);
+ }
+
+ @Override
+ public int getSyntaxErrorSource() {
+ return GroovySyntaxError.LEXER;
+ }
+
+ @Override
+ public int getErrorLine() {
+ return getLine();
+ }
+
+ @Override
+ public int getErrorColumn() {
+ return getCharPositionInLine() + 1;
+ }
+}
+
+
+// §3.10.5 String Literals
+StringLiteral
+ : GStringQuotationMark DqStringCharacter* GStringQuotationMark
+ | SqStringQuotationMark SqStringCharacter* SqStringQuotationMark
+
+ | Slash { this.isRegexAllowed() && _input.LA(1) != '*' }?
+ SlashyStringCharacter+ Slash
+
+ | TdqStringQuotationMark TdqStringCharacter* TdqStringQuotationMark
+ | TsqStringQuotationMark TsqStringCharacter* TsqStringQuotationMark
+ | DollarSlashyGStringQuotationMarkBegin DollarSlashyStringCharacter+ DollarSlashyGStringQuotationMarkEnd
+ ;
+
+// Groovy gstring
+GStringBegin
+ : GStringQuotationMark DqStringCharacter* Dollar -> pushMode(DQ_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+TdqGStringBegin
+ : TdqStringQuotationMark TdqStringCharacter* Dollar -> type(GStringBegin), pushMode(TDQ_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+SlashyGStringBegin
+ : Slash { this.isRegexAllowed() && _input.LA(1) != '*' }? SlashyStringCharacter* Dollar { isFollowedByJavaLetterInGString(_input) }? -> type(GStringBegin), pushMode(SLASHY_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+DollarSlashyGStringBegin
+ : DollarSlashyGStringQuotationMarkBegin DollarSlashyStringCharacter* Dollar { isFollowedByJavaLetterInGString(_input) }? -> type(GStringBegin), pushMode(DOLLAR_SLASHY_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+
+mode DQ_GSTRING_MODE;
+GStringEnd
+ : GStringQuotationMark -> popMode
+ ;
+GStringPart
+ : Dollar -> pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+GStringCharacter
+ : DqStringCharacter -> more
+ ;
+
+mode TDQ_GSTRING_MODE;
+TdqGStringEnd
+ : TdqStringQuotationMark -> type(GStringEnd), popMode
+ ;
+TdqGStringPart
+ : Dollar -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+TdqGStringCharacter
+ : TdqStringCharacter -> more
+ ;
+
+mode SLASHY_GSTRING_MODE;
+SlashyGStringEnd
+ : Dollar? Slash -> type(GStringEnd), popMode
+ ;
+SlashyGStringPart
+ : Dollar { isFollowedByJavaLetterInGString(_input) }? -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+SlashyGStringCharacter
+ : SlashyStringCharacter -> more
+ ;
+
+mode DOLLAR_SLASHY_GSTRING_MODE;
+DollarSlashyGStringEnd
+ : DollarSlashyGStringQuotationMarkEnd -> type(GStringEnd), popMode
+ ;
+DollarSlashyGStringPart
+ : Dollar { isFollowedByJavaLetterInGString(_input) }? -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE)
+ ;
+DollarSlashyGStringCharacter
+ : DollarSlashyStringCharacter -> more
+ ;
+
+mode GSTRING_TYPE_SELECTOR_MODE;
+GStringLBrace
+ : '{' { this.enterParen(); } -> type(LBRACE), popMode, pushMode(DEFAULT_MODE)
+ ;
+GStringIdentifier
+ : IdentifierInGString -> type(Identifier), popMode, pushMode(GSTRING_PATH_MODE)
+ ;
+
+
+mode GSTRING_PATH_MODE;
+GStringPathPart
+ : Dot IdentifierInGString
+ ;
+RollBackOne
+ : . {
+ // a trick to handle GStrings followed by EOF properly
+ int readChar = _input.LA(-1);
+ if (EOF == _input.LA(1) && ('"' == readChar || '/' == readChar)) {
+ setType(GStringEnd);
+ } else {
+ setChannel(HIDDEN);
+ }
+ } -> popMode
+ ;
+
+
+mode DEFAULT_MODE;
+// character in the double quotation string. e.g. "a"
+fragment
+DqStringCharacter
+ : ~["\r\n\\$]
+ | EscapeSequence
+ ;
+
+// character in the single quotation string. e.g. 'a'
+fragment
+SqStringCharacter
+ : ~['\r\n\\]
+ | EscapeSequence
+ ;
+
+// character in the triple double quotation string. e.g. """a"""
+fragment TdqStringCharacter
+ : ~["\\$]
+ | GStringQuotationMark { _input.LA(1) != '"' || _input.LA(2) != '"' || _input.LA(3) == '"' && (_input.LA(4) != '"' || _input.LA(5) != '"') }?
+ | EscapeSequence
+ ;
+
+// character in the triple single quotation string. e.g. '''a'''
+fragment TsqStringCharacter
+ : ~['\\]
+ | SqStringQuotationMark { _input.LA(1) != '\'' || _input.LA(2) != '\'' || _input.LA(3) == '\'' && (_input.LA(4) != '\'' || _input.LA(5) != '\'') }?
+ | EscapeSequence
+ ;
+
+// character in the slashy string. e.g. /a/
+fragment SlashyStringCharacter
+ : SlashEscape
+ | Dollar { !isFollowedByJavaLetterInGString(_input) }?
+ | ~[/$\u0000]
+ ;
+
+// character in the dollar slashy string. e.g. $/a/$
+fragment DollarSlashyStringCharacter
+ : SlashEscape | DollarSlashEscape | DollarDollarEscape
+ | Slash { _input.LA(1) != '$' }?
+ | Dollar { !isFollowedByJavaLetterInGString(_input) }?
+ | ~[/$\u0000]
+ ;
+
+// Groovy keywords
+AS : 'as';
+DEF : 'def';
+IN : 'in';
+TRAIT : 'trait';
+THREADSAFE : 'threadsafe'; // reserved keyword
+
+// the reserved type name of Java10
+VAR : 'var';
+
+// §3.9 Keywords
+BuiltInPrimitiveType
+ : BOOLEAN
+ | CHAR
+ | BYTE
+ | SHORT
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ ;
+
+ABSTRACT : 'abstract';
+ASSERT : 'assert';
+
+fragment
+BOOLEAN : 'boolean';
+
+BREAK : 'break';
+
+fragment
+BYTE : 'byte';
+
+CASE : 'case';
+CATCH : 'catch';
+
+fragment
+CHAR : 'char';
+
+CLASS : 'class';
+CONST : 'const';
+CONTINUE : 'continue';
+DEFAULT : 'default';
+DO : 'do';
+
+fragment
+DOUBLE : 'double';
+
+ELSE : 'else';
+ENUM : 'enum';
+EXTENDS : 'extends';
+FINAL : 'final';
+FINALLY : 'finally';
+
+fragment
+FLOAT : 'float';
+
+
+FOR : 'for';
+IF : 'if';
+GOTO : 'goto';
+IMPLEMENTS : 'implements';
+IMPORT : 'import';
+INSTANCEOF : 'instanceof';
+
+fragment
+INT : 'int';
+
+INTERFACE : 'interface';
+
+fragment
+LONG : 'long';
+
+NATIVE : 'native';
+NEW : 'new';
+PACKAGE : 'package';
+PRIVATE : 'private';
+PROTECTED : 'protected';
+PUBLIC : 'public';
+RETURN : 'return';
+
+fragment
+SHORT : 'short';
+
+
+STATIC : 'static';
+STRICTFP : 'strictfp';
+SUPER : 'super';
+SWITCH : 'switch';
+SYNCHRONIZED : 'synchronized';
+THIS : 'this';
+THROW : 'throw';
+THROWS : 'throws';
+TRANSIENT : 'transient';
+TRY : 'try';
+VOID : 'void';
+VOLATILE : 'volatile';
+WHILE : 'while';
+
+
+// §3.10.1 Integer Literals
+
+IntegerLiteral
+ : ( DecimalIntegerLiteral
+ | HexIntegerLiteral
+ | OctalIntegerLiteral
+ | BinaryIntegerLiteral
+ ) (Underscore { require(false, "Number ending with underscores is invalid", -1, true); })?
+
+ // !!! Error Alternative !!!
+ | Zero ([0-9] { invalidDigitCount++; })+ { require(false, "Invalid octal number", -(invalidDigitCount + 1), true); } IntegerTypeSuffix?
+ ;
+
+fragment
+Zero
+ : '0'
+ ;
+
+fragment
+DecimalIntegerLiteral
+ : DecimalNumeral IntegerTypeSuffix?
+ ;
+
+fragment
+HexIntegerLiteral
+ : HexNumeral IntegerTypeSuffix?
+ ;
+
+fragment
+OctalIntegerLiteral
+ : OctalNumeral IntegerTypeSuffix?
+ ;
+
+fragment
+BinaryIntegerLiteral
+ : BinaryNumeral IntegerTypeSuffix?
+ ;
+
+fragment
+IntegerTypeSuffix
+ : [lLiIgG]
+ ;
+
+fragment
+DecimalNumeral
+ : Zero
+ | NonZeroDigit (Digits? | Underscores Digits)
+ ;
+
+fragment
+Digits
+ : Digit (DigitOrUnderscore* Digit)?
+ ;
+
+fragment
+Digit
+ : Zero
+ | NonZeroDigit
+ ;
+
+fragment
+NonZeroDigit
+ : [1-9]
+ ;
+
+fragment
+DigitOrUnderscore
+ : Digit
+ | Underscore
+ ;
+
+fragment
+Underscores
+ : Underscore+
+ ;
+
+fragment
+Underscore
+ : '_'
+ ;
+
+fragment
+HexNumeral
+ : Zero [xX] HexDigits
+ ;
+
+fragment
+HexDigits
+ : HexDigit (HexDigitOrUnderscore* HexDigit)?
+ ;
+
+fragment
+HexDigit
+ : [0-9a-fA-F]
+ ;
+
+fragment
+HexDigitOrUnderscore
+ : HexDigit
+ | Underscore
+ ;
+
+fragment
+OctalNumeral
+ : Zero Underscores? OctalDigits
+ ;
+
+fragment
+OctalDigits
+ : OctalDigit (OctalDigitOrUnderscore* OctalDigit)?
+ ;
+
+fragment
+OctalDigit
+ : [0-7]
+ ;
+
+fragment
+OctalDigitOrUnderscore
+ : OctalDigit
+ | Underscore
+ ;
+
+fragment
+BinaryNumeral
+ : Zero [bB] BinaryDigits
+ ;
+
+fragment
+BinaryDigits
+ : BinaryDigit (BinaryDigitOrUnderscore* BinaryDigit)?
+ ;
+
+fragment
+BinaryDigit
+ : [01]
+ ;
+
+fragment
+BinaryDigitOrUnderscore
+ : BinaryDigit
+ | Underscore
+ ;
+
+// §3.10.2 Floating-Point Literals
+
+FloatingPointLiteral
+ : ( DecimalFloatingPointLiteral
+ | HexadecimalFloatingPointLiteral
+ ) (Underscore { require(false, "Number ending with underscores is invalid", -1, true); })?
+ ;
+
+fragment
+DecimalFloatingPointLiteral
+ : Digits Dot Digits ExponentPart? FloatTypeSuffix?
+ | Digits ExponentPart FloatTypeSuffix?
+ | Digits FloatTypeSuffix
+ ;
+
+fragment
+ExponentPart
+ : ExponentIndicator SignedInteger
+ ;
+
+fragment
+ExponentIndicator
+ : [eE]
+ ;
+
+fragment
+SignedInteger
+ : Sign? Digits
+ ;
+
+fragment
+Sign
+ : [+\-]
+ ;
+
+fragment
+FloatTypeSuffix
+ : [fFdDgG]
+ ;
+
+fragment
+HexadecimalFloatingPointLiteral
+ : HexSignificand BinaryExponent FloatTypeSuffix?
+ ;
+
+fragment
+HexSignificand
+ : HexNumeral Dot?
+ | Zero [xX] HexDigits? Dot HexDigits
+ ;
+
+fragment
+BinaryExponent
+ : BinaryExponentIndicator SignedInteger
+ ;
+
+fragment
+BinaryExponentIndicator
+ : [pP]
+ ;
+
+fragment
+Dot : '.'
+ ;
+
+// §3.10.3 Boolean Literals
+
+BooleanLiteral
+ : 'true'
+ | 'false'
+ ;
+
+
+// §3.10.6 Escape Sequences for Character and String Literals
+
+fragment
+EscapeSequence
+ : Backslash [btnfr"'\\]
+ | OctalEscape
+ | UnicodeEscape
+ | DollarEscape
+ | LineEscape
+ ;
+
+
+fragment
+OctalEscape
+ : Backslash OctalDigit
+ | Backslash OctalDigit OctalDigit
+ | Backslash ZeroToThree OctalDigit OctalDigit
+ ;
+
+// Groovy allows 1 or more u's after the backslash
+fragment
+UnicodeEscape
+ : Backslash 'u' HexDigit HexDigit HexDigit HexDigit
+ ;
+
+fragment
+ZeroToThree
+ : [0-3]
+ ;
+
+// Groovy Escape Sequences
+
+fragment
+DollarEscape
+ : Backslash Dollar
+ ;
+
+fragment
+LineEscape
+ : Backslash '\r'? '\n'
+ ;
+
+fragment
+SlashEscape
+ : Backslash Slash
+ ;
+
+fragment
+Backslash
+ : '\\'
+ ;
+
+fragment
+Slash
+ : '/'
+ ;
+
+fragment
+Dollar
+ : '$'
+ ;
+
+fragment
+GStringQuotationMark
+ : '"'
+ ;
+
+fragment
+SqStringQuotationMark
+ : '\''
+ ;
+
+fragment
+TdqStringQuotationMark
+ : '"""'
+ ;
+
+fragment
+TsqStringQuotationMark
+ : '\'\'\''
+ ;
+
+fragment
+DollarSlashyGStringQuotationMarkBegin
+ : '$/'
+ ;
+
+fragment
+DollarSlashyGStringQuotationMarkEnd
+ : '/$'
+ ;
+
+fragment
+DollarSlashEscape
+ : '$/$'
+ ;
+
+fragment
+DollarDollarEscape
+ : '$$'
+ ;
+
+// §3.10.7 The Null Literal
+NullLiteral
+ : 'null'
+ ;
+
+// Groovy Operators
+
+RANGE_INCLUSIVE : '..';
+RANGE_EXCLUSIVE : '..<';
+SPREAD_DOT : '*.';
+SAFE_DOT : '?.';
+SAFE_CHAIN_DOT : '??.';
+ELVIS : '?:';
+METHOD_POINTER : '.&';
+METHOD_REFERENCE : '::';
+REGEX_FIND : '=~';
+REGEX_MATCH : '==~';
+POWER : '**';
+POWER_ASSIGN : '**=';
+SPACESHIP : '<=>';
+IDENTICAL : '===';
+NOT_IDENTICAL : '!==';
+ARROW : '->';
+
+// !internalPromise will be parsed as !in ternalPromise, so semantic predicates are necessary
+NOT_INSTANCEOF : '!instanceof' { isFollowedBy(_input, ' ', '\t', '\r', '\n') }?;
+NOT_IN : '!in' { isFollowedBy(_input, ' ', '\t', '\r', '\n', '[', '(', '{') }?;
+
+
+// §3.11 Separators
+
+LPAREN : '(' { this.enterParen(); } -> pushMode(DEFAULT_MODE);
+RPAREN : ')' { this.exitParen(); } -> popMode;
+LBRACE : '{' { this.enterParen(); } -> pushMode(DEFAULT_MODE);
+RBRACE : '}' { this.exitParen(); } -> popMode;
+LBRACK : '[' { this.enterParen(); } -> pushMode(DEFAULT_MODE);
+RBRACK : ']' { this.exitParen(); } -> popMode;
+
+SEMI : ';';
+COMMA : ',';
+DOT : Dot;
+
+// §3.12 Operators
+
+ASSIGN : '=';
+GT : '>';
+LT : '<';
+NOT : '!';
+BITNOT : '~';
+QUESTION : '?';
+COLON : ':';
+EQUAL : '==';
+LE : '<=';
+GE : '>=';
+NOTEQUAL : '!=';
+AND : '&&';
+OR : '||';
+INC : '++';
+DEC : '--';
+ADD : '+';
+SUB : '-';
+MUL : '*';
+DIV : Slash;
+BITAND : '&';
+BITOR : '|';
+XOR : '^';
+MOD : '%';
+
+
+ADD_ASSIGN : '+=';
+SUB_ASSIGN : '-=';
+MUL_ASSIGN : '*=';
+DIV_ASSIGN : '/=';
+AND_ASSIGN : '&=';
+OR_ASSIGN : '|=';
+XOR_ASSIGN : '^=';
+MOD_ASSIGN : '%=';
+LSHIFT_ASSIGN : '<<=';
+RSHIFT_ASSIGN : '>>=';
+URSHIFT_ASSIGN : '>>>=';
+ELVIS_ASSIGN : '?=';
+
+
+// §3.8 Identifiers (must appear after all keywords in the grammar)
+CapitalizedIdentifier
+ : [A-Z] JavaLetterOrDigit*
+ ;
+
+Identifier
+ : JavaLetter JavaLetterOrDigit*
+ ;
+
+fragment
+IdentifierInGString
+ : JavaLetterInGString JavaLetterOrDigitInGString*
+ ;
+
+fragment
+JavaLetterInGString
+ : [a-zA-Z_] // these are the "java letters" below 0x7F, except for $
+ | // covers all characters above 0x7F which are not a surrogate
+ ~[\u0000-\u007F\uD800-\uDBFF]
+ {Character.isJavaIdentifierStart(_input.LA(-1))}?
+ | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
+ [\uD800-\uDBFF] [\uDC00-\uDFFF]
+ {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
+ ;
+
+fragment
+JavaLetterOrDigitInGString
+ : [a-zA-Z0-9_] // these are the "java letters or digits" below 0x7F, except for $
+ | // covers all characters above 0x7F which are not a surrogate
+ ~[\u0000-\u007F\uD800-\uDBFF]
+ {Character.isJavaIdentifierPart(_input.LA(-1))}?
+ | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
+ [\uD800-\uDBFF] [\uDC00-\uDFFF]
+ {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
+ ;
+
+
+fragment
+JavaLetter
+ : [a-zA-Z$_] // these are the "java letters" below 0x7F
+ | // covers all characters above 0x7F which are not a surrogate
+ ~[\u0000-\u007F\uD800-\uDBFF]
+ {Character.isJavaIdentifierStart(_input.LA(-1))}?
+ | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
+ [\uD800-\uDBFF] [\uDC00-\uDFFF]
+ {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
+ ;
+
+fragment
+JavaLetterOrDigit
+ : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
+ | // covers all characters above 0x7F which are not a surrogate
+ ~[\u0000-\u007F\uD800-\uDBFF]
+ {Character.isJavaIdentifierPart(_input.LA(-1))}?
+ | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
+ [\uD800-\uDBFF] [\uDC00-\uDFFF]
+ {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
+ ;
+
+//
+// Additional symbols not defined in the lexical specification
+//
+
+AT : '@';
+ELLIPSIS : '...';
+
+//
+// Whitespace, line escape and comments
+//
+WS : ([ \t\u000C]+ | LineEscape+) -> skip
+ ;
+
+
+// Inside (...) and [...] but not {...}, ignore newlines.
+NL : '\r'? '\n' { this.ignoreTokenInsideParens(); }
+ ;
+
+// Multiple-line comments(including groovydoc comments)
+ML_COMMENT
+ : '/*' .*? '*/' { this.ignoreMultiLineCommentConditionally(); } -> type(NL)
+ ;
+
+// Single-line comments
+SL_COMMENT
+ : '//' ~[\r\n\uFFFF]* { this.ignoreTokenInsideParens(); } -> type(NL)
+ ;
+
+// Script-header comments.
+// The very first characters of the file may be "#!". If so, ignore the first line.
+SH_COMMENT
+ : '#!' { require(0 == this.tokenIndex, "Shebang comment should appear at the first line", -2, true); } ~[\r\n\uFFFF]* -> skip
+ ;
+
+// Unexpected characters will be handled by groovy parser later.
+UNEXPECTED_CHAR
+ : .
+ ;
diff --git a/examples/antlr4-opt/groovy/GroovyParser.g4 b/examples/antlr4-opt/groovy/GroovyParser.g4
new file mode 100644
index 0000000..7d802fa
--- /dev/null
+++ b/examples/antlr4-opt/groovy/GroovyParser.g4
@@ -0,0 +1,1242 @@
+/*
+ * This file is adapted from the Antlr4 Java grammar which has the following license
+ *
+ * Copyright (c) 2013 Terence Parr, Sam Harwell
+ * All rights reserved.
+ * [The "BSD licence"]
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * Subsequent modifications by the Groovy community have been done under the Apache License v2:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The Groovy grammar is based on the official grammar for Java:
+ * https://github.com/antlr/grammars-v4/blob/master/java/Java.g4
+ */
+parser grammar GroovyParser;
+
+options {
+ tokenVocab = GroovyLexer;
+ contextSuperClass = GroovyParserRuleContext;
+ superClass = AbstractParser;
+}
+
+@header {
+ import java.util.Map;
+ import org.codehaus.groovy.ast.NodeMetaDataHandler;
+ import org.apache.groovy.parser.antlr4.SemanticPredicates;
+}
+
+@members {
+
+ public static class GroovyParserRuleContext extends ParserRuleContext implements NodeMetaDataHandler {
+ private Map metaDataMap = null;
+
+ public GroovyParserRuleContext() {}
+
+ public GroovyParserRuleContext(ParserRuleContext parent, int invokingStateNumber) {
+ super(parent, invokingStateNumber);
+ }
+
+ @Override
+ public Map, ?> getMetaDataMap() {
+ return this.metaDataMap;
+ }
+
+ @Override
+ public void setMetaDataMap(Map, ?> metaDataMap) {
+ this.metaDataMap = metaDataMap;
+ }
+ }
+
+ @Override
+ public int getSyntaxErrorSource() {
+ return GroovySyntaxError.PARSER;
+ }
+
+ @Override
+ public int getErrorLine() {
+ Token token = _input.LT(-1);
+
+ if (null == token) {
+ return -1;
+ }
+
+ return token.getLine();
+ }
+
+ @Override
+ public int getErrorColumn() {
+ Token token = _input.LT(-1);
+
+ if (null == token) {
+ return -1;
+ }
+
+ return token.getCharPositionInLine() + 1 + token.getText().length();
+ }
+}
+
+// starting point for parsing a groovy file
+compilationUnit
+ : nls
+ packageDeclaration? sep? scriptStatements? EOF
+ ;
+
+scriptStatements
+ : scriptStatement (sep scriptStatement)* sep?
+ ;
+
+scriptStatement
+ : importDeclaration // Import statement. Can be used in any scope. Has "import x as y" also.
+ | typeDeclaration
+ | statement
+ ;
+
+packageDeclaration
+ : annotationsOpt PACKAGE qualifiedName
+ ;
+
+importDeclaration
+ : annotationsOpt IMPORT STATIC? qualifiedName (DOT MUL | AS alias=identifier)?
+ ;
+
+
+typeDeclaration
+ : classOrInterfaceModifiersOpt classDeclaration
+ ;
+
+modifier
+ : classOrInterfaceModifier
+ | m=( NATIVE
+ | SYNCHRONIZED
+ | TRANSIENT
+ | VOLATILE
+ | DEF
+ | VAR
+ )
+ ;
+
+modifiersOpt
+ : modifiers?
+ ;
+
+modifiers
+ : (modifier nls)+
+ ;
+
+classOrInterfaceModifiersOpt
+ : classOrInterfaceModifiers?
+ ;
+
+classOrInterfaceModifiers
+ : (classOrInterfaceModifier nls)+
+ ;
+
+classOrInterfaceModifier
+ : annotation // class or interface
+ | m=( PUBLIC // class or interface
+ | PROTECTED // class or interface
+ | PRIVATE // class or interface
+ | STATIC // class or interface
+ | ABSTRACT // class or interface
+ | FINAL // class only -- does not apply to interfaces
+ | STRICTFP // class or interface
+ | DEFAULT // interface only -- does not apply to classes
+ )
+ ;
+
+variableModifier
+ : annotation
+ | m=( FINAL
+ | DEF
+ | VAR
+ // Groovy supports declaring local variables as instance/class fields,
+ // e.g. import groovy.transform.*; @Field static List awe = [1, 2, 3]
+ // e.g. import groovy.transform.*; def a = { @Field public List awe = [1, 2, 3] }
+ // Notice: Groovy 2.4.7 just allows to declare local variables with the following modifiers when using annotations(e.g. @Field)
+ // TODO check whether the following modifiers accompany annotations or not. Because the legacy codes(e.g. benchmark/bench/heapsort.groovy) allow to declare the special instance/class fields without annotations, we leave it as it is for the time being
+ | PUBLIC
+ | PROTECTED
+ | PRIVATE
+ | STATIC
+ | ABSTRACT
+ | STRICTFP
+ )
+ ;
+
+variableModifiersOpt
+ : variableModifiers?
+ ;
+
+variableModifiers
+ : (variableModifier nls)+
+ ;
+
+typeParameters
+ : LT nls typeParameter (COMMA nls typeParameter)* nls GT
+ ;
+
+typeParameter
+ : className (EXTENDS nls typeBound)?
+ ;
+
+typeBound
+ : type (BITAND nls type)*
+ ;
+
+typeList
+ : type (COMMA nls type)*
+ ;
+
+
+/**
+ * t 0: class; 1: interface; 2: enum; 3: annotation; 4: trait
+ */
+classDeclaration
+locals[ int t ]
+ : ( CLASS { $t = 0; }
+ | INTERFACE { $t = 1; }
+ | ENUM { $t = 2; }
+ | AT INTERFACE { $t = 3; }
+ | TRAIT { $t = 4; }
+ )
+ identifier nls
+
+ (
+ { 3 != $t }?
+ typeParameters? nls
+ (
+ { 2 != $t }?
+ (EXTENDS nls
+ (
+ // Only interface can extend more than one super class
+ {1 == $t}? scs=typeList
+ |
+ sc=type
+ )
+ nls)?
+ |
+ /* enum should not have type parameters and extends */
+ )
+
+ (
+ {1 != $t}?
+ (IMPLEMENTS nls is=typeList nls)?
+ |
+ /* interface should not implement other interfaces */
+ )
+ |
+ /* annotation should not have implements and extends*/
+ )
+
+ classBody[$t]
+ ;
+
+// t see the comment of classDeclaration
+classBody[int t]
+ : LBRACE nls
+ (
+ /* Only enum can have enum constants */
+ { 2 == $t }?
+ enumConstants? sep?
+ |
+
+ )
+ classBodyDeclaration[$t]? (sep classBodyDeclaration[$t])* sep? RBRACE
+ ;
+
+enumConstants
+ : enumConstant (nls COMMA nls enumConstant)* (nls COMMA)?
+ ;
+
+enumConstant
+ : annotationsOpt identifier arguments? anonymousInnerClassDeclaration[1]?
+ ;
+
+classBodyDeclaration[int t]
+ : SEMI
+ | (STATIC nls)? block
+ | memberDeclaration[$t]
+ ;
+
+memberDeclaration[int t]
+ : methodDeclaration[0, $t]
+ | fieldDeclaration
+ | modifiersOpt classDeclaration
+ ;
+
+/**
+ * t 0: *class member* all kinds of method declaration AND constructor declaration,
+ * 1: normal method declaration, 2: abstract method declaration
+ * 3: normal method declaration OR abstract method declaration
+ * ct 9: script, other see the comment of classDeclaration
+ */
+methodDeclaration[int t, int ct]
+ : { 3 == $ct }?
+ returnType[$ct] methodName LPAREN rparen (DEFAULT nls elementValue)?
+ |
+ modifiersOpt typeParameters? returnType[$ct]?
+ methodName formalParameters (nls THROWS nls qualifiedClassNameList)?
+ nls methodBody?
+ ;
+
+methodName
+ : identifier
+ | stringLiteral
+ ;
+
+returnType[int ct]
+ :
+ standardType
+ |
+ // annotation method can not have void return type
+ { 3 != $ct }? VOID
+ ;
+
+fieldDeclaration
+ : variableDeclaration[1]
+ ;
+
+variableDeclarators
+ : variableDeclarator (COMMA nls variableDeclarator)*
+ ;
+
+variableDeclarator
+ : variableDeclaratorId (nls ASSIGN nls variableInitializer)?
+ ;
+
+variableDeclaratorId
+ : identifier
+ ;
+
+variableInitializer
+ : enhancedStatementExpression
+ ;
+
+variableInitializers
+ : variableInitializer nls (COMMA nls variableInitializer nls)* nls COMMA?
+ ;
+
+dims
+ : (annotationsOpt LBRACK RBRACK)+
+ ;
+
+dimsOpt
+ : dims?
+ ;
+
+standardType
+options { baseContext = type; }
+ : annotationsOpt
+ (
+ primitiveType
+ |
+ standardClassOrInterfaceType
+ )
+ dimsOpt
+ ;
+
+type
+ : annotationsOpt
+ (
+ (
+ primitiveType
+ |
+ // !!! Error Alternative !!!
+ VOID
+ )
+ |
+ generalClassOrInterfaceType
+ )
+ dimsOpt
+ ;
+
+classOrInterfaceType
+ : ( qualifiedClassName
+ | qualifiedStandardClassName
+ ) typeArguments?
+ ;
+
+generalClassOrInterfaceType
+options { baseContext = classOrInterfaceType; }
+ : qualifiedClassName typeArguments?
+ ;
+
+standardClassOrInterfaceType
+options { baseContext = classOrInterfaceType; }
+ : qualifiedStandardClassName typeArguments?
+ ;
+
+primitiveType
+ : BuiltInPrimitiveType
+ ;
+
+typeArguments
+ : LT nls typeArgument (COMMA nls typeArgument)* nls GT
+ ;
+
+typeArgument
+ : type
+ | annotationsOpt QUESTION ((EXTENDS | SUPER) nls type)?
+ ;
+
+annotatedQualifiedClassName
+ : annotationsOpt qualifiedClassName
+ ;
+
+qualifiedClassNameList
+ : annotatedQualifiedClassName (COMMA nls annotatedQualifiedClassName)*
+ ;
+
+formalParameters
+ : LPAREN formalParameterList? rparen
+ ;
+
+formalParameterList
+ : (formalParameter | thisFormalParameter) (COMMA nls formalParameter)*
+ ;
+
+thisFormalParameter
+ : type THIS
+ ;
+
+formalParameter
+ : variableModifiersOpt type? ELLIPSIS? variableDeclaratorId (nls ASSIGN nls expression)?
+ ;
+
+methodBody
+ : block
+ ;
+
+qualifiedName
+ : qualifiedNameElement (DOT qualifiedNameElement)*
+ ;
+
+/**
+ * Java doesn't have the keywords 'as', 'in', 'def', 'trait' so we make some allowances
+ * for them in package names for better integration with existing Java packages
+ */
+qualifiedNameElement
+ : identifier
+ | DEF
+ | IN
+ | AS
+ | TRAIT
+ ;
+
+qualifiedNameElements
+ : (qualifiedNameElement DOT)*
+ ;
+
+qualifiedClassName
+ : qualifiedNameElements identifier
+ ;
+
+qualifiedStandardClassName
+ : qualifiedNameElements className (DOT className)*
+ ;
+
+literal
+ : IntegerLiteral #integerLiteralAlt
+ | FloatingPointLiteral #floatingPointLiteralAlt
+ | stringLiteral #stringLiteralAlt
+ | BooleanLiteral #booleanLiteralAlt
+ | NullLiteral #nullLiteralAlt
+ ;
+
+// GSTRING
+
+gstring
+ : GStringBegin gstringValue (GStringPart gstringValue)* GStringEnd
+ ;
+
+gstringValue
+ : gstringPath
+ | LBRACE statementExpression? RBRACE
+ | closure
+ ;
+
+gstringPath
+ : identifier GStringPathPart*
+ ;
+
+
+// LAMBDA EXPRESSION
+lambdaExpression
+options { baseContext = standardLambdaExpression; }
+ : lambdaParameters nls ARROW nls lambdaBody
+ ;
+
+// JAVA STANDARD LAMBDA EXPRESSION
+standardLambdaExpression
+ : standardLambdaParameters nls ARROW nls lambdaBody
+ ;
+
+lambdaParameters
+options { baseContext = standardLambdaParameters; }
+ : formalParameters
+
+ // { a -> a * 2 } can be parsed as a lambda expression in a block, but we expect a closure.
+ // So it is better to put parameters in the parentheses and the following single parameter without parentheses is limited
+// | variableDeclaratorId
+ ;
+
+standardLambdaParameters
+ : formalParameters
+ | variableDeclaratorId
+ ;
+
+lambdaBody
+ : block
+ | statementExpression
+ ;
+
+// CLOSURE
+closure
+ : LBRACE nls (formalParameterList? nls ARROW nls)? blockStatementsOpt RBRACE
+ ;
+
+// GROOVY-8991: Difference in behaviour with closure and lambda
+closureOrLambdaExpression
+ : closure
+ | lambdaExpression
+ ;
+
+blockStatementsOpt
+ : blockStatements?
+ ;
+
+blockStatements
+ : blockStatement (sep blockStatement)* sep?
+ ;
+
+// ANNOTATIONS
+
+annotationsOpt
+ : (annotation nls)*
+ ;
+
+annotation
+ : AT annotationName ( LPAREN elementValues? rparen )?
+ ;
+
+elementValues
+ : elementValuePairs
+ | elementValue
+ ;
+
+annotationName : qualifiedClassName ;
+
+elementValuePairs
+ : elementValuePair (COMMA elementValuePair)*
+ ;
+
+elementValuePair
+ : elementValuePairName nls ASSIGN nls elementValue
+ ;
+
+elementValuePairName
+ : identifier
+ | keywords
+ ;
+
+// TODO verify the potential performance issue because rule expression contains sub-rule assignments(https://github.com/antlr/grammars-v4/issues/215)
+elementValue
+ : elementValueArrayInitializer
+ | annotation
+ | expression
+ ;
+
+elementValueArrayInitializer
+ : LBRACK (elementValue (COMMA elementValue)*)? (COMMA)? RBRACK
+ ;
+
+// STATEMENTS / BLOCKS
+
+block
+ : LBRACE (nls | sep*) blockStatementsOpt RBRACE
+ ;
+
+blockStatement
+ : localVariableDeclaration
+ | statement
+ ;
+
+localVariableDeclaration
+ : { !SemanticPredicates.isInvalidLocalVariableDeclaration(_input) }?
+ variableDeclaration[0]
+ ;
+
+classifiedModifiers[int t]
+ : { 0 == $t }? variableModifiers
+ | { 1 == $t }? modifiers
+ ;
+
+
+/**
+ * t 0: local variable declaration; 1: field declaration
+ */
+variableDeclaration[int t]
+@leftfactor { classifiedModifiers }
+ : classifiedModifiers[$t]
+ ( type? variableDeclarators
+ | typeNamePairs nls ASSIGN nls variableInitializer
+ )
+ |
+ classifiedModifiers[$t]?
+ type variableDeclarators
+ ;
+
+typeNamePairs
+ : LPAREN typeNamePair (COMMA typeNamePair)* rparen
+ ;
+
+typeNamePair
+ : type? variableDeclaratorId
+ ;
+
+variableNames
+ : LPAREN variableDeclaratorId (COMMA variableDeclaratorId)+ rparen
+ ;
+
+conditionalStatement
+ : ifElseStatement
+ | switchStatement
+ ;
+
+ifElseStatement
+ : IF expressionInPar nls tb=statement ((nls | sep) ELSE nls fb=statement)?
+ ;
+
+switchStatement
+ : SWITCH expressionInPar nls LBRACE nls switchBlockStatementGroup* nls RBRACE
+ ;
+
+loopStatement
+ : FOR LPAREN forControl rparen nls statement #forStmtAlt
+ | WHILE expressionInPar nls statement #whileStmtAlt
+ | DO nls statement nls WHILE expressionInPar #doWhileStmtAlt
+ ;
+
+continueStatement
+ : CONTINUE
+ identifier?
+ ;
+
+breakStatement
+ : BREAK
+ identifier?
+ ;
+
+tryCatchStatement
+ : TRY resources? nls block
+ (nls catchClause)*
+ (nls finallyBlock)?
+ ;
+
+assertStatement
+ : ASSERT ce=expression (nls (COLON | COMMA) nls me=expression)?
+ ;
+
+statement
+ : block #blockStmtAlt
+ | conditionalStatement #conditionalStmtAlt
+ | loopStatement #loopStmtAlt
+
+ | tryCatchStatement #tryCatchStmtAlt
+
+ | SYNCHRONIZED expressionInPar nls block #synchronizedStmtAlt
+ | RETURN expression? #returnStmtAlt
+ | THROW expression #throwStmtAlt
+
+ | breakStatement #breakStmtAlt
+ | continueStatement #continueStmtAlt
+
+ | identifier COLON nls statement #labeledStmtAlt
+
+ | assertStatement #assertStmtAlt
+
+ | localVariableDeclaration #localVariableDeclarationStmtAlt
+
+ // validate the method in the AstBuilder#visitMethodDeclaration, e.g. method without method body is not allowed
+ | { !SemanticPredicates.isInvalidMethodDeclaration(_input) }?
+ methodDeclaration[3, 9] #methodDeclarationStmtAlt
+
+ | statementExpression #expressionStmtAlt
+
+ | SEMI #emptyStmtAlt
+ ;
+
+catchClause
+ : CATCH LPAREN variableModifiersOpt catchType? identifier rparen nls block
+ ;
+
+catchType
+ : qualifiedClassName (BITOR qualifiedClassName)*
+ ;
+
+finallyBlock
+ : FINALLY nls block
+ ;
+
+resources
+ : LPAREN nls resourceList sep? rparen
+ ;
+
+resourceList
+ : resource (sep resource)*
+ ;
+
+resource
+ : localVariableDeclaration
+ | expression
+ ;
+
+
+/** Matches cases then statements, both of which are mandatory.
+ * To handle empty cases at the end, we add switchLabel* to statement.
+ */
+switchBlockStatementGroup
+ : (switchLabel nls)+ blockStatements
+ ;
+
+switchLabel
+ : CASE expression COLON
+ | DEFAULT COLON
+ ;
+
+forControl
+ : enhancedForControl
+ | classicalForControl
+ ;
+
+enhancedForControl
+ : variableModifiersOpt type? variableDeclaratorId (COLON | IN) expression
+ ;
+
+classicalForControl
+ : forInit? SEMI expression? SEMI forUpdate?
+ ;
+
+forInit
+ : localVariableDeclaration
+ | expressionList[false]
+ ;
+
+forUpdate
+ : expressionList[false]
+ ;
+
+
+// EXPRESSIONS
+
+castParExpression
+ : LPAREN type rparen
+ ;
+
+parExpression
+ : expressionInPar
+ ;
+
+expressionInPar
+ : LPAREN enhancedStatementExpression rparen
+ ;
+
+expressionList[boolean canSpread]
+ : expressionListElement[$canSpread] (COMMA expressionListElement[$canSpread])*
+ ;
+
+expressionListElement[boolean canSpread]
+ : ( MUL { require($canSpread, "spread operator is not allowed here", -1); }
+ |
+ ) expression
+ ;
+
+enhancedStatementExpression
+ : statementExpression
+ | standardLambdaExpression
+ ;
+
+statementExpression
+ : commandExpression #commandExprAlt
+ ;
+
+postfixExpression
+ : pathExpression op=(INC | DEC)?
+ ;
+
+expression
+ // qualified names, array expressions, method invocation, post inc/dec, type casting (level 1)
+    // The cast expression must be put before pathExpression to resolve the ambiguities between type casting and call on parentheses expression, e.g. (int)(1 / 2)
+ : castParExpression castOperandExpression #castExprAlt
+ | postfixExpression #postfixExprAlt
+
+ // ~(BNOT)/!(LNOT) (level 1)
+ | (BITNOT | NOT) nls expression #unaryNotExprAlt
+
+ // math power operator (**) (level 2)
+ | left=expression op=POWER nls right=expression #powerExprAlt
+
+ // ++(prefix)/--(prefix)/+(unary)/-(unary) (level 3)
+ | op=(INC | DEC | ADD | SUB) expression #unaryAddExprAlt
+
+ // multiplication/division/modulo (level 4)
+ | left=expression nls op=(MUL | DIV | MOD) nls right=expression #multiplicativeExprAlt
+
+ // binary addition/subtraction (level 5)
+ | left=expression op=(ADD | SUB) nls right=expression #additiveExprAlt
+
+ // bit shift expressions (level 6)
+ | left=expression nls
+ ( ( dlOp=LT LT
+ | tgOp=GT GT GT
+ | dgOp=GT GT
+ )
+ | rangeOp=( RANGE_INCLUSIVE
+ | RANGE_EXCLUSIVE
+ )
+ ) nls
+ right=expression #shiftExprAlt
+
+ // boolean relational expressions (level 7)
+ | left=expression nls op=(AS | INSTANCEOF | NOT_INSTANCEOF) nls type #relationalExprAlt
+ | left=expression nls op=(LE | GE | GT | LT | IN | NOT_IN) nls right=expression #relationalExprAlt
+
+ // equality/inequality (==/!=) (level 8)
+ | left=expression nls
+ op=( IDENTICAL
+ | NOT_IDENTICAL
+ | EQUAL
+ | NOTEQUAL
+ | SPACESHIP
+ ) nls
+ right=expression #equalityExprAlt
+
+ // regex find and match (=~ and ==~) (level 8.5)
+ // jez: moved =~ closer to precedence of == etc, as...
+ // 'if (foo =~ "a.c")' is very close in intent to 'if (foo == "abc")'
+ | left=expression nls op=(REGEX_FIND | REGEX_MATCH) nls right=expression #regexExprAlt
+
+ // bitwise or non-short-circuiting and (&) (level 9)
+ | left=expression nls op=BITAND nls right=expression #andExprAlt
+
+ // exclusive or (^) (level 10)
+ | left=expression nls op=XOR nls right=expression #exclusiveOrExprAlt
+
+ // bitwise or non-short-circuiting or (|) (level 11)
+ | left=expression nls op=BITOR nls right=expression #inclusiveOrExprAlt
+
+ // logical and (&&) (level 12)
+ | left=expression nls op=AND nls right=expression #logicalAndExprAlt
+
+ // logical or (||) (level 13)
+ | left=expression nls op=OR nls right=expression #logicalOrExprAlt
+
+ // conditional test (level 14)
+ | con=expression nls
+ ( QUESTION nls tb=expression nls COLON nls
+ | ELVIS nls
+ )
+ fb=expression #conditionalExprAlt
+
+ // assignment expression (level 15)
+    // "(a) = [1]" is a special case of multipleAssignmentExprAlt, it will be handled by assignmentExprAlt
+ | left=variableNames nls op=ASSIGN nls right=statementExpression #multipleAssignmentExprAlt
+ | left=expression nls
+ op=( ASSIGN
+ | ADD_ASSIGN
+ | SUB_ASSIGN
+ | MUL_ASSIGN
+ | DIV_ASSIGN
+ | AND_ASSIGN
+ | OR_ASSIGN
+ | XOR_ASSIGN
+ | RSHIFT_ASSIGN
+ | URSHIFT_ASSIGN
+ | LSHIFT_ASSIGN
+ | MOD_ASSIGN
+ | POWER_ASSIGN
+ | ELVIS_ASSIGN
+ ) nls
+ enhancedStatementExpression #assignmentExprAlt
+ ;
+
+
+castOperandExpression
+options { baseContext = expression; }
+ : castParExpression castOperandExpression #castExprAlt
+ | postfixExpression #postfixExprAlt
+ // ~(BNOT)/!(LNOT) (level 1)
+ | (BITNOT | NOT) nls castOperandExpression #unaryNotExprAlt
+ // ++(prefix)/--(prefix)/+(unary)/-(unary) (level 3)
+ | op=(INC | DEC | ADD | SUB) castOperandExpression #unaryAddExprAlt
+ ;
+
+
+/*
+enhancedExpression
+ : expression
+ | standardLambdaExpression
+ ;
+*/
+
+commandExpression
+ : expression
+ (
+ { !SemanticPredicates.isFollowingArgumentsOrClosure($expression.ctx) }?
+ argumentList
+ |
+ /* if pathExpression is a method call, no need to have any more arguments */
+ )
+
+ commandArgument*
+ ;
+
+commandArgument
+ : primary
+ // what follows is either a normal argument, parens,
+ // an appended block, an index operation, or nothing
+ // parens (a b already processed):
+ // a b c() d e -> a(b).c().d(e)
+ // a b c()() d e -> a(b).c().call().d(e)
+ // index (a b already processed):
+ // a b c[x] d e -> a(b).c[x].d(e)
+ // a b c[x][y] d e -> a(b).c[x][y].d(e)
+ // block (a b already processed):
+ // a b c {x} d e -> a(b).c({x}).d(e)
+ //
+ // parens/block completes method call
+ // index makes method call to property get with index
+ //
+ ( pathElement+
+ | argumentList
+ )?
+ ;
+
+/**
+ * A "path expression" is a name or other primary, possibly qualified by various
+ * forms of dot, and/or followed by various kinds of brackets.
+ * It can be used for value or assigned to, or else further qualified, indexed, or called.
+ * It is called a "path" because it looks like a linear path through a data structure.
+ * Examples: x.y, x?.y, x*.y, x.@y; x[], x[y], x[y,z]; x(), x(y), x(y,z); x{s}; a.b[n].c(x).d{s}
+ * (Compare to a C lvalue, or LeftHandSide in the JLS section 15.26.)
+ * General expressions are built up from path expressions, using operators like '+' and '='.
+ *
+ * t 0: primary, 1: namePart, 2: arguments, 3: closureOrLambdaExpression, 4: indexPropertyArgs, 5: namedPropertyArgs,
+ * 6: non-static inner class creator
+ */
+pathExpression returns [int t]
+ : primary (pathElement { $t = $pathElement.t; })*
+ ;
+
+pathElement returns [int t]
+ : nls
+
+ // AT: foo.@bar selects the field (or attribute), not property
+ (
+ ( DOT // The all-powerful dot.
+ | SPREAD_DOT // Spread operator: x*.y === x?.collect{it.y}
+ | SAFE_DOT // Optional-null operator: x?.y === (x==null)?null:x.y
+ | SAFE_CHAIN_DOT // Optional-null chain operator: x??.y.z === x?.y?.z
+ ) nls (AT | nonWildcardTypeArguments)?
+ |
+ METHOD_POINTER nls // Method pointer operator: foo.&y == foo.metaClass.getMethodPointer(foo, "y")
+ |
+ METHOD_REFERENCE nls // Method reference: System.out::println
+ )
+ namePart
+ { $t = 1; }
+ |
+ nls DOT nls NEW creator[1]
+ { $t = 6; }
+ | arguments
+ { $t = 2; }
+
+ // Can always append a block, as foo{bar}
+ | nls closureOrLambdaExpression
+ { $t = 3; }
+
+ // Element selection is always an option, too.
+ // In Groovy, the stuff between brackets is a general argument list,
+ // since the bracket operator is transformed into a method call.
+ | indexPropertyArgs
+ { $t = 4; }
+
+ | namedPropertyArgs
+ { $t = 5; }
+ ;
+
+/**
+ * This is the grammar for what can follow a dot: x.a, x.@a, x.&a, x.'a', etc.
+ */
+namePart
+ :
+ ( identifier
+
+ // foo.'bar' is in all ways same as foo.bar, except that bar can have an arbitrary spelling
+ | stringLiteral
+
+ | dynamicMemberName
+
+ /* just a PROPOSAL, which has not been implemented yet!
+ // PROPOSAL, DECIDE: Is this inline form of the 'with' statement useful?
+ // Definition: a.{foo} === {with(a) {foo}}
+ // May cover some path expression use-cases previously handled by dynamic scoping (closure delegates).
+ | block
+ */
+
+ // let's allow common keywords as property names
+ | keywords
+ )
+ ;
+
+/**
+ * If a dot is followed by a parenthesized or quoted expression, the member is computed dynamically,
+ * and the member selection is done only at runtime. This forces a statically unchecked member access.
+ */
+dynamicMemberName
+ : parExpression
+ | gstring
+ ;
+
+/** An expression may be followed by [...].
+ * Unlike Java, these brackets may contain a general argument list,
+ * which is passed to the array element operator, which can make of it what it wants.
+ * The brackets may also be empty, as in T[]. This is how Groovy names array types.
+ */
+indexPropertyArgs
+ : QUESTION? LBRACK expressionList[true]? RBRACK
+ ;
+
+namedPropertyArgs
+ : QUESTION? LBRACK (mapEntryList | COLON) RBRACK
+ ;
+
+primary
+ :
+ // Append `typeArguments?` to `identifier` to support constructor reference with generics, e.g. HashMap::new
+ // Though this is not a graceful solution, it is much faster than replacing `builtInType` with `type`
+ identifier typeArguments? #identifierPrmrAlt
+ | literal #literalPrmrAlt
+ | gstring #gstringPrmrAlt
+ | NEW nls creator[0] #newPrmrAlt
+ | THIS #thisPrmrAlt
+ | SUPER #superPrmrAlt
+ | parExpression #parenPrmrAlt
+ | closureOrLambdaExpression #closureOrLambdaExpressionPrmrAlt
+ | list #listPrmrAlt
+ | map #mapPrmrAlt
+ | builtInType #builtInTypePrmrAlt
+ ;
+
+list
+ : LBRACK expressionList[true]? COMMA? RBRACK
+ ;
+
+map
+ : LBRACK
+ ( mapEntryList COMMA?
+ | COLON
+ )
+ RBRACK
+ ;
+
+mapEntryList
+ : mapEntry (COMMA mapEntry)*
+ ;
+
+mapEntry
+ : mapEntryLabel COLON nls expression
+ | MUL COLON nls expression
+ ;
+
+mapEntryLabel
+ : keywords
+ | primary
+ ;
+
+/**
+ * t 0: general creation; 1: non-static inner class creation
+ */
+creator[int t]
+ : createdName
+ ( {0 == $t || 1 == $t}? nls arguments anonymousInnerClassDeclaration[0]?
+ | {0 == $t}? (annotationsOpt LBRACK expression RBRACK)+ dimsOpt
+ | {0 == $t}? dims nls arrayInitializer
+ )
+ ;
+
+arrayInitializer
+ : LBRACE nls variableInitializers? nls RBRACE
+ ;
+
+/**
+ * t 0: anonymous inner class; 1: anonymous enum
+ */
+anonymousInnerClassDeclaration[int t]
+ : classBody[0]
+ ;
+
+createdName
+ : annotationsOpt
+ ( primitiveType
+ | qualifiedClassName typeArgumentsOrDiamond?
+ )
+ ;
+
+nonWildcardTypeArguments
+ : LT nls typeList nls GT
+ ;
+
+typeArgumentsOrDiamond
+ : LT GT
+ | typeArguments
+ ;
+
+arguments
+ : LPAREN enhancedArgumentList? COMMA? rparen
+ ;
+
+argumentList
+options { baseContext = enhancedArgumentList; }
+ : argumentListElement
+ ( COMMA nls
+ argumentListElement
+ )*
+ ;
+
+enhancedArgumentList
+ : enhancedArgumentListElement
+ ( COMMA nls
+ enhancedArgumentListElement
+ )*
+ ;
+
+argumentListElement
+options { baseContext = enhancedArgumentListElement; }
+ : expressionListElement[true]
+ | mapEntry
+ ;
+
+enhancedArgumentListElement
+ : expressionListElement[true]
+ | standardLambdaExpression
+ | mapEntry
+ ;
+
+stringLiteral
+ : StringLiteral
+ ;
+
+className
+ : CapitalizedIdentifier
+ ;
+
+identifier
+ : Identifier
+ | CapitalizedIdentifier
+ | VAR
+ |
+ // if 'static' followed by DOT, we can treat them as identifiers, e.g. static.unused = { -> }
+ { DOT == _input.LT(2).getType() }?
+ STATIC
+ | IN
+// | DEF
+ | TRAIT
+ | AS
+ ;
+
+builtInType
+ : BuiltInPrimitiveType
+ | VOID
+ ;
+
+keywords
+ : ABSTRACT
+ | AS
+ | ASSERT
+ | BREAK
+ | CASE
+ | CATCH
+ | CLASS
+ | CONST
+ | CONTINUE
+ | DEF
+ | DEFAULT
+ | DO
+ | ELSE
+ | ENUM
+ | EXTENDS
+ | FINAL
+ | FINALLY
+ | FOR
+ | GOTO
+ | IF
+ | IMPLEMENTS
+ | IMPORT
+ | IN
+ | INSTANCEOF
+ | INTERFACE
+ | NATIVE
+ | NEW
+ | PACKAGE
+ | RETURN
+ | STATIC
+ | STRICTFP
+ | SUPER
+ | SWITCH
+ | SYNCHRONIZED
+ | THIS
+ | THROW
+ | THROWS
+ | TRANSIENT
+ | TRAIT
+ | THREADSAFE
+ | TRY
+ | VAR
+ | VOLATILE
+ | WHILE
+
+ | NullLiteral
+ | BooleanLiteral
+
+ | BuiltInPrimitiveType
+ | VOID
+
+ | PUBLIC
+ | PROTECTED
+ | PRIVATE
+ ;
+
+rparen
+ : RPAREN
+ |
+      // !!!Error Alternative, impacts the performance of parsing
+ { require(false, "Missing ')'"); }
+ ;
+
+nls
+ : NL*
+ ;
+
+sep : (NL | SEMI)+
+ ;
diff --git a/src/it/java/org/antlr/bazel/Antlr3Test.java b/src/it/java/org/antlr/bazel/Antlr3Test.java
index c76234b..6ca5423 100644
--- a/src/it/java/org/antlr/bazel/Antlr3Test.java
+++ b/src/it/java/org/antlr/bazel/Antlr3Test.java
@@ -177,6 +177,29 @@ public void singleError() throws Exception
}
+ @Test
+ public void objC() throws Exception
+ {
+ try (TestProject project = TestProject.create("examples/antlr3/ObjC"))
+ {
+ AntlrRules.create(project.root())
+ .srcjar(project.srcjar().toString())
+ .version("3")
+ .classpath(classpath())
+ .outputDirectory(project.outputDirectory().toString())
+ .grammars(project.grammars())
+ .args(project.args())
+ .generate();
+
+ project.validate("SimpleCLexer.h",
+ "SimpleCLexer.m",
+ "SimpleCParser.h",
+ "SimpleCParser.m",
+ "SimpleC.tokens");
+ }
+ }
+
+
private String[] classpath() throws Exception
{
Path root = Paths.get(System.getenv().get("RUNFILES_DIR"));
diff --git a/src/it/java/org/antlr/bazel/BUILD b/src/it/java/org/antlr/bazel/BUILD
index cc5faec..1064c49 100644
--- a/src/it/java/org/antlr/bazel/BUILD
+++ b/src/it/java/org/antlr/bazel/BUILD
@@ -1,18 +1,23 @@
load("@rules_java//java:defs.bzl", "java_library")
load("//tools:gen_test_rules.bzl", "java_tests")
-filegroup(
+java_library(
name = "support",
srcs = glob(
["*.java"],
exclude = ["*Test.java"],
),
+ javacopts = ["--release 11"],
+ deps = [
+ "//src/test/java/org/antlr/bazel:tests",
+ "@junit//jar",
+ ],
)
java_library(
name = "antlr2_tests",
testonly = True,
- srcs = glob(["Antlr2Test.java"]) + [":support"],
+ srcs = glob(["Antlr2Test.java"]),
data = [
"//:srcs",
"//antlr:srcs",
@@ -22,6 +27,7 @@ java_library(
"@examples//antlr2:srcs",
],
deps = [
+ ":support",
"//src/main/java/org/antlr/bazel",
"//src/test/java/org/antlr/bazel:tests",
"@antlr2//jar",
@@ -41,7 +47,7 @@ java_tests(
java_library(
name = "antlr3_tests",
testonly = True,
- srcs = glob(["Antlr3Test.java"]) + [":support"],
+ srcs = glob(["Antlr3Test.java"]),
data = [
"//:srcs",
"//antlr:srcs",
@@ -50,6 +56,7 @@ java_library(
"@examples//antlr3:srcs",
],
deps = [
+ ":support",
"//src/main/java/org/antlr/bazel",
"//src/test/java/org/antlr/bazel:tests",
"@antlr3_runtime//jar",
@@ -71,7 +78,7 @@ java_tests(
java_library(
name = "antlr4_tests",
testonly = True,
- srcs = glob(["Antlr4Test.java"]) + [":support"],
+ srcs = glob(["Antlr4Test.java"]),
data = [
"//:srcs",
"//antlr:srcs",
@@ -80,6 +87,7 @@ java_library(
"@examples//antlr4:srcs",
],
deps = [
+ ":support",
"//src/main/java/org/antlr/bazel",
"//src/test/java/org/antlr/bazel:tests",
"@antlr3_runtime//jar",
@@ -103,7 +111,7 @@ java_tests(
java_library(
name = "repository_tests",
testonly = True,
- srcs = glob(["RepositoriesTest.java"]) + [":support"],
+ srcs = glob(["RepositoriesTest.java"]),
data = [
"//:srcs",
"//antlr:srcs",
@@ -113,6 +121,7 @@ java_library(
"@examples//antlr2:srcs",
],
deps = [
+ ":support",
"//src/main/java/org/antlr/bazel",
"//src/test/java/org/antlr/bazel:tests",
"@antlr2//jar",
diff --git a/src/main/java/org/antlr/bazel/AntlrRules.java b/src/main/java/org/antlr/bazel/AntlrRules.java
index 68768a1..decd3f5 100644
--- a/src/main/java/org/antlr/bazel/AntlrRules.java
+++ b/src/main/java/org/antlr/bazel/AntlrRules.java
@@ -38,6 +38,11 @@
*/
public class AntlrRules
{
+ private final static CopyOption[] COPY_OPTIONS = {
+ StandardCopyOption.COPY_ATTRIBUTES,
+ StandardCopyOption.REPLACE_EXISTING
+ };
+
private String[] args;
private String[] classpath;
private Charset encoding = Charset.defaultCharset();
@@ -50,6 +55,7 @@ public class AntlrRules
private Path outputDirectory;
private final Path sandbox;
private Path srcjar;
+ private String target;
private Version version;
private Output output;
private boolean split = true;
@@ -88,6 +94,7 @@ public static void main(String[] args) throws Exception
.namespace(env.get("PACKAGE_NAME"))
.language(env.get("TARGET_LANGUAGE"))
.layout(env.get("DIRECTORY_LAYOUT"))
+ .target(env.get("TARGET"))
.args(args)
.generate();
}
@@ -128,6 +135,8 @@ AntlrRules encoding(String encoding)
void generate() throws Exception
{
+ expandSrcJarImports();
+
Map> namespaces = groupByNamespace(grammars);
// use reflection so we are not tied to a specific ANTLR version
@@ -187,28 +196,15 @@ void generate() throws Exception
Path other = Files.createDirectories(
outputDirectory
.getParent()
- .resolve(
- outputDirectory
- .getFileName()
- .toString()
- .replace(".cc", ".antlr")
- .replace(".go", ".antlr")));
+ .resolve(target + ".antlr"));
Path headers = Files.createDirectories(
outputDirectory
.getParent()
- .resolve(
- outputDirectory
- .getFileName()
- .toString()
- .replace(".cc", ".inc")));
+ .resolve(target + ".inc"));
Path includes = Files.createDirectories(
outputDirectory
.getParent()
- .resolve(
- outputDirectory
- .getFileName()
- .toString()
- .replace(".cc", ".inc")));
+ .resolve(target + ".inc"));
Files.createDirectories(includes);
List files = new ArrayList<>();
@@ -218,9 +214,9 @@ void generate() throws Exception
PathMatcher expanded = outputDirectory.getFileSystem()
.getPathMatcher("glob:**/expanded*.g");
PathMatcher csources = outputDirectory.getFileSystem()
- .getPathMatcher("glob:**.{c,cc,cpp,cxx,c++,C}");
+ .getPathMatcher("glob:**.{c,cc,cpp,cxx,c++,C,m,mm}");
PathMatcher cheaders = outputDirectory.getFileSystem()
- .getPathMatcher("glob:**.{h,hh,hpp,hxx,inc,inl,H}");
+ .getPathMatcher("glob:**.{h,hh,hpp,hxx,h++,inc,inl,ipp,pch,tlh,tli,H}");
PathMatcher gosources = outputDirectory.getFileSystem()
.getPathMatcher("glob:**.{go}");
@@ -259,6 +255,7 @@ void generate() throws Exception
{
case C :
case CPP :
+ case OBJC:
{
if (cheaders.matches(entry))
{
@@ -269,7 +266,6 @@ void generate() throws Exception
.resolve(entry.getFileName());
Files.createDirectories(target.getParent());
Files.move(entry, target);
-
continue;
}
}
@@ -326,12 +322,6 @@ else if (!csources.matches(entry))
Files.walkFileTree(outputDirectory, new SimpleFileVisitor()
{
- CopyOption[] options =
- {
- StandardCopyOption.COPY_ATTRIBUTES,
- StandardCopyOption.REPLACE_EXISTING
- };
-
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attr)
throws IOException
@@ -368,7 +358,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attr)
}
Files.createDirectories(target.getParent());
- Files.copy(file, target, options);
+ Files.copy(file, target, COPY_OPTIONS);
return CONTINUE;
}
@@ -428,7 +418,17 @@ AntlrRules outputDirectory(String directory)
AntlrRules srcjar(String srcjar)
{
this.srcjar = sandbox.resolve(srcjar);
- this.output = srcjar.isBlank() ? Output.FOLDER : Output.SRCJAR;
+ this.output = srcjar.trim().isEmpty() ? Output.FOLDER : Output.SRCJAR;
+
+ return this;
+ }
+
+
+ AntlrRules target(String target)
+ {
+ if (target == null) throw new NullPointerException("target must not be null");
+
+ this.target = target;
return this;
}
@@ -539,6 +539,52 @@ private URLClassLoader classloader(String[] classpath) throws IOException
}
+ private void expandSrcJarImports() throws IOException
+ {
+ for (int i = 0; i < args.length; i++)
+ {
+ // ANTLR can't handle imports in an archive. We therefore expand it and alter
+ // the lib path accordingly
+ if (args[i].equals("-lib") && args[i + 1].endsWith(".srcjar"))
+ {
+ Path srcjar = sandbox.resolve(args[i + 1]);
+ URI uri = URI.create("jar:file:" + srcjar.toUri().getPath());
+
+ try (FileSystem fs = FileSystems.newFileSystem(uri, new HashMap()))
+ {
+ Path root = fs.getPath("/");
+ Path target = sandbox.resolve(this.target + ".imports");
+
+ Files.createDirectories(target);
+ Files.walkFileTree(root, new SimpleFileVisitor()
+ {
+ @Override
+ public FileVisitResult preVisitDirectory(Path dir,
+ BasicFileAttributes attrs) throws IOException
+ {
+ Files.createDirectories(target.resolve(root.relativize(dir).toString()));
+
+ return FileVisitResult.CONTINUE;
+ }
+
+
+ @Override
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+ throws IOException
+ {
+ Files.copy(file, target.resolve(file.getFileName().toString()), COPY_OPTIONS);
+
+ return FileVisitResult.CONTINUE;
+ }
+ });
+
+ args[i + 1] = sandbox.relativize(target).toString();
+ }
+ }
+ }
+ }
+
+
/**
* Finds the grammar that corresponds to the given generated file.
*
diff --git a/src/main/java/org/antlr/bazel/Language.java b/src/main/java/org/antlr/bazel/Language.java
index ca85b6f..050a8f3 100644
--- a/src/main/java/org/antlr/bazel/Language.java
+++ b/src/main/java/org/antlr/bazel/Language.java
@@ -541,7 +541,8 @@ public static Language of(String name)
*/
private static String header(String grammar)
{
- Matcher header = HEADER.matcher(grammar);
+ // remove comments to avoid erroneous matches
+ Matcher header = HEADER.matcher(grammar.replaceAll("(?m://.*$)|(?s:/\\*.*?\\*/)",""));
return header.find() ? header.group(1) : null;
}
diff --git a/src/test/java/org/antlr/bazel/LanguageTest.java b/src/test/java/org/antlr/bazel/LanguageTest.java
index 763e913..0f77406 100644
--- a/src/test/java/org/antlr/bazel/LanguageTest.java
+++ b/src/test/java/org/antlr/bazel/LanguageTest.java
@@ -116,6 +116,9 @@ public void namespace()
JAVA.detectNamespace("header {package foo.bar;}").toString());
assertEquals("foo.bar",
JAVA.detectNamespace("header {package\nfoo.bar;}").toString());
+ assertEquals("org.antlr.v4.parse", JAVA.detectNamespace("// @header test { comment }\n@lexer::header {\npackage org.antlr.v4.parse;\n}").toString());
+ assertEquals("org.antlr.v4.parse", JAVA.detectNamespace("/* @header test { comment } */\n@lexer::header {\npackage org.antlr.v4.parse;\n}").toString());
+ assertEquals("org.antlr.v4.parse", JAVA.detectNamespace("/*\n * @header test { comment }\n */\n@lexer::header {\npackage org.antlr.v4.parse;\n}").toString());
assertEquals(null, OBJC.detectNamespace(""));