Skip to content

Commit

Permalink
Add support for any character in multiline comments + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
matt2xu committed Apr 29, 2020
1 parent 420cf1b commit 2886c35
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 30 deletions.
53 changes: 24 additions & 29 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,43 +144,38 @@ impl<'a, R: Read> Lexer<'a, R> {
}

fn bracketed_comment(&mut self) -> Result<bool, ParserError> {
if comment_1_char!(self.lookahead_char()?) {
let c = self.skip_char()?;

if comment_2_char!(self.lookahead_char()?) {
self.skip_char()?;

loop {
let mut c = self.lookahead_char()?;
while prolog_char!(c) && !comment_2_char!(c) {
self.skip_char()?;
c = self.lookahead_char()?;
}
// we have already checked that the current lookahead_char is comment_1_char, just skip it
let c = self.skip_char()?;

if prolog_char!(c) {
self.skip_char()?;
}
if comment_2_char!(self.lookahead_char()?) {
self.skip_char()?;

// Keep reading until we find characters '*' and '/'
// Deliberately skip checks for prolog_char to allow comments to contain any characters,
// including so-called "extended characters", without having to explicitly add them to a character class.
let mut c = self.lookahead_char()?;
loop {
while !comment_2_char!(c) {
self.skip_char()?;
c = self.lookahead_char()?;

if !(prolog_char!(c) && !comment_1_char!(c)) {
break;
}
}

let c = self.lookahead_char()?;
self.skip_char()?;

if prolog_char!(c) {
self.skip_char()?;
Ok(true)
} else {
Err(ParserError::NonPrologChar(self.line_num, self.col_num))
c = self.lookahead_char()?;
if comment_1_char!(c) {
break;
}
}

if prolog_char!(c) {
self.skip_char()?;
Ok(true)
} else {
self.return_char(c);
Ok(false)
Err(ParserError::NonPrologChar(self.line_num, self.col_num))
}
} else {
self.return_char(c);
Ok(false)
}
}
Expand Down Expand Up @@ -834,11 +829,11 @@ impl<'a, R: Read> Lexer<'a, R> {
self.skip_char()?;
layout_inserted = true;
},
Ok(c) if c == '%' => {
Ok(c) if end_line_comment_char!(c) => {
self.single_line_comment()?;
layout_inserted = true;
},
Ok(c) if c == '/' =>
Ok(c) if comment_1_char!(c) =>
if self.bracketed_comment()? {
layout_inserted = true;
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ extern crate unicode_reader;
pub mod parser;
pub mod put_back_n;

mod lexer;
pub mod lexer;
5 changes: 5 additions & 0 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ macro_rules! new_line_char {
($c: expr) => ($c == '\n')
}

/// Evaluates to `true` when the given expression equals `'%'`, the character
/// this macro names as the start of an end-of-line comment.
#[macro_export]
macro_rules! end_line_comment_char {
    ($c:expr) => {
        $c == '%'
    };
}

#[macro_export]
macro_rules! comment_1_char {
($c: expr) => ($c == '/')
Expand Down
35 changes: 35 additions & 0 deletions tests/parse_comments.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
extern crate prolog_parser;

use prolog_parser::ast::*;
use prolog_parser::lexer::{Lexer, Token};
use prolog_parser::tabled_rc::TabledData;

use std::rc::Rc;

/// Runs a `Lexer` over `text` and collects every token it produces, in order.
///
/// Reading stops as soon as `lexer.eof()` reports `true`. Any `ParserError`
/// raised by `eof()` or `next_token()` is returned immediately, discarding
/// the tokens gathered so far.
fn read_all_tokens(text: &str) -> Result<Vec<Token>, ParserError> {
    let atoms = TabledData::new(Rc::new("my_module".to_string()));
    let machine_flags = MachineFlags::default();
    let mut input = parsing_stream(text.as_bytes());
    let mut lexer = Lexer::new(atoms, machine_flags, &mut input);

    let mut collected = Vec::new();
    loop {
        // Check for end of input before every read, exactly once per token.
        if lexer.eof()? {
            return Ok(collected);
        }
        collected.push(lexer.next_token()?);
    }
}

/// An empty bracketed comment (`/**/`) contributes no token: only the
/// integer that follows it is expected in the output.
#[test]
fn empty_multiline_comment() -> Result<(), ParserError> {
    let expected = [Token::Constant(Constant::Fixnum(4))];
    let actual = read_all_tokens("/**/ 4\n")?;
    assert_eq!(actual, expected);
    Ok(())
}

/// A bracketed comment containing box-drawing characters and `©` is skipped
/// in full: only the integer that follows it is expected in the output.
#[test]
fn any_char_multiline_comment() -> Result<(), ParserError> {
    let expected = [Token::Constant(Constant::Fixnum(4))];
    let actual = read_all_tokens("/* █╗╚═══╝ © */ 4\n")?;
    assert_eq!(actual, expected);
    Ok(())
}

0 comments on commit 2886c35

Please sign in to comment.