Skip to content

Commit

Permalink
Fix source locations with --no-preprocessor and add a virtual file …
Browse files Browse the repository at this point in the history
…system layer to the parser (#2377)

* Add a virtual file system layer to the parser

The default virtual file system used by the parser is the real
file system. It is also possible to initialize the parser driver
with a different virtual file system.

The Overlay file system is a stack of virtual file systems.

* Fix source locations with `--no-preprocessor`.

Source locations were off by one line.

The source location on the first line had an uninitialized column
number.

Error messages would point to the wrong column when the location was
after a run of consecutive non-leading whitespace characters.
  • Loading branch information
quentin authored Jan 16, 2023
1 parent 4ee37c0 commit 84878b9
Show file tree
Hide file tree
Showing 32 changed files with 677 additions and 84 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ set(SOUFFLE_SOURCES
parser/ParserDriver.cpp
parser/ParserUtils.cpp
parser/SrcLocation.cpp
parser/VirtualFileSystem.cpp
ram/Node.cpp
ram/TranslationUnit.cpp
ram/analysis/Complexity.cpp
Expand Down
22 changes: 20 additions & 2 deletions src/MainDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class InputProvider {
virtual ~InputProvider() {}
virtual FILE* getInputStream() = 0;
virtual bool endInput() = 0;
virtual bool reducedConsecutiveNonLeadingWhitespaces() const = 0;
};

class FileInput : public InputProvider {
Expand Down Expand Up @@ -257,6 +258,10 @@ class FileInput : public InputProvider {
}
}

// A plain file input always answers `false` here.
// NOTE(review): presumably because the file is handed to the scanner verbatim,
// without a preprocessor collapsing whitespace — confirm.
bool reducedConsecutiveNonLeadingWhitespaces() const override { return false; }

private:
const std::filesystem::path Path;
FILE* Stream = nullptr;
Expand Down Expand Up @@ -331,6 +336,10 @@ class PreprocInput : public InputProvider {
return true;
}

// Preprocessor-backed inputs answer `true` here.
// NOTE(review): presumably the external preprocessor collapses runs of
// non-leading whitespace, which the parser must account for when computing
// column numbers — confirm against the scanner.
//
// `override` already implies `virtual`; the redundant keyword is dropped to
// match the other overrides of this method in this file.
bool reducedConsecutiveNonLeadingWhitespaces() const override {
    return true;
}

static bool available(const std::string& Exec) {
return !which(Exec).empty();
}
Expand All @@ -355,6 +364,10 @@ class GCCPreprocInput : public PreprocInput {
static bool available() {
return PreprocInput::available("gcc");
}

// Input preprocessed through `gcc` answers `true`, the same value that
// PreprocInput returns.
bool reducedConsecutiveNonLeadingWhitespaces() const override { return true; }
};

class MCPPPreprocInput : public PreprocInput {
Expand All @@ -367,6 +380,10 @@ class MCPPPreprocInput : public PreprocInput {
static bool available() {
return PreprocInput::available("mcpp");
}

// Input preprocessed through `mcpp` answers `true`, the same value that
// PreprocInput returns.
bool reducedConsecutiveNonLeadingWhitespaces() const override { return true; }
};

static WarnSet process_warn_opts(const Global& glb) {
Expand Down Expand Up @@ -852,8 +869,9 @@ int main(Global& glb, const char* souffle_executable) {
ErrorReport errReport(process_warn_opts(glb));

DebugReport debugReport(glb);
Own<ast::TranslationUnit> astTranslationUnit = ParserDriver::parseTranslationUnit(
glb, InputPath.string(), Input->getInputStream(), errReport, debugReport);
Own<ast::TranslationUnit> astTranslationUnit =
ParserDriver::parseTranslationUnit(glb, InputPath.u8string(), Input->getInputStream(),
Input->reducedConsecutiveNonLeadingWhitespaces(), errReport, debugReport);
Input->endInput();

/* Report run-time of the parser if verbose flag is set */
Expand Down
141 changes: 116 additions & 25 deletions src/parser/ParserDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,90 @@
#include <memory>
#include <utility>
#include <vector>
#ifndef _MSC_VER
#include <unistd.h>
#endif

// Forward declarations of the scanner entry points used by ParserDriver.
// NOTE(review): these look like the reentrant flex-generated scanner API
// (every call takes a `yyscan_t` handle) — confirm the signatures against the
// generated lexer.
using YY_BUFFER_STATE = struct yy_buffer_state*;
// Scan an in-memory, NUL-terminated string instead of a FILE*.
extern YY_BUFFER_STATE yy_scan_string(const char*, yyscan_t scanner);
// Create a scanner buffer of `size` bytes that reads from `file`.
extern YY_BUFFER_STATE yy_create_buffer(FILE* file, int size, yyscan_t yyscanner);
// Make `new_buffer` the scanner's current input buffer.
extern void yy_switch_to_buffer(YY_BUFFER_STATE new_buffer, yyscan_t yyscanner);
// Push `new_buffer` on top of the scanner's buffer stack.
extern void yypush_buffer_state(YY_BUFFER_STATE new_buffer, yyscan_t yyscanner);
// Release the scanner created by yylex_init_extra.
extern int yylex_destroy(yyscan_t scanner);
// Create a scanner whose user data ("extra") is `data`.
extern int yylex_init_extra(ScannerInfo* data, yyscan_t* scanner);
// Set the scanner's input stream.
extern void yyset_in(FILE* in_str, yyscan_t scanner);
// Enable (non-zero) or disable (zero) scanner debug tracing.
extern void yyset_debug(int, yyscan_t scanner);

namespace souffle {

Own<ast::TranslationUnit> ParserDriver::parse(
const std::string& filename, FILE* in, ErrorReport& errorReport, DebugReport& debugReport) {
/**
 * Build a parser driver that resolves files through the real file system.
 *
 * @param g global state, kept by reference for the driver's lifetime
 */
ParserDriver::ParserDriver(Global& g) : glb(g), vfs(std::make_shared<RealFileSystem>()) {}

/**
 * Build a parser driver that resolves files through `fs`.
 *
 * @param g  global state, kept by reference for the driver's lifetime
 * @param fs virtual file system to use; when null, a RealFileSystem is
 *           created so that `vfs` is never null after construction
 */
ParserDriver::ParserDriver(Global& g, std::shared_ptr<FileSystem> fs) : glb(g), vfs(std::move(fs)) {
    // `fs` is a by-value sink parameter: moving it into the member avoids an
    // extra reference-count increment/decrement pair.
    if (!vfs) {
        vfs = std::make_shared<RealFileSystem>();
    }
}

/**
 * Parse a program from an already opened input stream.
 *
 * @param filename  name of the input; canonicalized through the virtual file
 *                  system when it exists there, otherwise used as given
 * @param in        stream the scanner reads from
 * @param reducedConsecutiveNonLeadingWhitespaces
 *                  forwarded to ScannerInfo::push together with the file path
 * @param errorReport  error sink attached to the new translation unit
 * @param debugReport  debug sink attached to the new translation unit
 * @return the translation unit built by the parser
 */
Own<ast::TranslationUnit> ParserDriver::parse(const std::string& filename, FILE* in,
        bool reducedConsecutiveNonLeadingWhitespaces, ErrorReport& errorReport, DebugReport& debugReport) {
    translationUnit = mk<ast::TranslationUnit>(glb, mk<ast::Program>(), errorReport, debugReport);
    yyscan_t scanner;
    ScannerInfo data(vfs);
    SrcLocation emptyLoc;

    std::filesystem::path filePath(filename);
    if (vfs->exists(filename)) {
        std::error_code ec;
        auto canonicalPath = vfs->canonical(filename, ec);
        // Only adopt the canonical form when canonicalization succeeded;
        // otherwise keep the caller-supplied name so that source locations
        // still refer to a meaningful path.
        if (!ec) {
            filePath = std::move(canonicalPath);
        }
    }
    data.push(filePath, emptyLoc, reducedConsecutiveNonLeadingWhitespaces);

    yylex_init_extra(&data, &scanner);
    yyset_debug(0, scanner);

    // Read from `in` through a 32 KiB scanner buffer.
    auto state = yy_create_buffer(in, 32768, scanner);
    yy_switch_to_buffer(state, scanner);

    yy::parser parser(*this, scanner);
    parser.parse();

    yylex_destroy(scanner);

    // `translationUnit` is a data member, not a local: the explicit move is
    // what hands ownership over to the caller.
    return std::move(translationUnit);
}

Own<ast::TranslationUnit> ParserDriver::parseFromFS(
const std::filesystem::path& path, ErrorReport& errorReport, DebugReport& debugReport) {
translationUnit = mk<ast::TranslationUnit>(glb, mk<ast::Program>(), errorReport, debugReport);
yyscan_t scanner;
ScannerInfo data;
ScannerInfo data(vfs);
SrcLocation emptyLoc;
data.push(std::filesystem::weakly_canonical(filename).string(), emptyLoc);

std::filesystem::path filePath(path);
if (!vfs->exists(filePath)) {
throw std::runtime_error(std::string("File does not exist: ") + filePath.string());
}

std::error_code ec;
filePath = vfs->canonical(filePath, ec);

yylex_init_extra(&data, &scanner);
yyset_debug(0, scanner);
yyset_in(in, scanner);

auto code = readFile(filePath, ec);
if (ec) {
throw std::runtime_error(std::string("Cannot read file: ") + filePath.string());
}

data.push(filePath, emptyLoc);

yy_scan_string(code->c_str(), scanner);

yy::parser parser(*this, scanner);

parser.parse();

yylex_destroy(scanner);
Expand All @@ -72,13 +134,17 @@ Own<ast::TranslationUnit> ParserDriver::parse(
const std::string& code, ErrorReport& errorReport, DebugReport& debugReport) {
translationUnit = mk<ast::TranslationUnit>(glb, mk<ast::Program>(), errorReport, debugReport);

ScannerInfo data;
ScannerInfo data(vfs);
SrcLocation emptyLoc;
data.push("<in-memory>", emptyLoc);
data.setReported("<in-memory>");

yyscan_t scanner;
yylex_init_extra(&data, &scanner);
yyset_debug(0, scanner);

yy_scan_string(code.c_str(), scanner);

yy::parser parser(*this, scanner);
parser.parse();

Expand All @@ -88,14 +154,22 @@ Own<ast::TranslationUnit> ParserDriver::parse(
}

Own<ast::TranslationUnit> ParserDriver::parseTranslationUnit(Global& glb, const std::string& filename,
FILE* in, ErrorReport& errorReport, DebugReport& debugReport) {
ParserDriver parser(glb);
return parser.parse(filename, in, errorReport, debugReport);
FILE* in, bool reducedConsecutiveNonLeadingWhitespaces, ErrorReport& errorReport,
DebugReport& debugReport, std::shared_ptr<FileSystem> vfs) {
ParserDriver parser(glb, vfs);
return parser.parse(filename, in, reducedConsecutiveNonLeadingWhitespaces, errorReport, debugReport);
}

/**
 * Convenience entry point: builds a ParserDriver bound to `vfs` and parses
 * the file at `path` through ParserDriver::parseFromFS.
 *
 * @param glb          global state handed to the driver
 * @param path         file to parse, looked up through the driver's file system
 * @param errorReport  error sink for the resulting translation unit
 * @param debugReport  debug sink for the resulting translation unit
 * @param vfs          virtual file system for the driver; a null pointer makes
 *                     the driver fall back to the real file system
 * @return the parsed translation unit
 */
Own<ast::TranslationUnit> ParserDriver::parseTranslationUnitFromFS(Global& glb,
        const std::filesystem::path& path, ErrorReport& errorReport, DebugReport& debugReport,
        std::shared_ptr<FileSystem> vfs) {
    ParserDriver driver(glb, vfs);
    Own<ast::TranslationUnit> unit = driver.parseFromFS(path, errorReport, debugReport);
    return unit;
}

Own<ast::TranslationUnit> ParserDriver::parseTranslationUnit(
Global& glb, const std::string& code, ErrorReport& errorReport, DebugReport& debugReport) {
ParserDriver parser(glb);
Own<ast::TranslationUnit> ParserDriver::parseTranslationUnit(Global& glb, const std::string& code,
ErrorReport& errorReport, DebugReport& debugReport, std::shared_ptr<FileSystem> vfs) {
ParserDriver parser(glb, vfs);
return parser.parse(code, errorReport, debugReport);
}

Expand Down Expand Up @@ -261,43 +335,60 @@ void ParserDriver::error(const std::string& msg) {
Diagnostic(Diagnostic::Type::ERROR, DiagnosticMessage(msg)));
}

/**
 * Read the content of `path` through the driver's virtual file system.
 *
 * @param path file to read
 * @param ec   passed through to the file system's readFile; callers inspect it
 *             to detect failure
 * @return a heap-allocated string holding whatever the file system returned
 */
std::unique_ptr<std::string> ParserDriver::readFile(const std::filesystem::path& path, std::error_code& ec) {
    auto content = vfs->readFile(path, ec);
    return std::make_unique<std::string>(std::move(content));
}

std::optional<std::filesystem::path> ParserDriver::searchIncludePath(
const std::string& IncludeString, const SrcLocation& Loc) {
std::error_code ec;
std::filesystem::path Request(IncludeString);

if (Request.is_absolute()) {
if (std::filesystem::exists(Request)) {
return std::filesystem::canonical(Request);
if (vfs->exists(Request)) {
return vfs->canonical(Request, ec);
} else {
return std::nullopt;
}
}

// search relative from current input file
// search relative from current physical input file
std::filesystem::path Candidate = std::filesystem::path(Loc.file->Physical).parent_path() / Request;
if (std::filesystem::exists(Candidate)) {
return std::filesystem::canonical(Candidate);
if (vfs->exists(Candidate)) {
return vfs->canonical(Candidate, ec);
}

#if defined(__APPLE__)
// work around a bug in libcxx version <= 12: std::filesystem::current_path
// writes out of bounds and corrupts memory.
char* cwd = ::getcwd(nullptr, 0);
if (cwd == nullptr) {
std::cerr << "Error: cannot get current working directory.\n";
return std::nullopt;
}
const std::filesystem::path CurrentWD = std::string(cwd);
free(cwd);
#else
const std::filesystem::path CurrentWD = std::filesystem::current_path();
#endif

// search relative from include directories
for (auto&& includeDir : glb.config().getMany("include-dir")) {
auto dir = std::filesystem::path(includeDir);
if (dir.is_relative()) {
dir = (std::filesystem::current_path() / includeDir);
dir = (CurrentWD / dir);
}
if (std::filesystem::exists(dir)) {
Candidate = std::filesystem::path(dir) / Request;
if (std::filesystem::exists(Candidate)) {
return std::filesystem::canonical(Candidate);
}
Candidate = std::filesystem::path(dir) / Request;
if (vfs->exists(Candidate)) {
return vfs->canonical(Candidate, ec);
}
}

return std::nullopt;
}

bool ParserDriver::canEnterOnce(const SrcLocation& onceLoc) {
const auto Inserted = VisitedLocations.emplace(onceLoc.file->Physical, onceLoc.start.line);
const auto Inserted = VisitedOnceLocations.emplace(onceLoc.file->Physical, onceLoc.start.line);
return Inserted.second;
}

Expand Down
35 changes: 28 additions & 7 deletions src/parser/ParserDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include "RelationTag.h"
#include "VirtualFileSystem.h"
#include "ast/Clause.h"
#include "ast/Component.h"
#include "ast/ComponentInit.h"
Expand All @@ -43,7 +44,8 @@ namespace souffle {

class ParserDriver {
public:
ParserDriver(Global& g) : glb(g) {}
ParserDriver(Global& g);
ParserDriver(Global& g, std::shared_ptr<FileSystem> fs);
virtual ~ParserDriver() = default;

void addRelation(Own<ast::Relation> r);
Expand All @@ -67,36 +69,55 @@ class ParserDriver {

Own<ast::Counter> addDeprecatedCounter(SrcLocation tagLoc);

Own<ast::TranslationUnit> parse(
const std::string& filename, FILE* in, ErrorReport& errorReport, DebugReport& debugReport);
Own<ast::TranslationUnit> parse(const std::string& filename, FILE* in,
bool reducedConsecutiveNonLeadingWhitespaces, ErrorReport& errorReport, DebugReport& debugReport);
Own<ast::TranslationUnit> parse(
const std::string& code, ErrorReport& errorReport, DebugReport& debugReport);
Own<ast::TranslationUnit> parseFromFS(
const std::filesystem::path& path, ErrorReport& errorReport, DebugReport& debugReport);

static Own<ast::TranslationUnit> parseTranslationUnit(Global& glb, const std::string& filename, FILE* in,
ErrorReport& errorReport, DebugReport& debugReport);
static Own<ast::TranslationUnit> parseTranslationUnit(
Global& glb, const std::string& code, ErrorReport& errorReport, DebugReport& debugReport);
bool reducedConsecutiveNonLeadingWhitespaces, ErrorReport& errorReport, DebugReport& debugReport,
std::shared_ptr<FileSystem> vfs = nullptr);
static Own<ast::TranslationUnit> parseTranslationUnit(Global& glb, const std::string& code,
ErrorReport& errorReport, DebugReport& debugReport, std::shared_ptr<FileSystem> vfs = nullptr);
static Own<ast::TranslationUnit> parseTranslationUnitFromFS(Global& glb,
const std::filesystem::path& path, ErrorReport& errorReport, DebugReport& debugReport,
std::shared_ptr<FileSystem> vfs = nullptr);

void warning(const WarnType warn, const SrcLocation& loc, const std::string& msg);
void error(const SrcLocation& loc, const std::string& msg);
void error(const std::string& msg);

std::unique_ptr<std::string> readFile(const std::filesystem::path& path, std::error_code& ec);

std::optional<std::filesystem::path> searchIncludePath(
const std::string& IncludeString, const SrcLocation& IncludeLoc);

// Return true if the given source location is visited for the first time by `.once`
// and record that source location so that next calls will return false.
//
// The source location column number is non-significant.
bool canEnterOnce(const SrcLocation& onceLoc);

// Add a scanned comment.
void addComment(const SrcLocation& Loc, const std::stringstream& Content);

Own<ast::TranslationUnit> translationUnit;

bool trace_scanning = false;

std::set<std::pair<std::filesystem::path, int>> VisitedLocations;
// Canonical path and line number of the locations that have already been
// visited by `.once`.
std::set<std::pair<std::filesystem::path, int>> VisitedOnceLocations;

// All the scanned comments.
std::deque<std::pair<SrcLocation, std::string>> ScannedComments;

private:
Global& glb;

std::shared_ptr<FileSystem> vfs;
};

} // end of namespace souffle
Loading

0 comments on commit 84878b9

Please sign in to comment.