From 473b66d90d3b4d1dbda44fc45c667d8165c9989d Mon Sep 17 00:00:00 2001 From: Quentin Sabah Date: Sun, 1 May 2022 14:24:30 +0200 Subject: [PATCH] add support for include in datalog scanner - add option `--no-preprocessor` - new directive `.include "path"` - new directive `.once` - scanner treats `__FILE__` and `__LINE__`. - scanner harvests comments (for future usage) --- cmake/SouffleTests.cmake | 10 +- src/main.cpp | 219 ++++++++++++---- src/parser/ParserDriver.cpp | 38 ++- src/parser/ParserDriver.h | 21 +- src/parser/SrcLocation.cpp | 86 +++--- src/parser/SrcLocation.h | 116 +++++--- src/parser/parser.yy | 22 +- src/parser/scanner.ll | 247 ++++++++++++------ tests/syntactic/CMakeLists.txt | 16 ++ tests/syntactic/include_directive1/baz.dl | 6 + tests/syntactic/include_directive1/foo.dl | 2 + .../include_directive1/include_directive1.dl | 5 + .../include_directive1/include_directive1.err | 0 .../include_directive1/include_directive1.out | 0 .../syntactic/include_directive1/location.csv | 4 + .../include_directive2/include_directive2.dl | 2 + .../include_directive2/include_directive2.err | 4 + .../include_directive2/include_directive2.out | 0 .../include_directive3/include_directive3.dl | 7 + .../include_directive3/include_directive3.err | 0 .../include_directive3/include_directive3.out | 0 tests/syntactic/include_directive3/ok.csv | 1 + 22 files changed, 594 insertions(+), 212 deletions(-) create mode 100644 tests/syntactic/include_directive1/baz.dl create mode 100644 tests/syntactic/include_directive1/foo.dl create mode 100644 tests/syntactic/include_directive1/include_directive1.dl create mode 100644 tests/syntactic/include_directive1/include_directive1.err create mode 100644 tests/syntactic/include_directive1/include_directive1.out create mode 100644 tests/syntactic/include_directive1/location.csv create mode 100644 tests/syntactic/include_directive2/include_directive2.dl create mode 100644 tests/syntactic/include_directive2/include_directive2.err create mode 
/**
 * Source of the program text that is handed to the parser.
 *
 * Implementations open the input on getInputStream() and release it on
 * endInput().
 */
class InputProvider {
public:
    virtual ~InputProvider() = default;

    /** Open the input; the implementations in this file return nullptr on failure. */
    virtual FILE* getInputStream() = 0;

    /** Close the input previously returned by getInputStream(). */
    virtual bool endInput() = 0;
};

/**
 * Input provider that reads the program directly from a file, without
 * running any pre-processor.
 */
class FileInput : public InputProvider {
public:
    explicit FileInput(const std::filesystem::path& path) : Path(path) {}

    // This object owns a raw FILE*; a copy would close the same stream twice.
    FileInput(const FileInput&) = delete;
    FileInput& operator=(const FileInput&) = delete;

    ~FileInput() override {
        closeStream();
    }

    /**
     * Open the file in binary read mode.
     *
     * @return the opened stream, or nullptr when the file cannot be opened
     *         (a missing file makes fopen fail, so no separate existence
     *         check is needed)
     */
    FILE* getInputStream() override {
        // Do not leak a stream that a previous call left open.
        closeStream();
        Stream = fopen(Path.string().c_str(), "rb");
        return Stream;
    }

    /**
     * Close the stream opened by getInputStream().
     *
     * @return true if a stream was open and has been closed, false otherwise
     */
    bool endInput() override {
        return closeStream();
    }

private:
    /** Close the stream if one is open; tells whether there was one. */
    bool closeStream() {
        if (Stream == nullptr) {
            return false;
        }
        fclose(Stream);
        Stream = nullptr;
        return true;
    }

    const std::filesystem::path Path;
    FILE* Stream = nullptr;
};
+ Cmd << " 2> nul"; +#endif + + Stream = popen(Cmd.str().c_str(), "r"); + return Stream; + } + + bool endInput() { + const int Status = pclose(Stream); + Stream = nullptr; + if (Status == -1) { + perror(nullptr); + throw std::runtime_error("failed to close pre-processor pipe"); + } else if (Status != 0) { + std::cerr << "Pre-processors command failed with code " << Status << ": '" << Cmd.str() << "'\n"; + throw std::runtime_error("Pre-processor command failed"); + } + return true; + } + + static bool available(const std::string& Exec) { + return !which(Exec).empty(); + } + +private: + std::optional Exec; + std::optional Options; + std::optional InitCmd; + std::filesystem::path Path; + MainConfig& Conf; + std::stringstream Cmd; + FILE* Stream = nullptr; +}; + +class GCCPreprocInput : public PreprocInput { +public: + GCCPreprocInput(const std::filesystem::path& mainSource, MainConfig& conf) + : PreprocInput(mainSource, conf, "gcc", "-x c -E") {} + + virtual ~GCCPreprocInput() {} + + static bool available() { + return PreprocInput::available("gcc"); + } +}; + +class MCPPPreprocInput : public PreprocInput { +public: + MCPPPreprocInput(const std::filesystem::path& mainSource, MainConfig& conf) + : PreprocInput(mainSource, conf, "mcpp", "-e utf8 -W0") {} + + virtual ~MCPPPreprocInput() {} + + static bool available() { + return PreprocInput::available("mcpp"); + } +}; + int main(int argc, char** argv) { /* Time taking for overall runtime */ auto souffle_start = std::chrono::high_resolution_clock::now(); @@ -298,7 +446,8 @@ int main(int argc, char** argv) { {"parse-errors", '\5', "", "", false, "Show parsing errors, if any, then exit."}, {"help", 'h', "", "", false, "Display this help message."}, {"legacy", '\6', "", "", false, "Enable legacy support."}, - {"preprocessor", '\7', "CMD", "", false, "C preprocessor to use."}}; + {"preprocessor", '\7', "CMD", "", false, "C preprocessor to use."}, + {"no-preprocessor", 10, "", "", false, "Do not use a C preprocessor."}}; 
Global::config().processArgs(argc, argv, header.str(), versionFooter, options); // ------ command line arguments ------------- @@ -424,44 +573,30 @@ int main(int argc, char** argv) { throw std::runtime_error("failed to determine souffle executable path"); } - /* Create the pipe to establish a communication between cpp and souffle */ - - std::string cmd; - - if (Global::config().has("preprocessor")) { - cmd = Global::config().get("preprocessor"); - } else { - cmd = which("mcpp"); - if (isExecutable(cmd)) { - cmd += " -e utf8 -W0"; - } else { - cmd = which("gcc"); - if (isExecutable(cmd)) { - cmd += " -x c -E"; + const std::filesystem::path InputPath(Global::config().get("")); + std::unique_ptr Input; + const bool use_preprocessor = !Global::config().has("no-preprocessor"); + if (use_preprocessor) { + if (Global::config().has("preprocessor")) { + auto cmd = Global::config().get("preprocessor"); + if (cmd == "gcc") { + Input = std::make_unique(InputPath, Global::config()); + } else if (cmd == "mcpp") { + Input = std::make_unique(InputPath, Global::config()); } else { - std::cerr << "failed to locate mcpp or gcc pre-processors\n"; - throw std::runtime_error("failed to locate mcpp or gcc pre-processors"); + Input = std::make_unique(InputPath, Global::config(), cmd); } + } else if (MCPPPreprocInput::available()) { // mcpp fallback + Input = std::make_unique(InputPath, Global::config()); + } else if (GCCPreprocInput::available()) { // gcc fallback + Input = std::make_unique(InputPath, Global::config()); + } else { + throw std::runtime_error("failed to locate mcpp or gcc pre-processors"); } + } else { + Input = std::make_unique(Global::config().get("")); } - cmd += " " + toString(join(Global::config().getMany("include-dir"), " ", - [&](auto&& os, auto&& dir) { tfm::format(os, "-I \"%s\"", dir); })); - - if (Global::config().has("macro")) { - cmd += " " + Global::config().get("macro"); - } - // Add RamDomain size as a macro - cmd += " -DRAM_DOMAIN_SIZE=" + 
std::to_string(RAM_DOMAIN_SIZE); - cmd += " \"" + Global::config().get("") + "\""; -#if defined(_MSC_VER) - // cl.exe prints the input file name on the standard error stream, - // we must silent it in order to preserve an empty error output - // because Souffle test-suite is sensible to error outputs. - cmd += " 2> nul"; -#endif - FILE* in = popen(cmd.c_str(), "r"); - /* Time taking for parsing */ auto parser_start = std::chrono::high_resolution_clock::now(); @@ -470,19 +605,9 @@ int main(int argc, char** argv) { // parse file ErrorReport errReport(Global::config().has("no-warn")); DebugReport debugReport; - Own astTranslationUnit = - ParserDriver::parseTranslationUnit("", in, errReport, debugReport); - - // close input pipe - int preprocessor_status = pclose(in); - if (preprocessor_status == -1) { - perror(nullptr); - throw std::runtime_error("failed to close pre-processor pipe"); - } else if (preprocessor_status != 0) { - std::cerr << "Pre-processors command failed with code " << preprocessor_status << ": '" << cmd - << "'\n"; - throw std::runtime_error("Pre-processor command failed"); - } + Own astTranslationUnit = ParserDriver::parseTranslationUnit( + InputPath.string(), Input->getInputStream(), errReport, debugReport); + Input->endInput(); /* Report run-time of the parser if verbose flag is set */ if (Global::config().has("verbose")) { diff --git a/src/parser/ParserDriver.cpp b/src/parser/ParserDriver.cpp index b53b5ffa0f5..7edbd21ca4d 100644 --- a/src/parser/ParserDriver.cpp +++ b/src/parser/ParserDriver.cpp @@ -54,7 +54,8 @@ Own ParserDriver::parse( translationUnit = mk(mk(), errorReport, debugReport); yyscan_t scanner; ScannerInfo data; - data.yyfilename = filename; + SrcLocation emptyLoc; + data.push(std::filesystem::weakly_canonical(filename).string(), emptyLoc); yylex_init_extra(&data, &scanner); yyset_debug(0, scanner); yyset_in(in, scanner); @@ -72,7 +73,8 @@ Own ParserDriver::parse( translationUnit = mk(mk(), errorReport, debugReport); ScannerInfo 
data; - data.yyfilename = ""; + SrcLocation emptyLoc; + data.push("", emptyLoc); yyscan_t scanner; yylex_init_extra(&data, &scanner); yyset_debug(0, scanner); @@ -258,4 +260,36 @@ void ParserDriver::error(const std::string& msg) { Diagnostic(Diagnostic::Type::ERROR, DiagnosticMessage(msg))); } +std::optional ParserDriver::searchIncludePath( + const std::string& IncludeString, const SrcLocation& Loc) { + std::filesystem::path Candidate(IncludeString); + + if (Candidate.is_absolute()) { + if (std::filesystem::exists(Candidate)) { + return std::filesystem::canonical(Candidate); + } else { + return std::nullopt; + } + } + + // search relative from current input file + Candidate = std::filesystem::path(Loc.file->Physical).parent_path() / IncludeString; + if (std::filesystem::exists(Candidate)) { + return std::filesystem::canonical(Candidate); + } else if (Candidate.is_absolute()) { + return std::nullopt; + } + + return std::nullopt; +} + +bool ParserDriver::canEnterOnce(const SrcLocation& onceLoc) { + const auto Inserted = VisitedLocations.emplace(onceLoc.file->Physical, onceLoc.start.line); + return Inserted.second; +} + +void ParserDriver::addComment(const SrcLocation& Loc, const std::stringstream& Content) { + ScannedComments.emplace_back(Loc, Content.str()); +} + } // end of namespace souffle diff --git a/src/parser/ParserDriver.h b/src/parser/ParserDriver.h index 9eee3ad5e7a..9e7cc08921f 100644 --- a/src/parser/ParserDriver.h +++ b/src/parser/ParserDriver.h @@ -31,7 +31,9 @@ #include "ast/Type.h" #include "parser/SrcLocation.h" #include "reports/DebugReport.h" + #include +#include #include #include #include @@ -43,8 +45,6 @@ class ParserDriver { public: virtual ~ParserDriver() = default; - Own translationUnit; - void addRelation(Own r); void addFunctorDeclaration(Own f); void addDirective(Own d); @@ -66,8 +66,6 @@ class ParserDriver { Own addDeprecatedCounter(SrcLocation tagLoc); - bool trace_scanning = false; - Own parse( const std::string& filename, FILE* in, 
ErrorReport& errorReport, DebugReport& debugReport); Own parse( @@ -80,6 +78,21 @@ class ParserDriver { void warning(const SrcLocation& loc, const std::string& msg); void error(const SrcLocation& loc, const std::string& msg); void error(const std::string& msg); + + std::optional searchIncludePath( + const std::string& IncludeString, const SrcLocation& IncludeLoc); + + bool canEnterOnce(const SrcLocation& onceLoc); + + void addComment(const SrcLocation& Loc, const std::stringstream& Content); + + Own translationUnit; + + bool trace_scanning = false; + + std::set> VisitedLocations; + + std::deque> ScannedComments; }; } // end of namespace souffle diff --git a/src/parser/SrcLocation.cpp b/src/parser/SrcLocation.cpp index 60110a4b2db..0b9321c7211 100644 --- a/src/parser/SrcLocation.cpp +++ b/src/parser/SrcLocation.cpp @@ -31,31 +31,27 @@ namespace souffle { -std::string getCurrentFilename(const std::vector& filenames) { - if (filenames.empty()) { - return ""; +std::string SrcLocation::getReportedFilename() const { + static const std::string emptyFilename(""); + if (file) { + return std::filesystem::path(file->Reported).filename().string(); + } else { + return emptyFilename; } +} - std::string path = "."; - for (const std::string& filename : filenames) { - if (!filename.empty() && isAbsolute(filename)) { - path = dirName(filename); - } else if (existFile(path + pathSeparator + filename)) { - path = dirName(path + pathSeparator + filename); - } else if (existFile(filename)) { - path = dirName(filename); - } else { - path = "."; - } +const std::string& SrcLocation::getReportedPath() const { + static const std::string emptyFilename(""); + if (file) { + return file->Reported; + } else { + return emptyFilename; } - - return path + pathSeparator + baseName(filenames.back()); } bool SrcLocation::operator<(const SrcLocation& other) const { - // Translate filename stack into current files - std::string filename = getCurrentFilename(filenames); - std::string otherFilename = 
getCurrentFilename(other.filenames); + const std::string& filename = getReportedPath(); + const std::string& otherFilename = other.getReportedPath(); if (filename < otherFilename) { return true; @@ -76,28 +72,26 @@ bool SrcLocation::operator<(const SrcLocation& other) const { return false; } -void SrcLocation::setFilename(std::string filename) { - makePreferred(filename); - if (filenames.empty()) { - filenames.emplace_back(filename); - return; - } - if (filenames.back() == filename) { - return; - } - if (filenames.size() > 1 && filenames.at(filenames.size() - 2) == filename) { - filenames.pop_back(); - return; +SrcLocation& SrcLocation::operator+=(const SrcLocation& other) { + if (file.get() == other.file.get()) { + if (*this < other) { + end = other.end; + } else { + start = other.start; + } } - filenames.emplace_back(filename); + return *this; +} + +void SrcLocation::setFile(const std::shared_ptr& f) { + file = f; } std::string SrcLocation::extloc() const { - std::string filename = getCurrentFilename(filenames); - std::ifstream in(filename); + std::ifstream in(file->Reported); std::stringstream s; if (in.is_open()) { - s << "file " << baseName(filename) << " at line " << start.line << "\n"; + s << "file " << getReportedFilename() << " at line " << start.line << "\n"; for (int i = 0; i < start.line - 1; ++i) { in.ignore(std::numeric_limits::max(), '\n'); } @@ -131,12 +125,30 @@ std::string SrcLocation::extloc() const { } in.close(); } else { - s << filename << ":" << start.line << ":" << start.column; + s << getReportedFilename() << ":" << start.line << ":" << start.column; } return s.str(); } void SrcLocation::print(std::ostream& out) const { - out << getCurrentFilename(filenames) << " [" << start << "-" << end << "]"; + out << getReportedFilename() << " [" << start << "-" << end << "]"; +} + +void ScannerInfo::push(const std::string& Physical, const SrcLocation& IncludeLoc) { + yyfilename = std::make_shared(yyfilename, IncludeLoc.start, Physical, Physical); +} + 
/** A class locating a single point in an input file */
struct Point {
    /** The line in the source file (0 until set) */
    int line = 0;

    /** The column in the source file (0 until set) */
    int column = 0;

    /** Order points by line first, then by column */
    bool operator<(const Point& other) const {
        if (line != other.line) {
            return line < other.line;
        }
        return column < other.column;
    }

    bool operator>(const Point& other) const {
        return other < *this;
    }

    /** Write the point as `line:column` */
    void print(std::ostream& out) const {
        out << line << ":" << column;
    }

    /** Enables locations to be printed */
    friend std::ostream& operator<<(std::ostream& out, const Point& loc) {
        loc.print(out);
        return out;
    }
};
*/ + const std::string Reported; +}; + /** A class describing a range in an input file */ class SrcLocation { public: - /** A class locating a single point in an input file */ - struct Point { - /** The line in the source file */ - int line; - - /** The column in the source file */ - int column; - - /** A comparison for points */ - bool operator<(const Point& other) const { - return line < other.line || (line == other.line && column < other.column); - } - - bool operator>(const Point& other) const { - return other < *this; - } - - void print(std::ostream& out) const { - out << line << ":" << column; - } - - /** Enables locations to be printed */ - friend std::ostream& operator<<(std::ostream& out, const Point& loc) { - loc.print(out); - return out; - } - }; - /** The file referred to */ - std::vector filenames; + std::shared_ptr file; /** The start location */ Point start = {}; @@ -62,10 +82,21 @@ class SrcLocation { /** The End location */ Point end = {}; + /** Return the shortened reported file name */ + std::string getReportedFilename() const; + + /** Return the full reported file path */ + const std::string& getReportedPath() const; + /** A comparison for source locations */ bool operator<(const SrcLocation& other) const; - void setFilename(std::string filename); + /** Extend the current source location with the other, only if both have + * the same include stack */ + SrcLocation& operator+=(const SrcLocation& other); + + /** Set the source location's file (hence include stack) */ + void setFile(const std::shared_ptr& file); /** An extended string describing this location in a end-user friendly way */ std::string extloc() const; @@ -81,10 +112,29 @@ class SrcLocation { /** Information struct for scanner */ struct ScannerInfo { + /** Scanner's current location */ SrcLocation yylloc; - /* Stack of parsed files */ - std::string yyfilename; + /** Include stack of scanned files, top is the current scanned file */ + std::shared_ptr yyfilename; + + /** Location of 
last .include directive */ + SrcLocation LastIncludeDirectiveLoc; + + /** Extent of the current comment */ + SrcLocation CommentExtent; + + /** Content of the current comment */ + std::stringstream CommentContent; + + /** Push a file on the include stack */ + void push(const std::string& NewFile, const SrcLocation& IncludeLoc); + + /** Pop a file from the include stack */ + void pop(); + + /** Set the reported path for the current file */ + void setReported(const std::string& Reported); }; } // end of namespace souffle diff --git a/src/parser/parser.yy b/src/parser/parser.yy index c1f93be33e2..39c7177d361 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -95,17 +95,17 @@ #define YY_NULLPTR nullptr /* Macro to update locations as parsing proceeds */ - #define YYLLOC_DEFAULT(Cur, Rhs, N) \ - do { \ - if (N) { \ - (Cur).start = YYRHSLOC(Rhs, 1).start; \ - (Cur).end = YYRHSLOC(Rhs, N).end; \ - (Cur).filenames = YYRHSLOC(Rhs, N).filenames; \ - } else { \ - (Cur).start = YYRHSLOC(Rhs, 0).end; \ - (Cur).end = YYRHSLOC(Rhs, 0).end; \ - (Cur).filenames = YYRHSLOC(Rhs, 0).filenames; \ - } \ +#define YYLLOC_DEFAULT(Cur, Rhs, N) \ + do { \ + if (N) { \ + (Cur).start = YYRHSLOC(Rhs, 1).start; \ + (Cur).end = YYRHSLOC(Rhs, N).end; \ + (Cur).file = YYRHSLOC(Rhs, N).file; \ + } else { \ + (Cur).start = YYRHSLOC(Rhs, 0).end; \ + (Cur).end = YYRHSLOC(Rhs, 0).end; \ + (Cur).file = YYRHSLOC(Rhs, 0).file; \ + } \ } while (0) } diff --git a/src/parser/scanner.ll b/src/parser/scanner.ll index dd24ecf3f50..ff99cbc35cc 100644 --- a/src/parser/scanner.ll +++ b/src/parser/scanner.ll @@ -70,17 +70,57 @@ #define yyfilename yyget_extra(yyscanner)->yyfilename +#define yyinfo (*yyget_extra(yyscanner)) + /* Execute when matching */ #define YY_USER_ACTION { \ - yylloc.start = SrcLocation::Point({ yylineno, yycolumn }); \ + yylloc.start = Point({ yylineno, yycolumn }); \ yycolumn += yyleng; \ - yylloc.end = SrcLocation::Point({ yylineno, yycolumn }); \ - yylloc.setFilename(yyfilename); 
\ + yylloc.end = Point({ yylineno, yycolumn }); \ + yylloc.setFile(yyfilename); \ } + // scan a string with escape sequences, skipping surrounding double-quotes if any. + std::string lexString(souffle::ParserDriver& driver, const SrcLocation& loc, const char* text) { + std::string result; + const size_t start = (text[0] == '"' ? 1 : 0); + const size_t end = strlen(text) - (text[0] == '"' ? 1 : 0); + bool error = false; + char error_char; + for (size_t i = start; i < end; i++) { + if (text[i] == '\\' && i + 1 < end) { + switch (text[i+1]) { + case '"': result += '"'; break; + case '\'': result += '\''; break; + case '\\': result += '\\'; break; + case 'a': result += '\a'; break; + case 'b': result += '\b'; break; + case 'f': result += '\f'; break; + case 'n': result += '\n'; break; + case 'r': result += '\r'; break; + case 't': result += '\t'; break; + case 'v': result += '\v'; break; + default: + error_char = text[i+1]; + error = true; + break; + } + i++; + } else { + result += text[i]; + } + if (error) { + break; + } + } + if (error) driver.error(loc, std::string("Unknown escape sequence \\") + error_char); + return result; + } + %} %x COMMENT +%x INCLUDE WS [ \t\r\v\f] @@ -102,6 +142,19 @@ WS [ \t\r\v\f] ".override"/{WS} { return yy::parser::make_OVERRIDE(yylloc); } ".pragma"/{WS} { return yy::parser::make_PRAGMA(yylloc); } ".plan"/{WS} { return yy::parser::make_PLAN(yylloc); } +".include" { + yyinfo.LastIncludeDirectiveLoc = yylloc; + BEGIN(INCLUDE); + } +".once" { + if (!driver.canEnterOnce(yylloc)) { + yypop_buffer_state(yyscanner); + yyinfo.pop(); + if (!YY_CURRENT_BUFFER) { + return yy::parser::make_END(yylloc); + } + } + } "autoinc" { return yy::parser::make_AUTOINC(yylloc); } "band" { return yy::parser::make_BW_AND(yylloc); } "bor" { return yy::parser::make_BW_OR(yylloc); } @@ -149,6 +202,30 @@ WS [ \t\r\v\f] "to_string" { return yy::parser::make_TOSTRING(yylloc); } "to_unsigned" { return yy::parser::make_TOUNSIGNED(yylloc); } "choice-domain" { return 
yy::parser::make_CHOICEDOMAIN(yylloc); } +"__FILE__" { + return yy::parser::make_STRING(yylloc.file->Reported, yylloc); + } +"__LINE__" { return yy::parser::make_NUMBER(std::to_string(yylineno), yylloc); } +"__INCL__" { + std::string result; + const IncludeStack* incl = yylloc.file.get(); + const Point* pos = &incl->IncludePos; + // skip top + if (incl) incl = incl->ParentStack.get(); + + bool first = true; + while(incl) { + std::stringstream concat; + concat << incl->Reported << ":" << *pos; + if (!first) concat << ';'; + concat << result; + result = concat.str(); + first = false; + pos = &incl->IncludePos; + incl = incl->ParentStack.get(); + } + return yy::parser::make_STRING(result, yylloc); + } "|" { return yy::parser::make_PIPE(yylloc); } "[" { return yy::parser::make_LBRACKET(yylloc); } "]" { return yy::parser::make_RBRACKET(yylloc); } @@ -209,90 +286,110 @@ WS [ \t\r\v\f] return yy::parser::make_IDENT(yytext, yylloc); } \"(\\.|[^"\\])*\" { - std::string result; - size_t end = strlen(yytext) - 1; - bool error = false; - char error_char; - for (size_t i = 1; i < end; i++) { - if (yytext[i] == '\\' && i + 1 < end) { - switch (yytext[i+1]) { - case '"': result += '"'; break; - case '\'': result += '\''; break; - case '\\': result += '\\'; break; - case 'a': result += '\a'; break; - case 'b': result += '\b'; break; - case 'f': result += '\f'; break; - case 'n': result += '\n'; break; - case 'r': result += '\r'; break; - case 't': result += '\t'; break; - case 'v': result += '\v'; break; - default: - error_char = yytext[i+1]; - error = true; - break; - } - i++; - } else { - result += yytext[i]; - } - if (error) { - break; - } - } - if (error) driver.error(yylloc, std::string("Unknown escape sequence \\") + error_char); - return yy::parser::make_STRING(result, yylloc); + return yy::parser::make_STRING(lexString(driver, yylloc, yytext), yylloc); } \#.*$ { + /* formats: + "#" linenum filename + "#" linenum filename 1 + "#" linenum filename 2 + "#line" linenum + 
"#line" linenum filename + */ std::unique_ptr fname_ptr = std::make_unique(yyleng+1); char* fname = fname_ptr.get(); - int lineno; - if ((sscanf(yytext,"# %d \"%[^\"]",&lineno,fname)>=2) || - (sscanf(yytext,"#line %d \"%[^\"]",&lineno,fname)>=2)) { - std::size_t fnamelen = strlen(fname); - assert(fnamelen > 0 && "failed conversion"); - fname[fnamelen]='\0'; + fname[0] = 0; + int lineno = 0; + int flag = 0; + + if ((sscanf(yytext,"# %d \"%[^\"]\" %d",&lineno,fname,&flag)>=2) || + (sscanf(yytext,"#line %d \"%[^\"]\" %d",&lineno,fname,&flag)>=1)) { - // fname is a literal string with escape sequences - if (strchr(fname,'\\')) { - std::string filename; - std::size_t i; - for (i = 0; i < fnamelen; ++i) { - if (fname[i] == '\\' && (i + 1) < fnamelen) { - switch(fname[i+1]) { - case '"': filename += '"'; break; - case '\'': filename += '\''; break; - case '\\': filename += '\\'; break; - case 'a': filename += '\a'; break; - case 'b': filename += '\b'; break; - case 'f': filename += '\f'; break; - case 'n': filename += '\n'; break; - case 'r': filename += '\r'; break; - case 't': filename += '\t'; break; - case 'v': filename += '\v'; break; - } - ++i; - } else { - filename += fname[i]; - } + if (fname[0] != 0) { + std::string filename = lexString(driver, yylloc, fname); + /* recognized C preprocessor flags: + * 0 (or no flag) => update location + * 1 => enter file (include push) + * 2 => return to file (include pop) + */ + + if (flag == 0) { + // update + yyinfo.pop(); + yyinfo.push(filename, yylloc); + yycolumn = 1; + yylineno = lineno-1; + } else if (flag == 1) { + yyinfo.push(filename, yylloc); + yycolumn = 1; + yylineno = lineno-1; + } else if (flag == 2) { + yyinfo.pop(); // leave + // update + yyinfo.setReported(filename); + yycolumn = 1; + yylineno = lineno-1; } - std::copy(filename.begin(), filename.end(), fname); - fname[filename.size()] = '\0'; + } else { + yycolumn = 1; + yylineno = lineno-1; } - - yycolumn = 1; yylineno = lineno-1; - yyfilename = fname; } } 
-"//".*$ { } -"/*" { BEGIN(COMMENT); } +"//".*$ { + yyinfo.CommentExtent = yylloc; + yyinfo.CommentContent.str(yytext); + driver.addComment(yyinfo.CommentExtent, yyinfo.CommentContent); + yyinfo.CommentContent.str(""); + } +"/*" { + yyinfo.CommentContent.str(""); + yyinfo.CommentExtent = yylloc; + yyinfo.CommentContent << yytext; + BEGIN(COMMENT); + } { -"*/" { BEGIN(INITIAL); } -[^*\n]+ { } -"*" { } -\n { } +"*/" { + yyinfo.CommentExtent += yylloc; + std::string X(yytext); + yyinfo.CommentContent << X; + driver.addComment(yyinfo.CommentExtent, yyinfo.CommentContent); + yyinfo.CommentContent.str(""); + BEGIN(INITIAL); + } +[^*\n]+ { yyinfo.CommentExtent += yylloc; yyinfo.CommentContent << yytext; } +"*" { yyinfo.CommentExtent += yylloc; yyinfo.CommentContent << yytext; } +\n { yyinfo.CommentExtent += yylloc; yyinfo.CommentContent << yytext; } +} +{ +{WS}+ { } +\"(\\.|[^"\\])*\" { /* include file name */ + std::string path = lexString(driver, yylloc, yytext); + std::optional maybePath = driver.searchIncludePath(path, yylloc); + yyin = nullptr; + if (maybePath) { + yyin = fopen(maybePath->string().c_str(), "r"); + } + if (!yyin) { + driver.error(yylloc, std::string("cannot find include file ") + yytext); + return yy::parser::make_END(yylloc); + } else { + yyinfo.push(maybePath->string(), yyinfo.LastIncludeDirectiveLoc); + yypush_buffer_state(yy_create_buffer(yyin, YY_BUF_SIZE, yyscanner), yyscanner); + } + BEGIN(INITIAL); + } +. { driver.error(yylloc, std::string("unexpected ") + yytext); } } \n { yycolumn = 1; } {WS}+ { } -<> { return yy::parser::make_END(yylloc); } +<> { + yypop_buffer_state(yyscanner); + yyinfo.pop(); + if (!YY_CURRENT_BUFFER) { + return yy::parser::make_END(yylloc); + } + } . 
{ driver.error(yylloc, std::string("unexpected ") + yytext); } %% +// vim: filetype=lex diff --git a/tests/syntactic/CMakeLists.txt b/tests/syntactic/CMakeLists.txt index d76864c875e..90caee185f7 100644 --- a/tests/syntactic/CMakeLists.txt +++ b/tests/syntactic/CMakeLists.txt @@ -69,3 +69,19 @@ if (NOT MSVC) # does not pass with Visual Studio pre-processor because it preserves all whitespaces positive_test(whitespaces) endif () +souffle_run_test( + TEST_NAME include_directive1 + CATEGORY syntactic + NO_PREPROCESSOR + ) +souffle_run_test( + TEST_NAME include_directive2 + CATEGORY syntactic + NO_PREPROCESSOR + NEGATIVE + ) +souffle_run_test( + TEST_NAME include_directive3 + CATEGORY syntactic + NO_PREPROCESSOR + ) diff --git a/tests/syntactic/include_directive1/baz.dl b/tests/syntactic/include_directive1/baz.dl new file mode 100644 index 00000000000..0426f75a8ed --- /dev/null +++ b/tests/syntactic/include_directive1/baz.dl @@ -0,0 +1,6 @@ +// baz.dl +location("baz:2",__LINE__). + +.include "foo.dl" + +location("baz:6",__LINE__). diff --git a/tests/syntactic/include_directive1/foo.dl b/tests/syntactic/include_directive1/foo.dl new file mode 100644 index 00000000000..03f35825683 --- /dev/null +++ b/tests/syntactic/include_directive1/foo.dl @@ -0,0 +1,2 @@ +// foo.dl +location("foo:2",__LINE__). diff --git a/tests/syntactic/include_directive1/include_directive1.dl b/tests/syntactic/include_directive1/include_directive1.dl new file mode 100644 index 00000000000..3b8f0c8d066 --- /dev/null +++ b/tests/syntactic/include_directive1/include_directive1.dl @@ -0,0 +1,5 @@ +// include_directive1 +.decl location(tag:symbol, line:number) +.include "baz.dl" +location("include_directive1:4",__LINE__). 
+.output location diff --git a/tests/syntactic/include_directive1/include_directive1.err b/tests/syntactic/include_directive1/include_directive1.err new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/syntactic/include_directive1/include_directive1.out b/tests/syntactic/include_directive1/include_directive1.out new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/syntactic/include_directive1/location.csv b/tests/syntactic/include_directive1/location.csv new file mode 100644 index 00000000000..d20af0ce730 --- /dev/null +++ b/tests/syntactic/include_directive1/location.csv @@ -0,0 +1,4 @@ +baz:2 2 +baz:6 6 +foo:2 2 +include_directive1:4 4 diff --git a/tests/syntactic/include_directive2/include_directive2.dl b/tests/syntactic/include_directive2/include_directive2.dl new file mode 100644 index 00000000000..d85121b253c --- /dev/null +++ b/tests/syntactic/include_directive2/include_directive2.dl @@ -0,0 +1,2 @@ +// try to include a file that does not exist +.include "non-existant.dl" diff --git a/tests/syntactic/include_directive2/include_directive2.err b/tests/syntactic/include_directive2/include_directive2.err new file mode 100644 index 00000000000..1be32e6eb4e --- /dev/null +++ b/tests/syntactic/include_directive2/include_directive2.err @@ -0,0 +1,4 @@ +Error: cannot find include file "non-existant.dl" in file include_directive2.dl at line 2 +.include "non-existant.dl" +---------^----------------- +1 errors generated, evaluation aborted diff --git a/tests/syntactic/include_directive2/include_directive2.out b/tests/syntactic/include_directive2/include_directive2.out new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/syntactic/include_directive3/include_directive3.dl b/tests/syntactic/include_directive3/include_directive3.dl new file mode 100644 index 00000000000..c4dea7ae7fd --- /dev/null +++ b/tests/syntactic/include_directive3/include_directive3.dl @@ -0,0 +1,7 @@ +// prevent recusive inclusion +.once +// 
self-inclusion +.include "include_directive3.dl" +.decl ok() +.output ok +ok(). diff --git a/tests/syntactic/include_directive3/include_directive3.err b/tests/syntactic/include_directive3/include_directive3.err new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/syntactic/include_directive3/include_directive3.out b/tests/syntactic/include_directive3/include_directive3.out new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/syntactic/include_directive3/ok.csv b/tests/syntactic/include_directive3/ok.csv new file mode 100644 index 00000000000..6a452c185a8 --- /dev/null +++ b/tests/syntactic/include_directive3/ok.csv @@ -0,0 +1 @@ +()