From ed7052290df3be9f4720b4cd6d577b8dfff03494 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Fri, 6 Jun 2025 06:23:26 +0900 Subject: [PATCH 1/3] main,refactor: use LMAP_* enumerators when adding or removing maps The original code used a boolean value to select how file names were mapped to a parser: by glob-like pattern or by extension. To support a third way of mapping a file name to a parser, by regular expression pattern, use an enum value instead of a boolean. Signed-off-by: Masatake YAMATO --- main/options.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/main/options.c b/main/options.c index 469da5efc1..ffde5d94a8 100644 --- a/main/options.c +++ b/main/options.c @@ -1757,7 +1757,7 @@ static char* skipPastMap (char* p) static char* extractMapFromParameter (const langType language, char* parameter, char** tail, - bool* pattern_p, + langmapType *mapType, char* (* skip) (char *)) { char* p = NULL; @@ -1767,7 +1767,7 @@ static char* extractMapFromParameter (const langType language, if (first == EXTENSION_SEPARATOR) /* extension map */ { - *pattern_p = false; + *mapType = LMAP_EXTENSION; ++parameter; p = (* skip) (parameter); @@ -1789,7 +1789,7 @@ static char* extractMapFromParameter (const langType language, } else if (first == PATTERN_START) /* pattern map */ { - *pattern_p = true; + *mapType = LMAP_PATTERN; ++parameter; for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p) @@ -1818,13 +1818,13 @@ static char* addLanguageMap (const langType language, char* map_parameter, bool exclusiveInAllLanguages) { char* p = NULL; - bool pattern_p; + langmapType map_type; char* map; - map = extractMapFromParameter (language, map_parameter, &p, &pattern_p, skipPastMap); - if (map && pattern_p == false) + map = extractMapFromParameter (language, map_parameter, &p, &map_type, skipPastMap); + if (map && map_type == LMAP_EXTENSION) addLanguageExtensionMap (language, map, exclusiveInAllLanguages); - else if (map && 
pattern_p == true) + else if (map && map_type == LMAP_PATTERN) addLanguagePatternMap (language, map, exclusiveInAllLanguages); else error (FATAL, "Badly formed language map for %s language", @@ -1838,13 +1838,13 @@ static char* addLanguageMap (const langType language, char* map_parameter, static char* removeLanguageMap (const langType language, char* map_parameter) { char* p = NULL; - bool pattern_p; + langmapType map_type; char* map; - map = extractMapFromParameter (language, map_parameter, &p, &pattern_p, skipPastMap); - if (map && pattern_p == false) + map = extractMapFromParameter (language, map_parameter, &p, &map_type, skipPastMap); + if (map && map_type == LMAP_EXTENSION) removeLanguageExtensionMap (language, map); - else if (map && pattern_p == true) + else if (map && map_type == LMAP_PATTERN) removeLanguagePatternMap (language, map); else error (FATAL, "Badly formed language map for %s language", From caf6daa8b8e7aa1f2171bd7116c78f43e82d878c Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Fri, 6 Jun 2025 06:30:01 +0900 Subject: [PATCH 2/3] main,refactor: don't use else if if-block does return always Signed-off-by: Masatake YAMATO --- main/options.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/main/options.c b/main/options.c index ffde5d94a8..148c0e390c 100644 --- a/main/options.c +++ b/main/options.c @@ -1777,17 +1777,16 @@ static char* extractMapFromParameter (const langType language, *tail = parameter + strlen (parameter); return result; } - else - { - tmp = *p; - *p = '\0'; - result = eStrdup (parameter); - *p = tmp; - *tail = p; - return result; - } + + tmp = *p; + *p = '\0'; + result = eStrdup (parameter); + *p = tmp; + *tail = p; + return result; } - else if (first == PATTERN_START) /* pattern map */ + + if (first == PATTERN_START) /* pattern map */ { *mapType = LMAP_PATTERN; @@ -1800,15 +1799,13 @@ static char* extractMapFromParameter (const langType language, if (*p == '\0') error 
(FATAL, "Unterminated file name pattern for %s language", getLanguageName (language)); - else - { - tmp = *p; - *p = '\0'; - result = eStrdup (parameter); - *p = tmp; - *tail = p + 1; - return result; - } + + tmp = *p; + *p = '\0'; + result = eStrdup (parameter); + *p = tmp; + *tail = p + 1; + return result; } return NULL; From 24ae704557fd5cc1a23c91bca334685da9fe66bc Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Mon, 12 May 2025 23:15:04 +0900 Subject: [PATCH 3/3] main: using regex for choosing a parser for the given file name This change extends the --map- option to support regular expression matching against the full file name. The original --map- option supports glob-based matching and extension comparison against the file basename. However, these two methods are not enough if the file names are too generic. See https://github.com/universal-ctags/ctags/pull/3287 . The regular expression passed to --map- must be surrounded by % characters, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%' If you want to match in a case-insensitive way, append `i' after the second %, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%i' If you want to use % as part of an expression, put \ before % for escaping. 
TODO: - [ ] reconsider name regex, rxpr, or something - [ ] update ctags.1 - [ ] add Tmain test cases - [ ] add description to --help - [X] extend optlib2c - [X] add --list-map-regex - [X] add --list-maps - [ ] add pcre backend Signed-off-by: Masatake YAMATO --- main/options.c | 61 ++++++++- main/parse.c | 203 ++++++++++++++++++++++++++--- main/parse.h | 9 ++ main/parse_p.h | 14 +- main/rexprcode.c | 98 ++++++++++++++ main/rexprcode_p.h | 36 +++++ misc/optlib2c | 40 +++++- optlib/rpmMacros.c | 9 ++ optlib/rpmMacros.ctags | 7 +- source.mak | 2 + win32/ctags_vs2013.vcxproj | 2 + win32/ctags_vs2013.vcxproj.filters | 6 + 12 files changed, 456 insertions(+), 31 deletions(-) create mode 100644 main/rexprcode.c create mode 100644 main/rexprcode_p.h diff --git a/main/options.c b/main/options.c index 148c0e390c..6bd6ddb0e3 100644 --- a/main/options.c +++ b/main/options.c @@ -59,6 +59,9 @@ /* The following separators are permitted for list options. */ #define EXTENSION_SEPARATOR '.' +#define REXPR_START '%' +#define REXPR_STOP '%' +#define REXPR_ICASE 'i' #define PATTERN_START '(' #define PATTERN_STOP ')' #define IGNORE_SEPARATORS ", \t\n" @@ -303,10 +306,10 @@ static optionDescription LongOptionDescription [] = { {1,0," --langmap=[,[...]]"}, {1,0," Override default mapping of language to input file extension."}, {1,0," e.g. 
--langmap=c:.c.x,java:+.j,make:([Mm]akefile).mak"}, - {1,0," --map-=[+|-]|"}, + {1,0," --map-=[+|-]||"}, {1,0," Set, add(+) or remove(-) the map for ."}, - {1,0," Unlike --langmap, this doesn't take a list; only one file name "}, - {1,0," or one file can be specified at once."}, + {1,0," Unlike --langmap, this doesn't take a list; only one file name ,"}, + {1,0," one file name , or one file can be specified at once."}, {1,0," Unlike --langmap the change with this option affects mapping of only."}, {1,0,""}, {1,0,"Tags File Contents Options"}, @@ -436,6 +439,8 @@ static optionDescription LongOptionDescription [] = { {1,0," Output list of language extensions in mapping."}, {1,0," --list-map-patterns[=(|all)]"}, {1,0," Output list of language patterns in mapping."}, + {1,0," --list-map-regex[=(|all)]"}, + {1,0," Output list of language regular expressions in mapping."}, {1,0," --list-maps[=(|all)]"}, {1,0," Output list of language mappings (both extensions and patterns)."}, {1,0," --list-mline-regex-flags"}, @@ -1793,6 +1798,7 @@ static char* extractMapFromParameter (const langType language, ++parameter; for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p) { + /* ??? 
*/ if (*p == '\\' && *(p + 1) == PATTERN_STOP) ++p; } @@ -1808,6 +1814,26 @@ static char* extractMapFromParameter (const langType language, return result; } + if (first == REXPR_START) + { + *mapType = LMAP_REXPR; + + ++parameter; + vString *rexpr = vStringNew (); + for (p = parameter ; *p != REXPR_STOP && *p != '\0' ; ++p) + { + if (*p == '\\' && *(p + 1) == REXPR_STOP) + ++p; /* drop the backslash; the escaped REXPR_STOP itself is stored below instead of ending the loop */ + vStringPut (rexpr, *p); + } + if (*p == '\0') + error (FATAL, "Unterminated file name regex for %s language", + getLanguageName (language)); + + *tail = p + 1; + return vStringDeleteUnwrap (rexpr); + } + return NULL; } @@ -1823,6 +1849,13 @@ static char* addLanguageMap (const langType language, char* map_parameter, addLanguageExtensionMap (language, map, exclusiveInAllLanguages); else if (map && map_type == LMAP_PATTERN) addLanguagePatternMap (language, map, exclusiveInAllLanguages); + else if (map && map_type == LMAP_REXPR) + { + bool icase = (*p == REXPR_ICASE); + addLanguageRexprMap (language, map, icase, exclusiveInAllLanguages); + if (icase) + p++; + } else error (FATAL, "Badly formed language map for %s language", getLanguageName (language)); @@ -1843,6 +1876,13 @@ static char* removeLanguageMap (const langType language, char* map_parameter) removeLanguageExtensionMap (language, map); else if (map && map_type == LMAP_PATTERN) removeLanguagePatternMap (language, map); + else if (map && map_type == LMAP_REXPR) + { + bool icase = (*p == REXPR_ICASE); + removeLanguageRexprMap (language, map, icase); + if (icase) + p++; + } else error (FATAL, "Badly formed language map for %s language", getLanguageName (language)); @@ -2164,6 +2204,13 @@ static void processListMapPatternsOption (const char *const option, processListMapsOptionForType (option, parameter, LMAP_PATTERN|LMAP_TABLE_OUTPUT); } +static void processListMapRegularExpressionsOption (const char *const option, + const char *const parameter) +{ + processListMapsOptionForType (option, parameter, LMAP_REXPR|LMAP_TABLE_OUTPUT); +} + 
+ static void processListMapsOption ( const char *const option CTAGS_ATTR_UNUSED, const char *const parameter CTAGS_ATTR_UNUSED) @@ -2327,6 +2374,13 @@ static void processDescribeLanguage(const char *const option, getLanguageVersionCurrent (language), getLanguageVersionAge (language)); + puts(""); + puts("Mappings/rexprs"); + puts("-------------------------------------------------------"); + printLanguageMaps (language, LMAP_REXPR|LMAP_NO_LANG_PREFIX, + localOption.withListHeader, localOption.machinable, + stdout); + puts(""); puts("Mappings/patterns"); puts("-------------------------------------------------------"); @@ -2999,6 +3053,7 @@ static parametricOption ParametricOptions [] = { { "list-maps", processListMapsOption, true, STAGE_ANY }, { "list-map-extensions", processListMapExtensionsOption, true, STAGE_ANY }, { "list-map-patterns", processListMapPatternsOption, true, STAGE_ANY }, + { "list-map-regex", processListMapRegularExpressionsOption, true, STAGE_ANY }, { "list-mline-regex-flags", processListMultilineRegexFlagsOption, true, STAGE_ANY }, { "list-output-formats", processListOutputFormatsOption, true, STAGE_ANY }, { "list-params", processListParametersOption, true, STAGE_ANY }, diff --git a/main/parse.c b/main/parse.c index ffc63d7431..c8edfbe0a8 100644 --- a/main/parse.c +++ b/main/parse.c @@ -38,6 +38,7 @@ #include "ptrarray.h" #include "read.h" #include "read_p.h" +#include "rexprcode_p.h" #include "routines.h" #include "routines_p.h" #include "stats_p.h" @@ -62,9 +63,10 @@ enum specType { SPEC_ALIAS = SPEC_NAME, SPEC_EXTENSION, SPEC_PATTERN, + SPEC_REXPR, }; const char *specTypeName [] = { - "none", "name", "extension", "pattern" + "none", "name", "extension", "pattern", "regex" }; typedef struct { @@ -81,6 +83,7 @@ typedef struct sParserObject { stringList* currentPatterns; /* current list of file name patterns */ stringList* currentExtensions; /* current list of extensions */ stringList* currentAliases; /* current list of aliases */ + ptrArray* 
currentRegularExpressions; unsigned int initialized:1; /* initialize() is called or not */ unsigned int dontEmit:1; /* run but don't emit tags. @@ -471,7 +474,40 @@ extern langType getLanguageForCommand (const char *const command, langType start &tmp_specType); } -static langType getPatternLanguageAndSpec (const char *const baseName, langType start_index, +static ptrArray* rExpressionsNew (void) +{ + return ptrArrayNew ((ptrArrayDeleteFunc)rExprCodeDelete); +} + +static void rExpressionsDelete (ptrArray* rexprs) +{ + ptrArrayDelete (rexprs); +} + +static void rExpressionsAddFromArray (ptrArray* rexprs, const struct rExprSrc *const array) +{ + for (unsigned int i = 0; array[i].expr; i++) + { + struct rExprCode *rxcode = rExprCodeNew (array[i].expr, array[i].iCase); + if (rxcode) + ptrArrayAdd (rexprs, rxcode); + } +} + +static struct rExprCode *rExpressionsFinds(ptrArray *rexprs, const char *fullName) +{ + for (unsigned int i = 0; i < ptrArrayCount (rexprs); i++) + { + struct rExprCode *rxcode = ptrArrayItem (rexprs, i); + if (rExprCodeMatch (rxcode, fullName)) + return rxcode; + } + return NULL; +} + +static langType getPatternLanguageAndSpec (const char *const baseName, + const char *const fullName, + langType start_index, const char **const spec, enum specType *specType) { langType result = LANG_IGNORE; @@ -483,6 +519,29 @@ static langType getPatternLanguageAndSpec (const char *const baseName, langType return result; *spec = NULL; + + if (fullName == NULL) + goto classical_methods; + + for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + if (! 
isLanguageEnabled (i)) + continue; + + parserObject *parser = LanguageTable + i; + ptrArray* const rexprs = parser->currentRegularExpressions; + struct rExprCode *rxcode; + + if (rexprs != NULL && (rxcode = rExpressionsFinds (rexprs, fullName))) + { + result = i; + *spec = rExprCodeGetSource (rxcode); + *specType = SPEC_REXPR; + goto found; + } + } + + classical_methods: for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i) { if (! isLanguageEnabled (i)) @@ -529,7 +588,7 @@ extern langType getLanguageForFilename (const char *const filename, langType sta char *tmp_spec; enum specType tmp_specType; - return getPatternLanguageAndSpec (tmp_filename, startFrom, + return getPatternLanguageAndSpec (tmp_filename, filename, startFrom, (const char **const)&tmp_spec, &tmp_specType); } @@ -695,7 +754,8 @@ static parserCandidate* parserCandidateNew(unsigned int count CTAGS_ATTR_UNUSED) } /* If multiple parsers are found, return LANG_AUTO */ -static unsigned int nominateLanguageCandidates (const char *const key, parserCandidate** candidates) +static unsigned int nominateLanguageCandidates (const char *const key, const char *const fullKey CTAGS_ATTR_UNUSED, + parserCandidate** candidates) { unsigned int count; langType i; @@ -719,7 +779,8 @@ static unsigned int nominateLanguageCandidates (const char *const key, parserCan } static unsigned int -nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate** candidates) +nominateLanguageCandidatesForPattern(const char *const baseName, const char *const fullName, + parserCandidate** candidates) { unsigned int count; langType i; @@ -730,7 +791,7 @@ nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; ) { - i = getPatternLanguageAndSpec (baseName, i, &spec, &specType); + i = getPatternLanguageAndSpec (baseName, fullName, i, &spec, &specType); if (i != LANG_IGNORE) { (*candidates)[count].lang = i++; @@ -1325,8 +1386,8 @@ 
static bool doesCandidatesRequireMemoryStream(const parserCandidate *candidates, return false; } -static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx *glc, - unsigned int nominate (const char *const, parserCandidate**), +static langType getSpecLanguageCommon (const char *const spec, const char *const fullSpec, struct getLangCtx *glc, + unsigned int nominate (const char *const, const char *const, parserCandidate**), langType *fallback) { langType language; @@ -1336,7 +1397,7 @@ static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx if (fallback) *fallback = LANG_IGNORE; - n_candidates = (*nominate)(spec, &candidates); + n_candidates = (*nominate)(spec, fullSpec, &candidates); verboseReportCandidate ("candidates", candidates, n_candidates); @@ -1384,15 +1445,16 @@ static langType getSpecLanguage (const char *const spec, struct getLangCtx *glc, langType *fallback) { - return getSpecLanguageCommon(spec, glc, nominateLanguageCandidates, + return getSpecLanguageCommon(spec, NULL, glc, nominateLanguageCandidates, fallback); } static langType getPatternLanguage (const char *const baseName, + const char *const fullName, struct getLangCtx *glc, langType *fallback) { - return getSpecLanguageCommon(baseName, glc, + return getSpecLanguageCommon(baseName, fullName, glc, nominateLanguageCandidatesForPattern, fallback); } @@ -1486,7 +1548,7 @@ getFileLanguageForRequestInternal (struct GetLanguageRequest *req) verbose ("Get file language for %s\n", fileName); verbose (" pattern: %s\n", baseName); - language = getPatternLanguage (baseName, &glc, + language = getPatternLanguage (baseName, fileName, &glc, fallback + HINT_FILENAME); if (language != LANG_IGNORE || glc.err) goto cleanup; @@ -1499,7 +1561,7 @@ getFileLanguageForRequestInternal (struct GetLanguageRequest *req) verbose (" pattern + template(%s): %s\n", tExt, templateBaseName); GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false); mio_rewind(glc.input); - language = 
getPatternLanguage(templateBaseName, &glc, + language = getPatternLanguage (templateBaseName, NULL, &glc, fallback + HINT_TEMPLATE); if (language != LANG_IGNORE) goto cleanup; @@ -1633,11 +1695,19 @@ extern void installLanguageMapDefault (const langType language) parserObject* parser; Assert (0 <= language && language < (int) LanguageCount); parser = LanguageTable + language; + + if (parser->currentRegularExpressions != NULL) + rExpressionsDelete (parser->currentRegularExpressions); if (parser->currentPatterns != NULL) stringListDelete (parser->currentPatterns); if (parser->currentExtensions != NULL) stringListDelete (parser->currentExtensions); + parser->currentRegularExpressions = rExpressionsNew (); + if (parser->def->rexprs) + rExpressionsAddFromArray (parser->currentRegularExpressions, + parser->def->rexprs); + if (parser->def->patterns == NULL) parser->currentPatterns = stringListNew (); else @@ -1799,6 +1869,61 @@ extern void addLanguageExtensionMap ( stringListAdd ((LanguageTable + language)->currentExtensions, str); } +static bool removeLanguageRexprMap1(const langType language, const char *const rexpr, bool iCase) +{ + bool result = false; + ptrArray* const rexprs = (LanguageTable + language)->currentRegularExpressions; + + for (unsigned int i = 0; i < ptrArrayCount (rexprs); i++) + { + struct rExprCode *rxcode = ptrArrayItem (rexprs, i); + if (strcmp (rExprCodeGetSource (rxcode), rexpr) == 0) + { + ptrArrayDeleteItem (rexprs, i); + verbose (" (removed from %s)", getLanguageName (language)); + result = true; + break; + } + } + return result; +} + +extern bool removeLanguageRexprMap (const langType language, const char *const rexpr, bool iCase) +{ + bool result = false; + + if (language == LANG_AUTO) + { + unsigned int i; + for (i = 0; i < LanguageCount && ! 
result ; ++i) + result = removeLanguageRexprMap1 (i, rexpr, iCase) || result; + } + else + result = removeLanguageRexprMap1 (language, rexpr, iCase); + + return result; +} + +extern void addLanguageRexprMap (const langType language, const char* rexpr, bool iCase, + bool exclusiveInAllLanguages) +{ + Assert (0 <= language && language < (int) LanguageCount); + + struct rExprCode *rxcode = rExprCodeNew (rexpr,iCase); + if (rxcode) + { + if (exclusiveInAllLanguages) + removeLanguageRexprMap (LANG_AUTO, rexpr, iCase); + + parserObject* parser = LanguageTable + language; + if (!parser->currentRegularExpressions) + parser->currentRegularExpressions = rExpressionsNew (); + ptrArray* const rexprs = parser->currentRegularExpressions; + + ptrArrayAdd (rexprs, rxcode); + } +} + extern void addLanguageAlias (const langType language, const char* alias) { vString* const str = vStringNewInit (alias); @@ -2131,6 +2256,11 @@ extern void freeParserResources (void) freeList (&parser->currentPatterns); freeList (&parser->currentExtensions); + if (parser->currentRegularExpressions != NULL) + { + rExpressionsDelete (parser->currentRegularExpressions); + parser->currentRegularExpressions = NULL; + } freeList (&parser->currentAliases); eFree (parser->def->name); @@ -3764,6 +3894,18 @@ static void printMaps (const langType language, langmapType type) parser = LanguageTable + language; if (! 
(LMAP_NO_LANG_PREFIX & type)) printf ("%-8s", parser->def->name); + if (parser->currentRegularExpressions != NULL && (type & LMAP_REXPR)) + { + for (i = 0 ; i < ptrArrayCount (parser->currentRegularExpressions) ; ++i) + { + struct rExprCode *rxcode = ptrArrayItem (parser->currentRegularExpressions, + i); + vString *encodedSource = rExprCodeNewEncodedSource (rxcode); + printf (" %s", vStringValue (encodedSource)); + vStringDelete (encodedSource); + } + } + if (parser->currentPatterns != NULL && (type & LMAP_PATTERN)) for (i = 0 ; i < stringListCount (parser->currentPatterns) ; ++i) printf (" %s", vStringValue ( @@ -3783,6 +3925,8 @@ static struct colprintTable *mapColprintTableNew (langmapType type) return colprintTableNew ("L:LANGUAGE", "L:PATTERN", NULL); else if (type & LMAP_EXTENSION) return colprintTableNew ("L:LANGUAGE", "L:EXTENSION", NULL); + else if (type & LMAP_REXPR) + return colprintTableNew ("L:LANGUAGE", "L:EXPRESSION", "L:CASE", NULL); else { AssertNotReached (); @@ -3798,6 +3942,35 @@ static void mapColprintAddLanguage (struct colprintTable * table, unsigned int count; unsigned int i; + if ((type & LMAP_REXPR) + && parser->currentRegularExpressions + && (0 < (count = ptrArrayCount (parser->currentRegularExpressions)))) + { + for (i = 0; i < count; i++) + { + line = colprintTableGetNewLine (table); + struct rExprCode *rxcode = ptrArrayItem (parser->currentRegularExpressions, + i); + + colprintLineAppendColumnCString (line, parser->def->name); + if ((type & LMAP_ALL) != LMAP_REXPR) + { + colprintLineAppendColumnCString (line, "regex"); + vString *encodedSource = rExprCodeNewEncodedSource (rxcode); + colprintLineAppendColumnVString (line, encodedSource); + vStringDelete (encodedSource); + } + else + { + const char *rxsrc = rExprCodeGetSource (rxcode); + bool iCase = rExprCodeGetICase (rxcode); + + colprintLineAppendColumnCString (line, rxsrc); + colprintLineAppendColumnCString (line, iCase? 
"insensitive": "sensitive"); + } + } + } + if ((type & LMAP_PATTERN) && (0 < (count = stringListCount (parser->currentPatterns)))) { for (i = 0; i < count; i++) @@ -3806,7 +3979,7 @@ static void mapColprintAddLanguage (struct colprintTable * table, vString *pattern = stringListItem (parser->currentPatterns, i); colprintLineAppendColumnCString (line, parser->def->name); - if (type & LMAP_EXTENSION) + if ((type & LMAP_ALL) != LMAP_PATTERN) colprintLineAppendColumnCString (line, "pattern"); colprintLineAppendColumnVString (line, pattern); } @@ -3820,7 +3993,7 @@ static void mapColprintAddLanguage (struct colprintTable * table, vString *extension = stringListItem (parser->currentExtensions, i); colprintLineAppendColumnCString (line, parser->def->name); - if (type & LMAP_PATTERN) + if ((type & LMAP_ALL) != LMAP_EXTENSION) colprintLineAppendColumnCString (line, "extension"); colprintLineAppendColumnVString (line, extension); } diff --git a/main/parse.h b/main/parse.h index 8758b09069..2d94ffb92c 100644 --- a/main/parse.h +++ b/main/parse.h @@ -77,6 +77,13 @@ enum scriptHook { SCRIPT_HOOK_MAX, }; +/* --map-=[+|-|]%reguar-expresson%[i] */ +struct rExprSrc { + const char *expr; /* The last element must be NULL. 
*/ + bool iCase; +}; +#define REXPR_LAST_ENTRY { .expr = NULL, } + struct sParserDefinition { /* defined by parser */ char* name; /* name of language */ @@ -104,6 +111,8 @@ struct sParserDefinition { const char *const *extensions; /* list of default extensions */ const char *const *patterns; /* list of default file name patterns */ const char *const *aliases; /* list of default aliases (alternative names) */ + const struct rExprSrc * rexprs; /* list of default file name regex patterns */ + parserInitialize initialize; /* initialization routine, if needed */ parserFinalize finalize; /* finalize routine, if needed */ simpleParser parser; /* simple parser (common case) */ diff --git a/main/parse_p.h b/main/parse_p.h index f26cd6a8af..0e52e687fa 100644 --- a/main/parse_p.h +++ b/main/parse_p.h @@ -31,11 +31,12 @@ * DATA DECLARATIONS */ typedef enum { - LMAP_PATTERN = 1 << 0, - LMAP_EXTENSION = 1 << 1, - LMAP_ALL = LMAP_PATTERN | LMAP_EXTENSION, - LMAP_TABLE_OUTPUT = 1 << 2, - LMAP_NO_LANG_PREFIX = 1 << 3, + LMAP_REXPR = 1 << 0, + LMAP_PATTERN = 1 << 1, + LMAP_EXTENSION = 1 << 2, + LMAP_ALL = LMAP_PATTERN | LMAP_EXTENSION | LMAP_REXPR, + LMAP_TABLE_OUTPUT = 1 << 3, + LMAP_NO_LANG_PREFIX = 1 << 4, } langmapType; enum parserCategory @@ -103,6 +104,9 @@ extern void addLanguageExtensionMap (const langType language, const char* extens extern bool removeLanguagePatternMap (const langType language, const char *const pattern); extern void addLanguagePatternMap (const langType language, const char* ptrn, bool exclusiveInAllLanguages); +extern bool removeLanguageRexprMap (const langType language, const char *const rexpr, bool iCase); +extern void addLanguageRexprMap (const langType language, const char* rexpr, bool iCase, + bool exclusiveInAllLanguages); extern void installLanguageAliasesDefault (const langType language); extern void installLanguageAliasesDefaults (void); diff --git a/main/rexprcode.c b/main/rexprcode.c new file mode 100644 index 0000000000..2e1564ad7e --- 
/dev/null +++ b/main/rexprcode.c @@ -0,0 +1,98 @@ +/* +* Copyright (c) 2025, Red Hat, Inc. +* Copyright (c) 2025, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. + +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" + +#include "routines.h" +#include "rexprcode_p.h" + +#include +#include + +/* +* DATA DECLARATIONS +*/ +struct rExprCode { + regex_t *code; + char *src; + bool iCase; +}; + +/* +* FUNCTION DECLARATIONS +*/ +extern const char *rExprCodeGetSource (const struct rExprCode *rxcode) +{ + return rxcode->src; +} + +extern const bool rExprCodeGetICase (const struct rExprCode *rxcode) +{ + return rxcode->iCase; +} + +extern vString *rExprCodeNewEncodedSource (const struct rExprCode *rxcode) +{ + vString *encoded_src = vStringNew(); + + vStringPut (encoded_src, '%'); + + for (const char *c = rExprCodeGetSource (rxcode); *c != '\0'; c++) + { + if (*c == '%') + vStringPut (encoded_src, '\\'); + vStringPut (encoded_src, *c); + } + + vStringPut (encoded_src, '%'); + if (rExprCodeGetICase (rxcode)) + vStringPut (encoded_src, 'i'); + + return encoded_src; +} + +extern struct rExprCode *rExprCodeNew(const char *rxsrc, bool iCase) +{ + regex_t *regex_code = xMalloc (1, regex_t); + int errcode = regcomp (regex_code, rxsrc, + REG_EXTENDED|REG_NOSUB|(iCase? 
REG_ICASE: 0)); + if (errcode != 0) + { + char errmsg[256]; + regerror (errcode, regex_code, errmsg, sizeof (errmsg)); + error (WARNING, "regcomp: %s", errmsg); + /* No regfree() here: POSIX leaves the regex_t contents undefined after a failed regcomp(). */ + eFree (regex_code); + return NULL; + } + + struct rExprCode *rxcode = xMalloc (1, struct rExprCode); + + rxcode->code = regex_code; + rxcode->src = eStrdup (rxsrc); + rxcode->iCase = iCase; + + return rxcode; +} + +extern void rExprCodeDelete (struct rExprCode *rxcode) +{ + regfree (rxcode->code); + eFree (rxcode->code); + eFree (rxcode->src); + eFree (rxcode); +} + +extern bool rExprCodeMatch (struct rExprCode *rxcode, const char *fname) +{ + return (regexec (rxcode->code, fname, 0, NULL, 0) == 0); /* compiled with REG_NOSUB: no match array is requested */ +} diff --git a/main/rexprcode_p.h b/main/rexprcode_p.h new file mode 100644 index 0000000000..28d996fe94 --- /dev/null +++ b/main/rexprcode_p.h @@ -0,0 +1,36 @@ +/* +* Copyright (c) 2025, Red Hat, Inc. +* Copyright (c) 2025, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+ +*/ + +#ifndef CTAGS_MAIN_REXPRCODE_H +#define CTAGS_MAIN_REXPRCODE_H + +/* +* INCLUDE FILES +*/ +#include "general.h" + +#include "vstring.h" + +/* +* DATA DECLARATIONS +*/ + +struct rExprCode; + +/* +* FUNCTION DECLARATIONS +*/ +extern const char *rExprCodeGetSource (const struct rExprCode *rxcode); +extern vString *rExprCodeNewEncodedSource (const struct rExprCode *rxcode); +extern const bool rExprCodeGetICase (const struct rExprCode *rxcode); +extern struct rExprCode *rExprCodeNew(const char *rxsrc, bool iCase); +extern void rExprCodeDelete (struct rExprCode *rxcode); +extern bool rExprCodeMatch (struct rExprCode *rxcode, const char *fname); + +#endif /* CTAGS_MAIN_REXPRCODE_H */ diff --git a/misc/optlib2c b/misc/optlib2c index e8e96473ba..0c1fe24217 100755 --- a/misc/optlib2c +++ b/misc/optlib2c @@ -293,7 +293,10 @@ my $options = die "Adding a map is allowed only to the language specified with --langdef: $1" unless ($_[0]->{'langdef'} eq $1); my $spec = $2; - if ($spec =~ /\((.*)\)/) { + if ($spec =~ /%(.+)%(i)?/) { + my $rexpr = { expr => $1, iCase => (defined $2 && $2 eq 'i')? 1: 0 }; + push @{$_[0]->{'rexprs'}}, $rexpr; + } elsif ($spec =~ /\((.*)\)/) { push @{$_[0]->{'patterns'}}, $1; } elsif ($spec =~ /\.(.*)/) { push @{$_[0]->{'extensions'}}, $1; @@ -894,6 +897,31 @@ sub emit_patterns { emit_list $_[0], "patterns"; } +sub emit_rexprs { + my $opts = shift; + + return if (! @{$opts->{'rexprs'}}); + + printf <{'rexprs'}}) { + my $expr = escape_as_cstr ("$_->{'expr'}"); + my $iCase = $_->{'iCase'}? 
"true": "false"; + printf <{'kinddefs'}}) { next unless @{$_->{'roles'}}; my $Kind = capitalize($_->{'name'}); @@ -1256,6 +1283,13 @@ sub emit_fields_initialization { def->enabled = ${enabled}; def->extensions = extensions; def->patterns = patterns; +EOF + if (@{$opts->{'rexprs'}}) { + print <rexprs = rexprs; +EOF + } + print <aliases = aliases; EOF if (defined $opts->{'selector'}) { @@ -1358,6 +1392,7 @@ EOF emit_extensions $opts; emit_aliases $opts; emit_patterns $opts; + emit_rexprs $opts; emit_roledefs $opts; emit_scopeseps $opts; emit_kinddefs $opts; @@ -1418,6 +1453,7 @@ sub main { disabled => 0, patterns => [], extensions => [], + rexprs => [], aliases => [], regexs => [# { regex => "", name => "", kind => "", flags => "", mline => 1|0, optscript => "" }, ], diff --git a/optlib/rpmMacros.c b/optlib/rpmMacros.c index 1d9c5a4c02..4f280a46a0 100644 --- a/optlib/rpmMacros.c +++ b/optlib/rpmMacros.c @@ -113,6 +113,14 @@ extern parserDefinition* RpmMacrosParser (void) NULL }; + static const struct rExprSrc rexprs [] = { + { + .expr = "(.*/)?macros\\.d/macros\\.([^/]+)$", + .iCase = false, + }, + REXPR_LAST_ENTRY + }; + static kindDefinition RpmMacrosKindTable [] = { { true, 'm', "macro", "macros", @@ -126,6 +134,7 @@ extern parserDefinition* RpmMacrosParser (void) def->enabled = true; def->extensions = extensions; def->patterns = patterns; + def->rexprs = rexprs; def->aliases = aliases; def->method = METHOD_NOT_CRAFTED|METHOD_REGEX; def->useCork = CORK_QUEUE; diff --git a/optlib/rpmMacros.ctags b/optlib/rpmMacros.ctags index 43a3c3f912..80dfe61d2e 100644 --- a/optlib/rpmMacros.ctags +++ b/optlib/rpmMacros.ctags @@ -17,12 +17,7 @@ # --langdef=RpmMacros -# This map is too generic. -# e.g. "macros.h" of C language input matches this pattern. -# --map-RpmMacros=+(macros.*) - -# This one is too general. 
-# --map-RpmMacros=+(macros) +--map-RpmMacros=+%(.*/)?macros\.d/macros\.([^/]+)$% --kinddef-RpmMacros=m,macro,macros diff --git a/source.mak b/source.mak index 521a2a37a7..bb27a5ccfe 100644 --- a/source.mak +++ b/source.mak @@ -126,6 +126,7 @@ LIB_PRIVATE_HEADS = \ main/promise_p.h \ main/ptag_p.h \ main/read_p.h \ + main/rexprcode_p.h \ main/script_p.h \ main/sort_p.h \ main/stats_p.h \ @@ -174,6 +175,7 @@ LIB_SRCS = \ main/ptag.c \ main/rbtree.c \ main/read.c \ + main/rexprcode.c \ main/script.c \ main/seccomp.c \ main/selectors.c \ diff --git a/win32/ctags_vs2013.vcxproj b/win32/ctags_vs2013.vcxproj index 088e606811..d4e203b6ae 100644 --- a/win32/ctags_vs2013.vcxproj +++ b/win32/ctags_vs2013.vcxproj @@ -217,6 +217,7 @@ + @@ -447,6 +448,7 @@ + diff --git a/win32/ctags_vs2013.vcxproj.filters b/win32/ctags_vs2013.vcxproj.filters index ae70a81c10..11fb40672c 100644 --- a/win32/ctags_vs2013.vcxproj.filters +++ b/win32/ctags_vs2013.vcxproj.filters @@ -174,6 +174,9 @@ Source Files\main + + Source Files\main + Source Files\main @@ -860,6 +863,9 @@ Header Files + + Header Files + Header Files