Skip to content

Commit 653958b

Browse files
committed
add PATH command line argument
fix #1
1 parent ec4fee9 commit 653958b

File tree

2 files changed

+50
-14
lines changed

2 files changed

+50
-14
lines changed

man/ugrep-indexer.1

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.TH UGREP-INDEXER "1" "August 09, 2023" "ugrep-indexer 0.9" "User Commands"
1+
.TH UGREP-INDEXER "1" "August 12, 2023" "ugrep-indexer 0.9.1" "User Commands"
22
.SH NAME
33
\fBugrep-indexer\fR -- file indexer for accelerated ugrep search
44
.SH SYNOPSIS
@@ -8,6 +8,10 @@ The \fBugrep-indexer\fR utility recursively indexes files to accelerate ugrep
88
recursive searches with \fBugrep\fR option \fB--index\fR.
99
.PP
1010
The following options are available:
11+
Usage:
12+
ugrep\-indexer [\fB\-0\fR|...|\fB\-9\fR] [\fB\-.\fR] [\fB\-c\fR|\fB\-d\fR|\fB\-f\fR] [\fB\-I\fR] [\fB\-q\fR] [\fB\-S\fR] [\fB\-s\fR] [\fB\-X\fR] [\fB\-z\fR] [\fIPATH\fR]
13+
.TP
14+
PATH Optional pathname to the root of the directory tree to index.
1115
.TP
1216
\fB\-0\fR, \fB\-1\fR, \fB\-2\fR, \fB\-3\fR, ..., \fB\-9\fR, \fB\-\-accuracy\fR=\fIDIGIT\fR
1317
Specifies indexing accuracy. A low accuracy reduces the indexing
@@ -59,7 +63,7 @@ encountered during indexing. The default FILE is `.gitignore'.
5963
\fB\-z\fR, \fB\-\-decompress\fR
6064
Index the contents of compressed files and archives.
6165
This option is not yet available in this version.
62-
ugrep\-indexer 0.9 beta
66+
ugrep\-indexer 0.9.1 beta
6367
License BSD\-3\-Clause: <https://opensource.org/licenses/BSD\-3\-Clause>
6468
Written by Robert van Engelen and others: <https://github.com/Genivia/ugrep>
6569
.SH "EXIT STATUS"

src/ugrep-indexer.cpp

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
@copyright (c) BSD-3 License - see LICENSE.txt
3535
*/
3636

37-
#define UGREP_INDEXER_VERSION "0.9 beta"
37+
#define UGREP_INDEXER_VERSION "0.9.1 beta"
3838

3939
// check if we are compiling for a windows OS, but not Cygwin or MinGW
4040
#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__BORLANDC__)) && !defined(__CYGWIN__) && !defined(__MINGW32__) && !defined(__MINGW64__)
@@ -104,14 +104,23 @@
104104
#include <vector>
105105
#include <stack>
106106

107+
// number of bytes to gulp into the buffer to index a file
107108
#define BUF_SIZE 65536
109+
110+
// smallest possible power-of-two size of an index of a file, should be > 61
108111
#define MIN_SIZE 128
109112

113+
// default --ignore-files=FILE argument
110114
#define DEFAULT_IGNORE_FILE ".gitignore"
111115

116+
// fixed constant strings
112117
const char ugrep_index_filename[] = "._UG#_Store";
113118
const char ugrep_index_file_magic[5] = "UG#\x03";
114119

120+
// command-line optional PATH argument
121+
const char *arg_pathname = NULL;
122+
123+
// command-line options
115124
int flag_accuracy = 6;
116125
bool flag_check = false;
117126
bool flag_decompress = false;
@@ -131,20 +140,26 @@ struct Ignore {
131140
std::vector<std::string> dirs;
132141
};
133142

134-
// stack of ignore files/dirs
143+
// stack of ignore file/dir globs per ignore-file found
135144
std::stack<Ignore> ignore_stack;
136145

137146
// entry data extracted from directory contents, moves pathname to this entry
138147
struct Entry {
139148

149+
// indexing is initiated with the pathname to the root of the directory to index
140150
Entry(const char *pathname = ".")
141151
:
142152
pathname(pathname), // the working dir by default
143153
base(0),
144154
mtime(~0ULL), // max time to make sure we check the working directory for updates
145155
size(0)
146-
{ }
156+
{
157+
const char *sep = strrchr(pathname, PATHSEPCHR);
158+
if (sep != NULL)
159+
base = strlen(sep) - 1;
160+
}
147161

162+
// new pathname entry, note this moves the pathname to the entry that owns it now
148163
Entry(std::string& pathname, size_t base, uint64_t mtime, off_t size)
149164
:
150165
pathname(std::move(pathname)),
@@ -175,7 +190,7 @@ struct Entry {
175190
}
176191

177192
std::string pathname; // full pathname
178-
size_t base; // size of the basename in the pathname
193+
size_t base; // length of the basename in the pathname
179194
uint64_t mtime; // modification time
180195
off_t size; // file size
181196

@@ -193,7 +208,8 @@ void version()
193208
// display a help message and exit
194209
void help()
195210
{
196-
std::cout << "Usage: ugrep-indexer [-0|...|-9] [-.] [-c|-d|-f] [-I] [-q] [-S] [-s] [-X] [-z]\n\n\
211+
std::cout << "\nUsage:\n\nugrep-indexer [-0|...|-9] [-.] [-c|-d|-f] [-I] [-q] [-S] [-s] [-X] [-z] [PATH]\n\n\
212+
PATH Optional pathname to the root of the directory tree to index.\n\n\
197213
-0, -1, -2, -3, ..., -9, --accuracy=DIGIT\n\
198214
Specifies indexing accuracy. A low accuracy reduces the indexing\n\
199215
storage overhead at the cost of a higher rate of false positive\n\
@@ -392,6 +408,14 @@ void options(int argc, const char **argv)
392408
}
393409
}
394410
}
411+
else if (arg_pathname == NULL)
412+
{
413+
arg_pathname = arg;
414+
}
415+
else
416+
{
417+
usage("argument PATH already specified as ", arg_pathname);
418+
}
395419
}
396420

397421
if (flag_check)
@@ -408,7 +432,6 @@ inline int fopenw_s(FILE **file, const char *filename, const char *mode)
408432
#if defined(HAVE_F_RDAHEAD)
409433
if (strchr(mode, 'a') == NULL && strchr(mode, 'w') == NULL)
410434
{
411-
// removed O_NOATIME which may fail
412435
#if defined(O_NOCTTY)
413436
int fd = open(filename, O_RDONLY | O_NOCTTY);
414437
#else
@@ -874,7 +897,7 @@ void cat(const std::string& pathname, std::stack<Entry>& dir_entries, std::vecto
874897
}
875898

876899
// recursively delete index files
877-
void deleter()
900+
void deleter(const char *pathname)
878901
{
879902
flag_no_messages = true;
880903

@@ -891,7 +914,11 @@ void deleter()
891914
uint64_t index_time;
892915
uint64_t last_time;
893916

894-
dir_entries.emplace();
917+
// pathname to the root of the directory tree to remove index files from, or . when no PATH is specified
918+
if (pathname == NULL)
919+
dir_entries.emplace();
920+
else
921+
dir_entries.emplace(pathname);
895922

896923
// recurse subdirectories breadth-first to remove index files
897924
while (!dir_entries.empty())
@@ -901,6 +928,7 @@ void deleter()
901928

902929
cat(visit.pathname, dir_entries, file_entries, num_dirs, num_links, num_other, ign_dirs, ign_files, index_time, last_time, true);
903930

931+
// if index time is nonzero, there is a valid index file in this directory we should remove
904932
if (index_time > 0)
905933
{
906934
index_filename.assign(visit.pathname).append(PATHSEPSTR).append(ugrep_index_filename);
@@ -910,7 +938,7 @@ void deleter()
910938
}
911939

912940
// recursively index files
913-
void indexer()
941+
void indexer(const char *pathname)
914942
{
915943
std::stack<Entry> dir_entries;
916944
std::vector<Entry> file_entries;
@@ -933,7 +961,11 @@ void indexer()
933961
float sum_noise = 0;
934962
uint8_t hashes[65536];
935963

936-
dir_entries.emplace();
964+
// pathname to the root of the directory tree to index, or . when no PATH is specified
965+
if (pathname == NULL)
966+
dir_entries.emplace();
967+
else
968+
dir_entries.emplace(pathname);
937969

938970
// recurse subdirectories
939971
while (!dir_entries.empty())
@@ -1209,9 +1241,9 @@ int main(int argc, const char **argv)
12091241
options(argc, argv);
12101242

12111243
if (flag_delete)
1212-
deleter();
1244+
deleter(arg_pathname);
12131245
else
1214-
indexer();
1246+
indexer(arg_pathname);
12151247

12161248
return EXIT_SUCCESS;
12171249
}

0 commit comments

Comments
 (0)