-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathFileSearcherAsync.cs
More file actions
236 lines (209 loc) · 10.1 KB
/
FileSearcherAsync.cs
File metadata and controls
236 lines (209 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SimpleFileSearch
{
/// <summary>
/// Searches a directory tree for files by name pattern and, optionally, for text inside those files.
/// The text search can be case-sensitive and/or accent-insensitive.
/// </summary>
public class FileSearcherAsync
{
    /// <summary>Minimum time between two progress notifications, in milliseconds.</summary>
    private const long StatusUpdateIntervalMs = 500;

    /// <summary>
    /// Lower bound for the number of lines handed to one parallel comparison batch.
    /// Very small batches make the scheduling cost larger than the comparison work itself.
    /// </summary>
    private const int MinLinesPerBatch = 1024;

    /// <summary>
    /// Searches for files in the specified directory (recursively) based on the given filename patterns.
    /// Optionally, it keeps only the files that contain a specific text.
    /// </summary>
    /// <param name="directoryPath">The root directory to search.</param>
    /// <param name="filenamePatterns">File name patterns separated by ';' (e.g., "*.txt;*.log"). Whitespace around each pattern is ignored.</param>
    /// <param name="searchText">Text to search for inside the files; when null or empty every file matching a pattern is returned.</param>
    /// <param name="isCaseSensitive">Determines if text search should be case-sensitive.</param>
    /// <param name="ignoreAccents">Determines if accentuation should be ignored in the search.</param>
    /// <param name="statusCallback">Callback function to report status updates (may be null).</param>
    /// <returns>
    /// A list of matching <see cref="FileInfo"/> objects. If an unexpected error occurs, the error is
    /// written to the console and the files matched so far are returned.
    /// </returns>
    public async Task<List<FileInfo>> SearchFilesAsync(string directoryPath, string filenamePatterns, string searchText, bool isCaseSensitive, bool ignoreAccents, Action<string> statusCallback)
    {
        List<FileInfo> matchingFiles = new List<FileInfo>();
        bool searchInsideFiles = !string.IsNullOrEmpty(searchText);

        try
        {
            // Directory enumeration has no asynchronous API; run it on the thread pool so that
            // the calling thread is not blocked while the whole tree is walked.
            HashSet<string> allFiles = await Task.Run(() => CollectFiles(directoryPath, filenamePatterns));

            // Initial status update
            statusCallback?.Invoke("Verifying files...");

            int totalFiles = allFiles.Count;
            int processedCount = 0;
            long lastUpdateTime = Environment.TickCount64;

            foreach (string filePath in allFiles)
            {
                processedCount++;

                // Without a search text every collected file is a match.
                if (!searchInsideFiles
                    || await ContainsTextInFileAsync(filePath, searchText, isCaseSensitive, ignoreAccents))
                {
                    matchingFiles.Add(new FileInfo(filePath));
                }

                // Throttle progress notifications to avoid flooding the receiver.
                long currentTime = Environment.TickCount64;
                if (currentTime - lastUpdateTime >= StatusUpdateIntervalMs)
                {
                    statusCallback?.Invoke($"Verifying files, {processedCount} of {totalFiles} ...");
                    lastUpdateTime = currentTime;
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the problem and return whatever was matched so far.
            Console.WriteLine($"Error: {ex.Message}");
        }

        return matchingFiles;
    }

    /// <summary>
    /// Collects the distinct paths of all files below <paramref name="directoryPath"/> whose name
    /// matches at least one of the ';'-separated patterns.
    /// </summary>
    /// <param name="directoryPath">The root directory to enumerate.</param>
    /// <param name="filenamePatterns">File name patterns separated by ';'; null or blank yields no files.</param>
    /// <returns>The set of matching file paths (empty when the root is missing or inaccessible).</returns>
    private static HashSet<string> CollectFiles(string directoryPath, string filenamePatterns)
    {
        HashSet<string> files = new HashSet<string>();
        if (string.IsNullOrWhiteSpace(filenamePatterns))
        {
            return files;
        }

        // Same matching rules as SearchOption.AllDirectories (Win32 wildcards, no attribute
        // filtering), except that an inaccessible sub-directory is skipped instead of aborting
        // the enumeration and losing every file of the pattern.
        EnumerationOptions options = new EnumerationOptions
        {
            RecurseSubdirectories = true,
            IgnoreInaccessible = true,
            AttributesToSkip = 0,
            MatchType = MatchType.Win32,
        };

        foreach (string rawPattern in filenamePatterns.Split(';', StringSplitOptions.RemoveEmptyEntries))
        {
            // Tolerate input such as "*.txt; *.log".
            string pattern = rawPattern.Trim();
            if (pattern.Length == 0)
            {
                continue;
            }

            try
            {
                foreach (string file in Directory.EnumerateFiles(directoryPath, pattern, options))
                {
                    files.Add(file);
                }
            }
            catch (UnauthorizedAccessException) { /* Root folder without permission */ }
            catch (DirectoryNotFoundException) { /* Missing root folder */ }
        }

        return files;
    }

    /// <summary>
    /// Asynchronously checks if a given text exists within a single line of a file.
    /// </summary>
    /// <param name="filePath">The file to search.</param>
    /// <param name="searchText">The text to find.</param>
    /// <param name="isCaseSensitive">Determines if the search should be case-sensitive.</param>
    /// <param name="ignoreAccents">Determines if accentuation should be ignored.</param>
    /// <returns>True if the text is found; false if it is not found or the file cannot be read.</returns>
    private async Task<bool> ContainsTextInFileAsync(string filePath, string searchText, bool isCaseSensitive, bool ignoreAccents)
    {
        StringComparison comparisonType = isCaseSensitive
            ? StringComparison.InvariantCulture
            : StringComparison.InvariantCultureIgnoreCase;

        // The lines are stripped of accents before comparing, so the search text must be
        // stripped as well; otherwise an accented search text could never match.
        string needle = ignoreAccents ? RemoveDiacritics(searchText) : searchText;
        int batchSize = Math.Max(MinLinesPerBatch, Environment.ProcessorCount);

        try
        {
            using StreamReader reader = new StreamReader(filePath);
            List<string> buffer = new List<string>(batchSize);

            // ReadLineAsync returns null at end of stream. StreamReader.EndOfStream is avoided
            // here because it may perform a blocking synchronous read.
            string line;
            while ((line = await reader.ReadLineAsync()) != null)
            {
                buffer.Add(line);
                if (buffer.Count >= batchSize)
                {
                    if (await CheckLinesInParallel(buffer, needle, comparisonType, ignoreAccents))
                    {
                        return true;
                    }

                    buffer.Clear();
                }
            }

            // Process remaining lines
            return await CheckLinesInParallel(buffer, needle, comparisonType, ignoreAccents);
        }
        catch (IOException) { /* Ignore file access errors */ }
        catch (UnauthorizedAccessException) { /* Ignore permission errors */ }

        return false;
    }

    /// <summary>
    /// Performs parallel text comparison on a batch of lines.
    /// </summary>
    /// <param name="lines">List of lines to process; must not be modified until the returned task completes.</param>
    /// <param name="searchText">Text to search for; the caller passes it already stripped of accents when <paramref name="ignoreAccents"/> is true.</param>
    /// <param name="comparisonType">Case-sensitive or case-insensitive comparison.</param>
    /// <param name="ignoreAccents">Determines if accents are removed from each line before comparing.</param>
    /// <returns>True if at least one line contains the text, otherwise false.</returns>
    private Task<bool> CheckLinesInParallel(List<string> lines, string searchText, StringComparison comparisonType, bool ignoreAccents)
    {
        if (lines.Count == 0)
        {
            return Task.FromResult(false);
        }

        return Task.Run(() =>
        {
            ParallelLoopResult result = Parallel.ForEach(lines, (line, state) =>
            {
                string lineToCompare = ignoreAccents ? RemoveDiacritics(line) : line;
                if (lineToCompare.Contains(searchText, comparisonType))
                {
                    // Any match is enough, so ask the loop to finish as early as possible.
                    state.Stop();
                }
            });

            // The loop only ends before completion when a match requested the stop.
            return !result.IsCompleted;
        });
    }

    /// <summary>
    /// Removes diacritics (accents) from a string to enable accent-insensitive searches.
    /// </summary>
    /// <param name="text">The input string.</param>
    /// <returns>
    /// The string without non-spacing marks, in normalization form C. Null, empty, or text that
    /// cannot be normalized is returned unchanged.
    /// </returns>
    private static string RemoveDiacritics(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        try
        {
            // Decompose so that accents become separate non-spacing marks, drop those marks,
            // then recompose.
            string decomposed = text.Normalize(NormalizationForm.FormD);
            StringBuilder stripped = new StringBuilder(decomposed.Length);
            foreach (char c in decomposed)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
                {
                    stripped.Append(c);
                }
            }

            return stripped.ToString().Normalize(NormalizationForm.FormC);
        }
        catch (ArgumentException)
        {
            // Normalize rejects invalid Unicode sequences; compare such text as-is.
            return text;
        }
    }
}
}