diff --git a/.github/workflows/bearer.yaml b/.github/workflows/bearer.yaml index 6a2bedd..9e28b32 100644 --- a/.github/workflows/bearer.yaml +++ b/.github/workflows/bearer.yaml @@ -18,3 +18,5 @@ jobs: - name: Bearer uses: bearer/bearer-action@v2 + with: + skip-rule: "java_lang_information_leakage" diff --git a/.github/workflows/spellcheck.yaml b/.github/workflows/spellcheck.yaml new file mode 100644 index 0000000..c05c231 --- /dev/null +++ b/.github/workflows/spellcheck.yaml @@ -0,0 +1,19 @@ +name: spellcheck + +on: + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + spellcheck: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: streetsidesoftware/cspell-action@v7 + with: + config: .vscode/cspell.json diff --git a/.gitignore b/.gitignore index a0f652a..b3aedc6 100644 --- a/.gitignore +++ b/.gitignore @@ -129,8 +129,10 @@ dmypy.json .pyre/ .history -# VSCode -.vscode/ +# Visual Studio code +.vscode/* +!.vscode/cspell.json +*.code-workspace .history # Output folder used by examples diff --git a/.vscode/cspell.json b/.vscode/cspell.json new file mode 100644 index 0000000..27a511c --- /dev/null +++ b/.vscode/cspell.json @@ -0,0 +1,89 @@ +{ + // Version of the setting file. Always 0.2 + "version": "0.2", + // language - current active spelling language + "language": "en", + // words - list of words to be always considered correct + "words": [ + "accessibilities", + "analysing", + "Axxxx", + "CCLA", + "cfgp", + "codehaus", + "CODEOWNER", + "configmanager", + "configmgr", + "configparser", + "CONFIGPATH", + "Dorg", + "Dsenzing", + "DYLD", + "findsecbugs", + "fpvs", + "glassfish", + "ICLA", + "ipascalcase", + "isort", + "javax", + "jdbc", + "kwargs", + "lukehinds", + "Makio", + "mypy", + "mysenzproj", + "optionxform", + "pascalcase", + "Pcheckstyle", + "pushback", + "pydevproject", + "pylint", + "pypa", + "pytest", + "RESOURCEPATH", + "Retryable", + "roslynator", + "Rotorville", + "schemaname", + "Senzing", + "senzingsdk", + "setuptools", + "shellcheck", + "shuf", + "signum", + "spotbugs", + "stackoverflow", + "SUPPORTPATH", + "szconfig", + "szconfigmanager", + "szcore", + "szengineflags", + "taskkill", + "temurin", + "terashuf", + "tpascalcase", + "truthset", + "unresolve", + "unresolving", + "USERPROFILE", + "venv", + "watchlist", + "WATCHLIST", + "xerial", + "Xlint", + "xstream", + "Yamanaka", + "YESPURGESENZING" + ], + "ignorePaths": [ + ".git/**", + ".gitignore", + ".mypy_cache/**", + "csharp/runner/**/bin/**", + "csharp/runner/**/obj/**", + "csharp/snippets/**/bin/**", + "csharp/snippets/**/obj/**", + "java/target/**", + "resources/data/**" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index b037f46..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "terminal.integrated.env.linux": { - "PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python/src:/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing", - // "PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing:./sz_tools", - "LD_LIBRARY_PATH": "/opt/senzing/er/lib/", - // "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}" - "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}, \"LOGGING\":{\"CONFIG\":\"console://stdout/?style=jsonl *.TRCE;*.CRIT;*.ERR\"}}" - // "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"LICENSESTRINGBASE64\": \"AQAAADgCAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIwMjQtMDUtMDIAAAAAAAAAAAAARVZBTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFNUQU5EQVJEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC4mH8AAEBCDwAAAAAAMjAyNS0wNS0wMgAAAAAAAAAAAABZRUFSTFkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFXwDOdVd1TL+0dJRXnE9ykJJyJYnGhUN1QqoS8ASfNaDioankisRviWuB3I5uQ20EEh9tjNzzOszGf1+khWl5cb+XqE+GoMMW0rrSi6ScZmgrfh2oHrRpEbnfb4uejMrl3XGdTPdHUGNSkTKDgEQrlimVt04W5gsFVcBHBiUbKoZCghI+qaYGocsNZLh1yWOklu8Fh02CWkXXQZSKvq/PsXpkHtbsxPbActcMmZRYPZNiRXq0BK3ChyCRM0zbl4mZCPBfNL9zAx6v2HLUmDp4lNEVIyS86T9/enSrsK1udnJq09jnP8gBzY6kBxpoYyxr5o2u1VX3DC9ySHiwtio6NQMo0ckGultNqYpSBejXm10YCYH6eCsnnC5z49Gp+2NYIRcgRz/N93uLd7PrkLyLreayF8HCQOg7CBZeUGcFsufdf0304eJHCsoRy1w2dUT8N2auYJxuzjwzAMvZIYrYamjiG6Mc4Wdcpuktlcht+pjhqk9vwqQI0AzjMq2oXDGYL6KlFcOAojAIZu8bl30pZGGkq2n9NFuuO4gMiRjIwYkBpwHNmBq3QT21owPb4urlidmQelmXtzk9+BNMZL34bUK7R509Rt3GTmjb2c5TDqyIatGfnBsh3658ce8ohnBJ/ZmUgJifcorgLDawDqr8spClKfwLtcwzbkNPDKHJ/e\",\"RESOURCEPATH\":\"/opt/senzing/g2/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}" - }, - "python.testing.pytestArgs": [], - "python.autoComplete.extraPaths": [ - "/home/ant/Work/Senzing/git/sz-sdk-python-core/src", - "/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing", - "/home/ant/Work/Senzing/git/sz-sdk-python/src", - "/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing" - ], - "python.analysis.extraPaths": [ - "/home/ant/Work/Senzing/git/sz-sdk-python-core/src", - "/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing", - "/home/ant/Work/Senzing/git/sz-sdk-python/src", - "/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing" - ], - "pylint.importStrategy": "useBundled", - "java.configuration.updateBuildConfiguration": "automatic", - "java.project.sourcePaths": [ - "." - ], - "java.project.referencedLibraries": [], - "java.format.settings.url": ".vscode/java-formatter.xml", -} \ No newline at end of file diff --git a/csharp/runner/SnippetRunner/Program.cs b/csharp/runner/SnippetRunner/Program.cs index 9246a8e..ec7f529 100644 --- a/csharp/runner/SnippetRunner/Program.cs +++ b/csharp/runner/SnippetRunner/Program.cs @@ -40,448 +40,448 @@ DirectoryInfo? runnerDir = null; switch (dir.Name) { - case "snippets": - snippetDir = dir; - break; - case "runner": - runnerDir = dir; - break; - case "csharp": - csharpDir = dir; - break; - default: - HandleWrongDirectory(); - break; + case "snippets": + snippetDir = dir; + break; + case "runner": + runnerDir = dir; + break; + case "csharp": + csharpDir = dir; + break; + default: + HandleWrongDirectory(); + break; } // if no snippet dir, try to find the csharp dir from the runner dir if (snippetDir == null && runnerDir != null) { - csharpDir = Directory.GetParent(runnerDir.FullName); - if (!"csharp".Equals(csharpDir?.Name, Ordinal)) - { - HandleWrongDirectory(); - } + csharpDir = Directory.GetParent(runnerDir.FullName); + if (!"csharp".Equals(csharpDir?.Name, Ordinal)) + { + HandleWrongDirectory(); + } } // if no snippet dir, try to find it using the csharp dir if (snippetDir == null && csharpDir != null) { - snippetDir = new DirectoryInfo(Path.Combine(csharpDir.FullName, "snippets")); - if (!snippetDir.Exists) - { - HandleWrongDirectory(); - } + snippetDir = new DirectoryInfo(Path.Combine(csharpDir.FullName, "snippets")); + if (!snippetDir.Exists) + { + HandleWrongDirectory(); + } } if (snippetDir == null) { - HandleWrongDirectory(); - Environment.Exit(1); - return; + HandleWrongDirectory(); + Environment.Exit(1); + return; } try { - SortedDictionary> snippetsMap - = GetSnippetsMap(snippetDir); - - SortedDictionary> snippetOptions - = new SortedDictionary>(); - foreach (KeyValuePair> entry in snippetsMap) + SortedDictionary> snippetsMap + = GetSnippetsMap(snippetDir); + + SortedDictionary> snippetOptions + = new SortedDictionary>(); + foreach (KeyValuePair> entry in snippetsMap) + { + string group = entry.Key; + IDictionary snippetMap = entry.Value; + List<(string, string, string)> tuples + = new List<(string, string, string)>(snippetMap.Count); + + foreach (KeyValuePair subEntry in snippetMap) { - string group = entry.Key; - IDictionary snippetMap = entry.Value; - List<(string, string, string)> tuples - = new List<(string, string, string)>(snippetMap.Count); - - foreach (KeyValuePair subEntry in snippetMap) - { - string snippet = subEntry.Key; - string snippetPath = subEntry.Value; - tuples.Add((group, snippet, snippetPath)); - } - snippetOptions.Add(group, tuples.AsReadOnly()); + string snippet = subEntry.Key; + string snippetPath = subEntry.Value; + tuples.Add((group, snippet, snippetPath)); } - - foreach (KeyValuePair> entry in snippetsMap) + snippetOptions.Add(group, tuples.AsReadOnly()); + } + + foreach (KeyValuePair> entry in snippetsMap) + { + string group = entry.Key; + IDictionary snippetMap = entry.Value; + foreach (KeyValuePair subEntry in snippetMap) { - string group = entry.Key; - IDictionary snippetMap = entry.Value; - foreach (KeyValuePair subEntry in snippetMap) - { - string snippet = subEntry.Key; - string snippetPath = subEntry.Value; - List<(string, string, string)> tuples = new List<(string, string, string)>(1); - tuples.Add((group, snippet, snippetPath)); - snippetOptions.Add(snippet, tuples.AsReadOnly()); - } + string snippet = subEntry.Key; + string snippetPath = subEntry.Value; + List<(string, string, string)> tuples = new List<(string, string, string)>(1); + tuples.Add((group, snippet, snippetPath)); + snippetOptions.Add(snippet, tuples.AsReadOnly()); } + } - if (args.Length == 0) - { - PrintUsage(snippetsMap); - Environment.Exit(1); - } - - // check for settings in the environment - string? settings - = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); + if (args.Length == 0) + { + PrintUsage(snippetsMap); + Environment.Exit(1); + } - // validate the settings if we have them - if (settings != null) - { - settings = settings.Trim(); - JsonObject? settingsJson = null; - try - { - settingsJson = JsonNode.Parse(settings)?.AsObject(); - if (settingsJson == null) - { - throw new ArgumentNullException("Setting must be a JSON object: " + settings); - } - } - catch (Exception e) - { - Console.Error.WriteLine(e); - Console.Error.WriteLine("The provided Senzing settings were not valid JSON:"); - Console.Error.WriteLine(); - Environment.Exit(1); - throw; - } - } + // check for settings in the environment + string? settings + = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); - // validate the SENZING_DIR - InstallLocations? installLocations = null; + // validate the settings if we have them + if (settings != null) + { + settings = settings.Trim(); + JsonObject? settingsJson = null; try { - installLocations = InstallLocations.FindLocations(); - + settingsJson = JsonNode.Parse(settings)?.AsObject(); + if (settingsJson == null) + { + throw new ArgumentNullException("Setting must be a JSON object: " + settings); + } } catch (Exception e) { - Console.Error.WriteLine(e); - Environment.Exit(1); - throw; - } - if (installLocations == null) - { - Console.Error.WriteLine("Could not find the Senzing installation."); - Console.Error.WriteLine("Try setting the SENZING_DIR environment variable."); - Environment.Exit(1); - return; + Console.Error.WriteLine(e); + Console.Error.WriteLine("The provided Senzing settings were not valid JSON:"); + Console.Error.WriteLine(); + Environment.Exit(1); + throw; } + } + + // validate the SENZING_DIR + InstallLocations? installLocations = null; + try + { + installLocations = InstallLocations.FindLocations(); - List<(string, string)> snippets = new List<(string, string)>(100); - for (int index = 0; index < args.Length; index++) + } + catch (Exception e) + { + Console.Error.WriteLine(e); + Environment.Exit(1); + throw; + } + if (installLocations == null) + { + Console.Error.WriteLine("Could not find the Senzing installation."); + Console.Error.WriteLine("Try setting the SENZING_DIR environment variable."); + Environment.Exit(1); + return; + } + + List<(string, string)> snippets = new List<(string, string)>(100); + for (int index = 0; index < args.Length; index++) + { + string arg = args[index]; + if (arg.Equals("all", Ordinal)) { - string arg = args[index]; - if (arg.Equals("all", Ordinal)) - { - foreach (IDictionary snippetMap in snippetsMap.Values) - { - foreach (KeyValuePair entry in snippetMap) - { - string snippet = entry.Key; - string snippetPath = entry.Value; - if (!snippets.Contains((snippet, snippetPath))) - { - snippets.Add((snippet, snippetPath)); - } - } - } - continue; - } - if (!snippetOptions.ContainsKey(arg)) + foreach (IDictionary snippetMap in snippetsMap.Values) + { + foreach (KeyValuePair entry in snippetMap) { - Console.Error.WriteLine("Unrecognized code snippet or snippet group: " + arg); - Environment.Exit(1); - } - IList<(string, string, string)> tuples = snippetOptions[arg]; - foreach ((string group, string snippet, string path) in tuples) - { - if (!snippets.Contains((snippet, path))) - { - snippets.Add((snippet, path)); - } + string snippet = entry.Key; + string snippetPath = entry.Value; + if (!snippets.Contains((snippet, snippetPath))) + { + snippets.Add((snippet, snippetPath)); + } } + } + continue; } - - // check if we do not have settings and if not setup a temporary repository - if (settings == null) + if (!snippetOptions.ContainsKey(arg)) { - settings = SetupTempRepository(installLocations); + Console.Error.WriteLine("Unrecognized code snippet or snippet group: " + arg); + Environment.Exit(1); } - - long defaultConfigID; - - SzEnvironment env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); - try + IList<(string, string, string)> tuples = snippetOptions[arg]; + foreach ((string group, string snippet, string path) in tuples) { - SzConfigManager configMgr = env.GetConfigManager(); - defaultConfigID = configMgr.GetDefaultConfigID(); - + if (!snippets.Contains((snippet, path))) + { + snippets.Add((snippet, path)); + } } - catch (SzException e) - { - Console.Error.WriteLine(e); - Environment.Exit(1); - return; + } - } - finally - { - env.Destroy(); - } + // check if we do not have settings and if not setup a temporary repository + if (settings == null) + { + settings = SetupTempRepository(installLocations); + } - foreach ((string snippet, string snippetPath) in snippets) - { - Console.WriteLine(); - Stopwatch stopwatch = Stopwatch.StartNew(); - Dictionary properties = new Dictionary(); - string resourceName = $"""{assemblyName}.Resources.{snippet}.properties"""; - LoadProperties(properties, resourceName); - Console.WriteLine("Preparing repository for " + snippet + "..."); - env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); - try - { - // first purge the repository - SzDiagnostic diagnostic = env.GetDiagnostic(); - diagnostic.PurgeRepository(); - - // now set the configuration - SzConfigManager configMgr = env.GetConfigManager(); - // check if we need to configure sources - if (properties.ContainsKey(SourceKeyPrefix + 0)) - { - SzConfig config = configMgr.CreateConfig(); - for (int index = 0; - properties.ContainsKey(SourceKeyPrefix + index); - index++) - { - string sourceKey = SourceKeyPrefix + index; - string source = properties[sourceKey]; - source = source.Trim(); - Console.WriteLine("Adding data source: " + source); - config.AddDataSource(source); - } - string snippetConfig = config.Export(); + long defaultConfigID; - // register the config - configMgr.SetDefaultConfig(snippetConfig); - } - else - { - // set the default config to the initial default - configMgr.SetDefaultConfigID(defaultConfigID); - } + SzEnvironment env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); + try + { + SzConfigManager configMgr = env.GetConfigManager(); + defaultConfigID = configMgr.GetDefaultConfigID(); - // check if there are files we need to load - if (properties.ContainsKey(LoadKeyPrefix + 0)) - { - SzEngine engine = env.GetEngine(); - for (int index = 0; properties.ContainsKey(LoadKeyPrefix + index); index++) - { - string loadKey = LoadKeyPrefix + index; - string fileName = properties[loadKey]; - fileName = fileName.Trim(); - Console.WriteLine("Loading records from file resource: " + fileName); - Stream? stream = assembly.GetManifestResourceStream(fileName); - if (stream == null) - { - throw new ArgumentException( - "Missing resource (" + fileName + ") for load file (" - + loadKey + ") for snippet (" + snippet + ")"); - } - StreamReader rdr = new StreamReader(stream, Encoding.UTF8); - try - { - for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) - { - line = line.Trim(); - if (line.Length == 0) continue; - if (line.StartsWith('#')) continue; - JsonObject? record = JsonNode.Parse(line)?.AsObject(); - if (record == null) - { - throw new JsonException("Failed to parse line as JSON: " + line); - } - string dataSource = record.ContainsKey(DataSource) - ? record[DataSource]?.GetValue() ?? TestSource : TestSource; - string? recordID = record.ContainsKey(RecordID) - ? record[RecordID]?.GetValue() : null; - engine.AddRecord(dataSource, recordID, line, SzNoFlags); - } - } - finally - { - rdr.Close(); - stream.Close(); - } + } + catch (SzException e) + { + Console.Error.WriteLine(e); + Environment.Exit(1); + return; - } - } + } + finally + { + env.Destroy(); + } - } - catch (SzException e) + foreach ((string snippet, string snippetPath) in snippets) + { + Console.WriteLine(); + Stopwatch stopwatch = Stopwatch.StartNew(); + Dictionary properties = new Dictionary(); + string resourceName = $"""{assemblyName}.Resources.{snippet}.properties"""; + LoadProperties(properties, resourceName); + Console.WriteLine("Preparing repository for " + snippet + "..."); + env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); + try + { + // first purge the repository + SzDiagnostic diagnostic = env.GetDiagnostic(); + diagnostic.PurgeRepository(); + + // now set the configuration + SzConfigManager configMgr = env.GetConfigManager(); + // check if we need to configure sources + if (properties.ContainsKey(SourceKeyPrefix + 0)) + { + SzConfig config = configMgr.CreateConfig(); + for (int index = 0; + properties.ContainsKey(SourceKeyPrefix + index); + index++) { - Console.Error.WriteLine(e); - Environment.Exit(1); - return; + string sourceKey = SourceKeyPrefix + index; + string source = properties[sourceKey]; + source = source.Trim(); + Console.WriteLine("Adding data source: " + source); + config.AddDataSource(source); } - finally + string snippetConfig = config.Export(); + + // register the config + configMgr.SetDefaultConfig(snippetConfig); + } + else + { + // set the default config to the initial default + configMgr.SetDefaultConfigID(defaultConfigID); + } + + // check if there are files we need to load + if (properties.ContainsKey(LoadKeyPrefix + 0)) + { + SzEngine engine = env.GetEngine(); + for (int index = 0; properties.ContainsKey(LoadKeyPrefix + index); index++) { - env.Destroy(); + string loadKey = LoadKeyPrefix + index; + string fileName = properties[loadKey]; + fileName = fileName.Trim(); + Console.WriteLine("Loading records from file resource: " + fileName); + Stream? stream = assembly.GetManifestResourceStream(fileName); + if (stream == null) + { + throw new ArgumentException( + "Missing resource (" + fileName + ") for load file (" + + loadKey + ") for snippet (" + snippet + ")"); + } + StreamReader rdr = new StreamReader(stream, Encoding.UTF8); + try + { + for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) + { + line = line.Trim(); + if (line.Length == 0) continue; + if (line.StartsWith('#')) continue; + JsonObject? record = JsonNode.Parse(line)?.AsObject(); + if (record == null) + { + throw new JsonException("Failed to parse line as JSON: " + line); + } + string dataSource = record.ContainsKey(DataSource) + ? record[DataSource]?.GetValue() ?? TestSource : TestSource; + string? recordID = record.ContainsKey(RecordID) + ? record[RecordID]?.GetValue() : null; + engine.AddRecord(dataSource, recordID, line, SzNoFlags); + } + } + finally + { + rdr.Close(); + stream.Close(); + } + } - long duration = stopwatch.ElapsedMilliseconds; - Console.WriteLine("Prepared repository for " + snippet + ". (" + duration + "ms)"); + } - ExecuteSnippet(snippet, snippetPath, installLocations, settings, properties); } + catch (SzException e) + { + Console.Error.WriteLine(e); + Environment.Exit(1); + return; + } + finally + { + env.Destroy(); + } + long duration = stopwatch.ElapsedMilliseconds; + Console.WriteLine("Prepared repository for " + snippet + ". (" + duration + "ms)"); - Console.WriteLine(); + ExecuteSnippet(snippet, snippetPath, installLocations, settings, properties); + } + + Console.WriteLine(); } catch (Exception e) { - Console.Error.WriteLine(e); - Environment.Exit(1); - throw; + Console.Error.WriteLine(e); + Environment.Exit(1); + throw; } static void LoadProperties(IDictionary properties, String resourceName) { - Assembly assembly = Assembly.GetExecutingAssembly(); - Stream? stream = assembly.GetManifestResourceStream(resourceName); - if (stream != null) + Assembly assembly = Assembly.GetExecutingAssembly(); + Stream? stream = assembly.GetManifestResourceStream(resourceName); + if (stream != null) + { + StreamReader rdr = new StreamReader(stream, Encoding.UTF8); + try { - StreamReader rdr = new StreamReader(stream, Encoding.UTF8); - try + for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) + { + if (line.Trim().Length == 0) continue; + if (line.StartsWith('#')) continue; + if (line.StartsWith('!')) continue; + int index = line.IndexOf('=', Ordinal); + if (index < 1) continue; + string key = line.Substring(0, index).Trim(); + string value = ""; + if (index < line.Length - 1) { - for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) - { - if (line.Trim().Length == 0) continue; - if (line.StartsWith('#')) continue; - if (line.StartsWith('!')) continue; - int index = line.IndexOf('=', Ordinal); - if (index < 1) continue; - string key = line.Substring(0, index).Trim(); - string value = ""; - if (index < line.Length - 1) - { - value = line.Substring(index + 1); - } - value = value.Trim(); - while (value.EndsWith('\\')) - { - line = rdr.ReadLine(); - if (line == null) break; - line = line.Trim(); - value = string.Concat(value.AsSpan(0, value.Length - 1), line); - } - properties[key] = value; - } + value = line.Substring(index + 1); } - finally + value = value.Trim(); + while (value.EndsWith('\\')) { - rdr.Close(); - stream.Close(); + line = rdr.ReadLine(); + if (line == null) break; + line = line.Trim(); + value = string.Concat(value.AsSpan(0, value.Length - 1), line); } + properties[key] = value; + } } + finally + { + rdr.Close(); + stream.Close(); + } + } } static SortedDictionary> GetSnippetsMap(DirectoryInfo snippetDir) { - SortedDictionary> snippetsMap - = new SortedDictionary>(); + SortedDictionary> snippetsMap + = new SortedDictionary>(); - foreach (string dir in Directory.GetDirectories(snippetDir.FullName)) + foreach (string dir in Directory.GetDirectories(snippetDir.FullName)) + { + string? group = Path.GetFileName(dir); + if (group == null) { - string? group = Path.GetFileName(dir); - if (group == null) - { - continue; - } - snippetsMap.TryGetValue(group, out SortedDictionary? snippetMap); - if (snippetMap == null) - { - snippetMap = new SortedDictionary(); - snippetsMap.Add(group, snippetMap); - } + continue; + } + snippetsMap.TryGetValue(group, out SortedDictionary? snippetMap); + if (snippetMap == null) + { + snippetMap = new SortedDictionary(); + snippetsMap.Add(group, snippetMap); + } - foreach (string subdir in Directory.GetDirectories(dir)) - { - string? snippet = Path.GetFileName(subdir); - if (snippet == null) - { - continue; - } - string csprojPath = Path.Combine(subdir, snippet + ".csproj"); - if (!File.Exists(csprojPath)) - { - continue; - } - snippetMap.Add(group + "." + snippet, subdir); - } + foreach (string subdir in Directory.GetDirectories(dir)) + { + string? snippet = Path.GetFileName(subdir); + if (snippet == null) + { + continue; + } + string csprojPath = Path.Combine(subdir, snippet + ".csproj"); + if (!File.Exists(csprojPath)) + { + continue; + } + snippetMap.Add(group + "." + snippet, subdir); } - return snippetsMap; + } + return snippetsMap; } static void PrintUsage(SortedDictionary> snippetsMap) { - Assembly assembly = Assembly.GetExecutingAssembly(); - string? assemblyName = assembly.GetName().Name; - Console.Error.WriteLine($"""dotnet run --project {assemblyName} [ all | | ]*"""); - Console.Error.WriteLine(); - Console.Error.WriteLine(" - Specifying no arguments will print this message"); - Console.Error.WriteLine(" - Specifying \"all\" will run all snippets"); - Console.Error.WriteLine(" - Specifying one or more groups will run all snippets in those groups"); - Console.Error.WriteLine(" - Specifying one or more snippets will run those snippet"); - Console.Error.WriteLine(); - Console.Error.WriteLine("Examples:"); - Console.Error.WriteLine(); - Console.Error.WriteLine($""" dotnet run --project {assemblyName} all"""); - Console.Error.WriteLine(); - Console.Error.WriteLine($""" dotnet run --project {assemblyName} loading.LoadRecords loading.LoadViaFutures"""); - Console.Error.WriteLine(); - Console.Error.WriteLine($""" dotnet run --project {assemblyName} initialization deleting loading.LoadRecords"""); - Console.Error.WriteLine(); - Console.Error.WriteLine("Snippet Group Names:"); - foreach (string group in snippetsMap.Keys) + Assembly assembly = Assembly.GetExecutingAssembly(); + string? assemblyName = assembly.GetName().Name; + Console.Error.WriteLine($"""dotnet run --project {assemblyName} [ all | | ]*"""); + Console.Error.WriteLine(); + Console.Error.WriteLine(" - Specifying no arguments will print this message"); + Console.Error.WriteLine(" - Specifying \"all\" will run all snippets"); + Console.Error.WriteLine(" - Specifying one or more groups will run all snippets in those groups"); + Console.Error.WriteLine(" - Specifying one or more snippets will run those snippet"); + Console.Error.WriteLine(); + Console.Error.WriteLine("Examples:"); + Console.Error.WriteLine(); + Console.Error.WriteLine($""" dotnet run --project {assemblyName} all"""); + Console.Error.WriteLine(); + Console.Error.WriteLine($""" dotnet run --project {assemblyName} loading.LoadRecords loading.LoadViaFutures"""); + Console.Error.WriteLine(); + Console.Error.WriteLine($""" dotnet run --project {assemblyName} initialization deleting loading.LoadRecords"""); + Console.Error.WriteLine(); + Console.Error.WriteLine("Snippet Group Names:"); + foreach (string group in snippetsMap.Keys) + { + Console.Error.WriteLine(" - " + group); + } + Console.Error.WriteLine(); + Console.Error.WriteLine("Snippet Names:"); + foreach (IDictionary snippetMap in snippetsMap.Values) + { + foreach (string snippet in snippetMap.Keys) { - Console.Error.WriteLine(" - " + group); + Console.Error.WriteLine(" - " + snippet); } - Console.Error.WriteLine(); - Console.Error.WriteLine("Snippet Names:"); - foreach (IDictionary snippetMap in snippetsMap.Values) - { - foreach (string snippet in snippetMap.Keys) - { - Console.Error.WriteLine(" - " + snippet); - } - } - Console.Error.WriteLine(); + } + Console.Error.WriteLine(); } static void HandleWrongDirectory() { - Console.Error.WriteLine( - "Must be run from the csharp, csharp/runner or csharp/snippets directory"); - Environment.Exit(1); + Console.Error.WriteLine( + "Must be run from the csharp, csharp/runner or csharp/snippets directory"); + Environment.Exit(1); } static void SetupEnvironment(ProcessStartInfo startInfo, InstallLocations installLocations, string settings) { - System.Collections.IDictionary origEnv = Environment.GetEnvironmentVariables(); - foreach (DictionaryEntry entry in origEnv) - { - startInfo.Environment[entry.Key?.ToString() ?? ""] - = entry.Value?.ToString() ?? ""; - } - startInfo.Environment["SENZING_ENGINE_CONFIGURATION_JSON"] = settings; + System.Collections.IDictionary origEnv = Environment.GetEnvironmentVariables(); + foreach (DictionaryEntry entry in origEnv) + { + startInfo.Environment[entry.Key?.ToString() ?? ""] + = entry.Value?.ToString() ?? ""; + } + startInfo.Environment["SENZING_ENGINE_CONFIGURATION_JSON"] = settings; } static void ExecuteSnippet(string snippet, @@ -490,155 +490,155 @@ static void ExecuteSnippet(string snippet, string settings, IDictionary properties) { - ProcessStartInfo startInfo = new ProcessStartInfo( - "dotnet", - "run --project " + snippetPath); - SetupEnvironment(startInfo, senzingInstall, settings); - startInfo.WindowStyle = ProcessWindowStyle.Hidden; - startInfo.UseShellExecute = false; - startInfo.RedirectStandardInput = true; - - Console.WriteLine(); - Console.WriteLine("---------------------------------------"); - Console.WriteLine("Executing " + snippet + "..."); - Stopwatch stopWatch = Stopwatch.StartNew(); - - Process? process = Process.Start(startInfo); - if (process == null) - { - throw new ArgumentNullException("Failed to execute snippet; " + snippet); - } - - if (properties != null && properties.ContainsKey(InputKeyPrefix + 0)) + ProcessStartInfo startInfo = new ProcessStartInfo( + "dotnet", + "run --project " + snippetPath); + SetupEnvironment(startInfo, senzingInstall, settings); + startInfo.WindowStyle = ProcessWindowStyle.Hidden; + startInfo.UseShellExecute = false; + startInfo.RedirectStandardInput = true; + + Console.WriteLine(); + Console.WriteLine("---------------------------------------"); + Console.WriteLine("Executing " + snippet + "..."); + Stopwatch stopWatch = Stopwatch.StartNew(); + + Process? process = Process.Start(startInfo); + if (process == null) + { + throw new ArgumentNullException("Failed to execute snippet; " + snippet); + } + + if (properties != null && properties.ContainsKey(InputKeyPrefix + 0)) + { + // sleep for 1 second to give the process a chance to start up + Thread.Sleep(1000); + for (int index = 0; + properties.ContainsKey(InputKeyPrefix + index); + index++) { - // sleep for 1 second to give the process a chance to start up - Thread.Sleep(1000); - for (int index = 0; - properties.ContainsKey(InputKeyPrefix + index); - index++) - { - string inputLine = properties[InputKeyPrefix + index]; - Console.WriteLine(inputLine); - Console.Out.Flush(); - - inputLine = (inputLine == null) ? "" : inputLine.Trim(); - process.StandardInput.WriteLine(inputLine); - process.StandardInput.Flush(); - } - } - int exitValue = 0; - int expectedExitValue = 0; - if (properties != null && properties.ContainsKey(DestroyAfterKey)) - { - string propValue = properties[DestroyAfterKey]; - int delay = Int32.Parse(propValue, CultureInfo.InvariantCulture); - bool exited = process.WaitForExit(delay); - if (!exited && !process.HasExited) - { - expectedExitValue = (Environment.OSVersion.Platform == PlatformID.Win32NT) - ? 1 : SigtermExitCode; - Console.WriteLine(); - Console.WriteLine("Runner destroying " + snippet + " process..."); - - - ProcessStartInfo killStartInfo - = (Environment.OSVersion.Platform == PlatformID.Win32NT) - ? new ProcessStartInfo("taskkill", ["/F", "/PID", "" + process.Id]) - : new ProcessStartInfo("kill", "" + process.Id); + string inputLine = properties[InputKeyPrefix + index]; + Console.WriteLine(inputLine); + Console.Out.Flush(); - startInfo.WindowStyle = ProcessWindowStyle.Hidden; - startInfo.UseShellExecute = false; - Process? killer = Process.Start(killStartInfo); - if (killer == null) - { - process.Kill(true); - process.WaitForExit(); - } - else - { - killer.WaitForExit(); - process.WaitForExit(); - } - } - exitValue = process.ExitCode; + inputLine = (inputLine == null) ? "" : inputLine.Trim(); + process.StandardInput.WriteLine(inputLine); + process.StandardInput.Flush(); } - else + } + int exitValue = 0; + int expectedExitValue = 0; + if (properties != null && properties.ContainsKey(DestroyAfterKey)) + { + string propValue = properties[DestroyAfterKey]; + int delay = Int32.Parse(propValue, CultureInfo.InvariantCulture); + bool exited = process.WaitForExit(delay); + if (!exited && !process.HasExited) { - // wait indefinitely for the process to terminate + expectedExitValue = (Environment.OSVersion.Platform == PlatformID.Win32NT) + ? 1 : SigtermExitCode; + Console.WriteLine(); + Console.WriteLine("Runner destroying " + snippet + " process..."); + + + ProcessStartInfo killStartInfo + = (Environment.OSVersion.Platform == PlatformID.Win32NT) + ? new ProcessStartInfo("taskkill", ["/F", "/PID", "" + process.Id]) + : new ProcessStartInfo("kill", "" + process.Id); + + startInfo.WindowStyle = ProcessWindowStyle.Hidden; + startInfo.UseShellExecute = false; + Process? killer = Process.Start(killStartInfo); + if (killer == null) + { + process.Kill(true); process.WaitForExit(); - exitValue = process.ExitCode; - } - - if (exitValue != expectedExitValue) - { - throw new Exception("Failed to execute snippet; " + snippet - + " (" + exitValue + ")"); + } + else + { + killer.WaitForExit(); + process.WaitForExit(); + } } - stopWatch.Stop(); - int duration = stopWatch.Elapsed.Milliseconds; - Console.WriteLine("Executed " + snippet + ". (" + duration + "ms)"); + exitValue = process.ExitCode; + } + else + { + // wait indefinitely for the process to terminate + process.WaitForExit(); + exitValue = process.ExitCode; + } + + if (exitValue != expectedExitValue) + { + throw new Exception("Failed to execute snippet; " + snippet + + " (" + exitValue + ")"); + } + stopWatch.Stop(); + int duration = stopWatch.Elapsed.Milliseconds; + Console.WriteLine("Executed " + snippet + ". (" + duration + "ms)"); } static string SetupTempRepository(InstallLocations senzingInstall) { - DirectoryInfo? supportDir = senzingInstall.SupportDirectory; - DirectoryInfo? resourcesDir = senzingInstall.ResourceDirectory; - DirectoryInfo? templatesDir = senzingInstall.TemplatesDirectory; - DirectoryInfo? configDir = senzingInstall.ConfigDirectory; - if (supportDir == null || configDir == null - || resourcesDir == null || templatesDir == null) + DirectoryInfo? supportDir = senzingInstall.SupportDirectory; + DirectoryInfo? resourcesDir = senzingInstall.ResourceDirectory; + DirectoryInfo? templatesDir = senzingInstall.TemplatesDirectory; + DirectoryInfo? configDir = senzingInstall.ConfigDirectory; + if (supportDir == null || configDir == null + || resourcesDir == null || templatesDir == null) + { + throw new Exception( + "At least one of the required directories is missing from " + + "the installation. installLocations=[ " + + senzingInstall + " ]"); + } + + DirectoryInfo schemaDir = new DirectoryInfo( + Path.Combine(resourcesDir.FullName, "schema")); + string schemaFile = Path.Combine( + schemaDir.FullName, "szcore-schema-sqlite-create.sql"); + string configFile = Path.Combine( + templatesDir.FullName, "g2config.json"); + + // lay down the database schema + string databaseFile = Path.Combine( + Path.GetTempPath(), "G2C-" + Path.GetRandomFileName() + ".db"); + String jdbcUrl = "jdbc:sqlite:" + databaseFile; + + SqliteConnection? sqlite = null; + try + { + String connectSpec = "Data Source=" + databaseFile; + sqlite = new SqliteConnection(connectSpec); + sqlite.Open(); + SqliteCommand cmd = sqlite.CreateCommand(); + + string[] sqlLines = File.ReadAllLines(schemaFile, Encoding.UTF8); + + foreach (string sql in sqlLines) { - throw new Exception( - "At least one of the required directories is missing from " - + "the installation. installLoocations=[ " - + senzingInstall + " ]"); - } - - DirectoryInfo schemaDir = new DirectoryInfo( - Path.Combine(resourcesDir.FullName, "schema")); - string schemaFile = Path.Combine( - schemaDir.FullName, "szcore-schema-sqlite-create.sql"); - string configFile = Path.Combine( - templatesDir.FullName, "g2config.json"); - - // lay down the database schema - string databaseFile = Path.Combine( - Path.GetTempPath(), "G2C-" + Path.GetRandomFileName() + ".db"); - String jdbcUrl = "jdbc:sqlite:" + databaseFile; - - SqliteConnection? sqlite = null; - try - { - String connectSpec = "Data Source=" + databaseFile; - sqlite = new SqliteConnection(connectSpec); - sqlite.Open(); - SqliteCommand cmd = sqlite.CreateCommand(); - - string[] sqlLines = File.ReadAllLines(schemaFile, Encoding.UTF8); - - foreach (string sql in sqlLines) - { - if (sql.Trim().Length == 0) continue; + if (sql.Trim().Length == 0) continue; #pragma warning disable CA2100 - cmd.CommandText = sql.Trim(); + cmd.CommandText = sql.Trim(); #pragma warning restore CA2100 - cmd.ExecuteNonQuery(); - } + cmd.ExecuteNonQuery(); } - finally + } + finally + { + if (sqlite != null) { - if (sqlite != null) - { - sqlite.Close(); - } + sqlite.Close(); } - - string supportPath = supportDir.FullName.Replace("\\", "\\\\", Ordinal); - string configPath = configDir.FullName.Replace("\\", "\\\\", Ordinal); - string resourcePath = resourcesDir.FullName.Replace("\\", "\\\\", Ordinal); - string baseConfig = File.ReadAllText(configFile).Replace("\\", "\\\\", Ordinal); - string databasePath = databaseFile.Replace("\\", "\\\\", Ordinal); - string settings = $$""" + } + + string supportPath = supportDir.FullName.Replace("\\", "\\\\", Ordinal); + string configPath = configDir.FullName.Replace("\\", "\\\\", Ordinal); + string resourcePath = resourcesDir.FullName.Replace("\\", "\\\\", Ordinal); + string baseConfig = File.ReadAllText(configFile).Replace("\\", "\\\\", Ordinal); + string databasePath = databaseFile.Replace("\\", "\\\\", Ordinal); + string settings = $$""" { "PIPELINE": { "SUPPORTPATH": "{{supportPath}}", @@ -651,21 +651,21 @@ static string SetupTempRepository(InstallLocations senzingInstall) } """.Trim(); - SzEnvironment env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); - try - { - env.GetConfigManager().SetDefaultConfig(baseConfig); + SzEnvironment env = SzCoreEnvironment.NewBuilder().Settings(settings).Build(); + try + { + env.GetConfigManager().SetDefaultConfig(baseConfig); - } - catch (Exception) - { - Console.Error.WriteLine(settings); - throw; - } - finally - { - env.Destroy(); - } + } + catch (Exception) + { + Console.Error.WriteLine(settings); + throw; + } + finally + { + env.Destroy(); + } - return settings; + return settings; } diff --git a/csharp/snippets/deleting/DeleteViaFutures/Program.cs b/csharp/snippets/deleting/DeleteViaFutures/Program.cs index f2ee5d1..792c048 100644 --- a/csharp/snippets/deleting/DeleteViaFutures/Program.cs +++ b/csharp/snippets/deleting/DeleteViaFutures/Program.cs @@ -18,8 +18,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -39,7 +39,7 @@ // execution to a specific limited pool of threads. In order to // improve performance and conserve memory we want to use the same // threads for Senzing work. The TaskScheduler implementation used -// here is directly pulled from Mirosoft's TaskScheduler documentation +// here is directly pulled from Microsoft's TaskScheduler documentation TaskScheduler taskScheduler = new LimitedConcurrencyLevelTaskScheduler(ThreadCount); @@ -56,225 +56,225 @@ TaskScheduler taskScheduler try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - int lineNumber = 0; - bool eof = false; + int lineNumber = 0; + bool eof = false; - while (!eof) + while (!eof) + { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.Count < MaximumBacklog) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.Count < MaximumBacklog) + // read the next line + string? line = rdr.ReadLine(); + lineNumber++; + + // check for EOF + if (line == null) + { + eof = true; + break; + } + + // trim the line + line = line.Trim(); + + // skip any blank lines + if (line.Length == 0) continue; + + // skip any commented lines + if (line.StartsWith('#')) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try + { + // parse the line as a JSON object + JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); + if (recordJson == null) { - // read the next line - string? line = rdr.ReadLine(); - lineNumber++; - - // check for EOF - if (line == null) - { - eof = true; - break; - } - - // trim the line - line = line.Trim(); - - // skip any blank lines - if (line.Length == 0) continue; - - // skip any commented lines - if (line.StartsWith('#')) continue; - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try - { - // parse the line as a JSON object - JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); - if (recordJson == null) - { - // parsed JSON null - throw new SzBadInputException("Record must be a JSON object: " + line); - } - - // extract the data source code and record ID - string? dataSourceCode = recordJson[DataSource]?.GetValue(); - string? recordID = recordJson[RecordID]?.GetValue(); - - Task task = factory.StartNew(() => - { - // call the DeleteRecord() function with no flags - engine.DeleteRecord(dataSourceCode, recordID); - }, - CancellationToken.None, - TaskCreationOptions.None, - taskScheduler); - - // add the future to the pending future list - pendingFutures.Add((task, record)); - - } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, lineNumber, line); - errorCount++; // increment the error count - } + // parsed JSON null + throw new SzBadInputException("Record must be a JSON object: " + line); } - do - { - // handle any pending futures WITHOUT blocking to reduce the backlog - HandlePendingFutures(pendingFutures, false); + // extract the data source code and record ID + string? dataSourceCode = recordJson[DataSource]?.GetValue(); + string? recordID = recordJson[RecordID]?.GetValue(); - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.Count >= MaximumBacklog) + Task task = factory.StartNew(() => { - Thread.Sleep(PauseTimeout); - } - } while (pendingFutures.Count >= MaximumBacklog); + // call the DeleteRecord() function with no flags + engine.DeleteRecord(dataSourceCode, recordID); + }, + CancellationToken.None, + TaskCreationOptions.None, + taskScheduler); + + // add the future to the pending future list + pendingFutures.Add((task, record)); + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, lineNumber, line); + errorCount++; // increment the error count + } } - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - HandlePendingFutures(pendingFutures, true); + do + { + // handle any pending futures WITHOUT blocking to reduce the backlog + HandlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.Count >= MaximumBacklog) + { + Thread.Sleep(PauseTimeout); + } + } while (pendingFutures.Count >= MaximumBacklog); + } + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + HandlePendingFutures(pendingFutures, true); } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - rdr.Close(); - fs.Close(); - - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - - Console.WriteLine(); - Console.WriteLine("Successful delete operations : " + successCount); - Console.WriteLine("Failed delete operations : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine(retryCount + " deletions to be retried in " + retryFile); - } - Console.Out.Flush(); + rdr.Close(); + fs.Close(); + + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Successful delete operations : " + successCount); + Console.WriteLine("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " deletions to be retried in " + retryFile); + } + Console.Out.Flush(); } static void HandlePendingFutures(IList<(Task, Record)> pendingFutures, bool blocking) { - // loop through the pending futures - for (int index = 0; index < pendingFutures.Count; index++) - { - // get the next pending future - (Task task, Record record) = pendingFutures[index]; + // loop through the pending futures + for (int index = 0; index < pendingFutures.Count; index++) + { + // get the next pending future + (Task task, Record record) = pendingFutures[index]; - // if not blocking and this one is not done then continue - if (!blocking && !task.IsCompleted) continue; + // if not blocking and this one is not done then continue + if (!blocking && !task.IsCompleted) continue; - // remove the pending future from the list - pendingFutures.RemoveAt(index--); + // remove the pending future from the list + pendingFutures.RemoveAt(index--); - try + try + { + try + { + // wait for completion -- if non-blocking then this + // task is already completed and this will just + // throw any exception that might have occurred + if (blocking && !task.IsCompleted) { - try - { - // wait for completion -- if non-bocking then this - // task is already completed and this will just - // throw any exception that might have occurred - if (blocking && !task.IsCompleted) - { - task.Wait(); - } - - // if we get here then increment the success count - successCount++; - - } - catch (AggregateException e) - when (e.InnerException is TaskCanceledException - || e.InnerException is ThreadInterruptedException) - { - throw new SzRetryableException(e.InnerException); - } - catch (ThreadInterruptedException e) - { - throw new SzRetryableException(e.InnerException); - } - catch (AggregateException e) - { - if (e.InnerException != null) - { - // get the inner exception - throw e.InnerException; - } - else - { - throw; - } - } - + task.Wait(); } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - } - catch (SzRetryableException e) + // if we get here then increment the success count + successCount++; + + } + catch (AggregateException e) + when (e.InnerException is TaskCanceledException + || e.InnerException is ThreadInterruptedException) + { + throw new SzRetryableException(e.InnerException); + } + catch (ThreadInterruptedException e) + { + throw new SzRetryableException(e.InnerException); + } + catch (AggregateException e) + { + if (e.InnerException != null) { - // handle thread interruption and cancellation as retries - LogFailedRecord(Warning, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(record.Line); - } - + // get the inner exception + throw e.InnerException; } - catch (Exception e) + else { - // catch any other exception (incl. SzException) here - LogFailedRecord(Critical, e, record.LineNumber, record.Line); - errorCount++; - throw; // rethrow since exception is critical + throw; } + } + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, record.LineNumber, record.Line); + errorCount++; // increment the error count + + } + catch (SzRetryableException e) + { + // handle thread interruption and cancellation as retries + LogFailedRecord(Warning, e, record.LineNumber, record.Line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(record.Line); + } + + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedRecord(Critical, e, record.LineNumber, record.Line); + errorCount++; + throw; // rethrow since exception is critical } + } } /// @@ -292,46 +292,46 @@ static void LogFailedRecord(string errorType, int lineNumber, string recordJson) { - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " - + lineNumber + ": "); - Console.Error.WriteLine(recordJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + + lineNumber + ": "); + Console.Error.WriteLine(recordJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string DefaultFilePath = "../../resources/data/del-500.jsonl"; + private const string DefaultFilePath = "../../resources/data/del-500.jsonl"; - private const string RetryPrefix = "retry-"; + private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; + private const string RetrySuffix = ".jsonl"; - private const string DataSource = "DATA_SOURCE"; + private const string DataSource = "DATA_SOURCE"; - private const string RecordID = "RECORD_ID"; + private const string RecordID = "RECORD_ID"; - private const int ThreadCount = 8; + private const int ThreadCount = 8; - private const int BacklogFactor = 10; + private const int BacklogFactor = 10; - private const int MaximumBacklog = ThreadCount * BacklogFactor; + private const int MaximumBacklog = ThreadCount * BacklogFactor; - private const int PauseTimeout = 100; + private const int PauseTimeout = 100; - private const string Error = "ERROR"; + private const string Error = "ERROR"; - private const string Warning = "WARNING"; + private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string Critical = "CRITICAL"; - private static int errorCount; - private static int successCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + private static int errorCount; + private static int successCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; } diff --git a/csharp/snippets/loading/LoadRecords/Program.cs b/csharp/snippets/loading/LoadRecords/Program.cs index bd62cf5..d4d872b 100644 --- a/csharp/snippets/loading/LoadRecords/Program.cs +++ b/csharp/snippets/loading/LoadRecords/Program.cs @@ -11,8 +11,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -28,44 +28,44 @@ try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - // loop through the example records and add them to the repository - foreach (KeyValuePair<(string, string), string> pair in GetRecords()) - { - (string dataSourceCode, string recordID) = pair.Key; - string recordDefinition = pair.Value; + // loop through the example records and add them to the repository + foreach (KeyValuePair<(string, string), string> pair in GetRecords()) + { + (string dataSourceCode, string recordID) = pair.Key; + string recordDefinition = pair.Value; - // call the addRecord() function with no flags - engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); + // call the addRecord() function with no flags + engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); - Console.WriteLine("Record " + recordID + " added"); - Console.Out.Flush(); - } + Console.WriteLine("Record " + recordID + " added"); + Console.Out.Flush(); + } } catch (SzException e) { - // handle any exception that may have occurred - Console.Error.WriteLine("Senzing Error Message : " + e.Message); - Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); - Console.Error.WriteLine(e); - throw; + // handle any exception that may have occurred + Console.Error.WriteLine("Senzing Error Message : " + e.Message); + Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); + Console.Error.WriteLine(e); + throw; } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); } /// @@ -74,16 +74,16 @@ /// /// /// A of record key tuple keys -/// to string JSON text values desribing the records to be added. +/// to string JSON text values describing the records to be added. /// static IDictionary<(string, string), string> GetRecords() { - IDictionary<(string, string), string> records - = new SortedDictionary<(string, string), string>(); + IDictionary<(string, string), string> records + = new SortedDictionary<(string, string), string>(); - records.Add( - ("TEST", "1001"), - """ + records.Add( + ("TEST", "1001"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1001", @@ -99,9 +99,9 @@ } """); - records.Add( - ("TEST", "1002"), - """ + records.Add( + ("TEST", "1002"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1002", @@ -120,9 +120,9 @@ } """); - records.Add( - ("TEST", "1003"), - """ + records.Add( + ("TEST", "1003"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1003", @@ -135,9 +135,9 @@ } """); - records.Add( - ("TEST", "1004"), - """ + records.Add( + ("TEST", "1004"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1004", @@ -153,9 +153,9 @@ } """); - records.Add( - ("TEST", "1005"), - """ + records.Add( + ("TEST", "1005"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1005", @@ -173,5 +173,5 @@ } """); - return records; + return records; } diff --git a/csharp/snippets/loading/LoadViaFutures/Program.cs b/csharp/snippets/loading/LoadViaFutures/Program.cs index 605ace2..f8894e1 100644 --- a/csharp/snippets/loading/LoadViaFutures/Program.cs +++ b/csharp/snippets/loading/LoadViaFutures/Program.cs @@ -18,8 +18,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -39,7 +39,7 @@ // execution to a specific limited pool of threads. In order to // improve performance and conserve memory we want to use the same // threads for Senzing work. The TaskScheduler implementation used -// here is directly pulled from Mirosoft's TaskScheduler documentation +// here is directly pulled from Microsoft's TaskScheduler documentation TaskScheduler taskScheduler = new LimitedConcurrencyLevelTaskScheduler(ThreadCount); @@ -56,227 +56,227 @@ TaskScheduler taskScheduler try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - int lineNumber = 0; - bool eof = false; + int lineNumber = 0; + bool eof = false; - while (!eof) + while (!eof) + { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.Count < MaximumBacklog) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.Count < MaximumBacklog) + // read the next line + string? line = rdr.ReadLine(); + lineNumber++; + + // check for EOF + if (line == null) + { + eof = true; + break; + } + + // trim the line + line = line.Trim(); + + // skip any blank lines + if (line.Length == 0) continue; + + // skip any commented lines + if (line.StartsWith('#')) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try + { + // parse the line as a JSON object + JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); + if (recordJson == null) { - // read the next line - string? line = rdr.ReadLine(); - lineNumber++; - - // check for EOF - if (line == null) - { - eof = true; - break; - } - - // trim the line - line = line.Trim(); - - // skip any blank lines - if (line.Length == 0) continue; - - // skip any commented lines - if (line.StartsWith('#')) continue; - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try - { - // parse the line as a JSON object - JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); - if (recordJson == null) - { - // parsed JSON null - throw new SzBadInputException("Record must be a JSON object: " + line); - } - - // extract the data source code and record ID - string? dataSourceCode = recordJson[DataSource]?.GetValue(); - string? recordID = recordJson[RecordID]?.GetValue(); - - Task task = factory.StartNew(() => - { - // call the addRecord() function with no flags - engine.AddRecord(dataSourceCode, recordID, record.Line); - }, - CancellationToken.None, - TaskCreationOptions.None, - taskScheduler); - - // add the future to the pending future list - pendingFutures.Add((task, record)); - - } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, lineNumber, line); - errorCount++; // increment the error count - } + // parsed JSON null + throw new SzBadInputException("Record must be a JSON object: " + line); } - do - { - // handle any pending futures WITHOUT blocking to reduce the backlog - HandlePendingFutures(pendingFutures, false); + // extract the data source code and record ID + string? dataSourceCode = recordJson[DataSource]?.GetValue(); + string? recordID = recordJson[RecordID]?.GetValue(); - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.Count >= MaximumBacklog) + Task task = factory.StartNew(() => { - Thread.Sleep(PauseTimeout); - } - } while (pendingFutures.Count >= MaximumBacklog); + // call the addRecord() function with no flags + engine.AddRecord(dataSourceCode, recordID, record.Line); + }, + CancellationToken.None, + TaskCreationOptions.None, + taskScheduler); + + // add the future to the pending future list + pendingFutures.Add((task, record)); + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, lineNumber, line); + errorCount++; // increment the error count + } } - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - HandlePendingFutures(pendingFutures, true); + do + { + // handle any pending futures WITHOUT blocking to reduce the backlog + HandlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.Count >= MaximumBacklog) + { + Thread.Sleep(PauseTimeout); + } + } while (pendingFutures.Count >= MaximumBacklog); + } + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + HandlePendingFutures(pendingFutures, true); } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // close the reader - rdr.Close(); - - // close the file stream - fs.Close(); - - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - - Console.WriteLine(); - Console.WriteLine("Records successfully added : " + successCount); - Console.WriteLine("Records failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine(retryCount + " records to be retried in " + retryFile); - } - Console.Out.Flush(); + // close the reader + rdr.Close(); + + // close the file stream + fs.Close(); + + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Records successfully added : " + successCount); + Console.WriteLine("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " records to be retried in " + retryFile); + } + Console.Out.Flush(); } static void HandlePendingFutures(IList<(Task, Record)> pendingFutures, bool blocking) { - // loop through the pending futures - for (int index = 0; index < pendingFutures.Count; index++) - { - // get the next pending future - (Task task, Record record) = pendingFutures[index]; + // loop through the pending futures + for (int index = 0; index < pendingFutures.Count; index++) + { + // get the next pending future + (Task task, Record record) = pendingFutures[index]; - // if not blocking and this one is not done then continue - if (!blocking && !task.IsCompleted) continue; + // if not blocking and this one is not done then continue + if (!blocking && !task.IsCompleted) continue; - // remove the pending future from the list - pendingFutures.RemoveAt(index--); + // remove the pending future from the list + pendingFutures.RemoveAt(index--); - try + try + { + try + { + // wait for completion -- if non-blocking then this + // task is already completed and this will just + // throw any exception that might have occurred + if (blocking && !task.IsCompleted) { - try - { - // wait for completion -- if non-bocking then this - // task is already completed and this will just - // throw any exception that might have occurred - if (blocking && !task.IsCompleted) - { - task.Wait(); - } - - // if we get here then increment the success count - successCount++; - - } - catch (AggregateException e) - when (e.InnerException is TaskCanceledException - || e.InnerException is ThreadInterruptedException) - { - throw new SzRetryableException(e.InnerException); - } - catch (ThreadInterruptedException e) - { - throw new SzRetryableException(e.InnerException); - } - catch (AggregateException e) - { - if (e.InnerException != null) - { - // get the inner exception - throw e.InnerException; - } - else - { - throw; - } - } - + task.Wait(); } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - } - catch (SzRetryableException e) + // if we get here then increment the success count + successCount++; + + } + catch (AggregateException e) + when (e.InnerException is TaskCanceledException + || e.InnerException is ThreadInterruptedException) + { + throw new SzRetryableException(e.InnerException); + } + catch (ThreadInterruptedException e) + { + throw new SzRetryableException(e.InnerException); + } + catch (AggregateException e) + { + if (e.InnerException != null) { - // handle thread interruption and cancellation as retries - LogFailedRecord(Warning, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(record.Line); - } - + // get the inner exception + throw e.InnerException; } - catch (Exception e) + else { - // catch any other exception (incl. SzException) here - LogFailedRecord(Critical, e, record.LineNumber, record.Line); - errorCount++; - throw; // rethrow since exception is critical + throw; } + } + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, record.LineNumber, record.Line); + errorCount++; // increment the error count + + } + catch (SzRetryableException e) + { + // handle thread interruption and cancellation as retries + LogFailedRecord(Warning, e, record.LineNumber, record.Line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(record.Line); + } + + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedRecord(Critical, e, record.LineNumber, record.Line); + errorCount++; + throw; // rethrow since exception is critical } + } } /// @@ -294,46 +294,46 @@ static void LogFailedRecord(string errorType, int lineNumber, string recordJson) { - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " - + lineNumber + ": "); - Console.Error.WriteLine(recordJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + + lineNumber + ": "); + Console.Error.WriteLine(recordJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; + private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; - private const string RetryPrefix = "retry-"; + private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; + private const string RetrySuffix = ".jsonl"; - private const string DataSource = "DATA_SOURCE"; + private const string DataSource = "DATA_SOURCE"; - private const string RecordID = "RECORD_ID"; + private const string RecordID = "RECORD_ID"; - private const int ThreadCount = 8; + private const int ThreadCount = 8; - private const int BacklogFactor = 10; + private const int BacklogFactor = 10; - private const int MaximumBacklog = ThreadCount * BacklogFactor; + private const int MaximumBacklog = ThreadCount * BacklogFactor; - private const int PauseTimeout = 100; + private const int PauseTimeout = 100; - private const string Error = "ERROR"; + private const string Error = "ERROR"; - private const string Warning = "WARNING"; + private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string Critical = "CRITICAL"; - private static int errorCount; - private static int successCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + private static int errorCount; + private static int successCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; } diff --git a/csharp/snippets/loading/LoadWithInfoViaFutures/Program.cs b/csharp/snippets/loading/LoadWithInfoViaFutures/Program.cs index 809e7dd..56030bc 100644 --- a/csharp/snippets/loading/LoadWithInfoViaFutures/Program.cs +++ b/csharp/snippets/loading/LoadWithInfoViaFutures/Program.cs @@ -18,8 +18,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -39,7 +39,7 @@ // execution to a specific limited pool of threads. In order to // improve performance and conserve memory we want to use the same // threads for Senzing work. The TaskScheduler implementation used -// here is directly pulled from Mirosoft's TaskScheduler documentation +// here is directly pulled from Microsoft's TaskScheduler documentation TaskScheduler taskScheduler = new LimitedConcurrencyLevelTaskScheduler(ThreadCount); @@ -57,128 +57,128 @@ TaskScheduler taskScheduler try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - int lineNumber = 0; - bool eof = false; + int lineNumber = 0; + bool eof = false; - while (!eof) + while (!eof) + { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.Count < MaximumBacklog) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.Count < MaximumBacklog) + // read the next line + string? line = rdr.ReadLine(); + lineNumber++; + + // check for EOF + if (line == null) + { + eof = true; + break; + } + + // trim the line + line = line.Trim(); + + // skip any blank lines + if (line.Length == 0) continue; + + // skip any commented lines + if (line.StartsWith('#')) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try + { + // parse the line as a JSON object + JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); + if (recordJson == null) { - // read the next line - string? line = rdr.ReadLine(); - lineNumber++; - - // check for EOF - if (line == null) - { - eof = true; - break; - } - - // trim the line - line = line.Trim(); - - // skip any blank lines - if (line.Length == 0) continue; - - // skip any commented lines - if (line.StartsWith('#')) continue; - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try - { - // parse the line as a JSON object - JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); - if (recordJson == null) - { - // parsed JSON null - throw new SzBadInputException("Record must be a JSON object: " + line); - } - - // extract the data source code and record ID - string? dataSourceCode = recordJson[DataSource]?.GetValue(); - string? recordID = recordJson[RecordID]?.GetValue(); - - Task task = factory.StartNew(() => - { - // call the addRecord() function with info flags - return engine.AddRecord( - dataSourceCode, recordID, record.Line, SzWithInfo); - }, - CancellationToken.None, - TaskCreationOptions.None, - taskScheduler); - - // add the future to the pending future list - pendingFutures.Add((task, record)); - - } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, lineNumber, line); - errorCount++; // increment the error count - } + // parsed JSON null + throw new SzBadInputException("Record must be a JSON object: " + line); } - do - { - // handle any pending futures WITHOUT blocking to reduce the backlog - HandlePendingFutures(engine, pendingFutures, false); + // extract the data source code and record ID + string? dataSourceCode = recordJson[DataSource]?.GetValue(); + string? recordID = recordJson[RecordID]?.GetValue(); - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.Count >= MaximumBacklog) + Task task = factory.StartNew(() => { - Thread.Sleep(PauseTimeout); - } - } while (pendingFutures.Count >= MaximumBacklog); + // call the addRecord() function with info flags + return engine.AddRecord( + dataSourceCode, recordID, record.Line, SzWithInfo); + }, + CancellationToken.None, + TaskCreationOptions.None, + taskScheduler); + + // add the future to the pending future list + pendingFutures.Add((task, record)); + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, lineNumber, line); + errorCount++; // increment the error count + } } - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - HandlePendingFutures(engine, pendingFutures, true); + do + { + // handle any pending futures WITHOUT blocking to reduce the backlog + HandlePendingFutures(engine, pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.Count >= MaximumBacklog) + { + Thread.Sleep(PauseTimeout); + } + } while (pendingFutures.Count >= MaximumBacklog); + } + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + HandlePendingFutures(engine, pendingFutures, true); } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - rdr.Close(); - fs.Close(); - - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - - Console.WriteLine(); - Console.WriteLine("Records successfully added : " + successCount); - Console.WriteLine("Total entities created : " + entityIDSet.Count); - Console.WriteLine("Records failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine(retryCount + " records to be retried in " + retryFile); - } - Console.Out.Flush(); + rdr.Close(); + fs.Close(); + + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Records successfully added : " + successCount); + Console.WriteLine("Total entities created : " + entityIDSet.Count); + Console.WriteLine("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " records to be retried in " + retryFile); + } + Console.Out.Flush(); } @@ -186,99 +186,99 @@ static void HandlePendingFutures(SzEngine engine, IList<(Task, Record)> pendingFutures, bool blocking) { - // loop through the pending futures - for (int index = 0; index < pendingFutures.Count; index++) - { - // get the next pending future - (Task task, Record record) = pendingFutures[index]; + // loop through the pending futures + for (int index = 0; index < pendingFutures.Count; index++) + { + // get the next pending future + (Task task, Record record) = pendingFutures[index]; - // if not blocking and this one is not done then continue - if (!blocking && !task.IsCompleted) continue; + // if not blocking and this one is not done then continue + if (!blocking && !task.IsCompleted) continue; - // remove the pending future from the list - pendingFutures.RemoveAt(index--); + // remove the pending future from the list + pendingFutures.RemoveAt(index--); - try + try + { + try + { + // this will block if the task is not yet completed, + // however we only get here with a pending task if + // the blocking parameter is true + string info = task.Result; + + // if we get here then increment the success count + successCount++; + + // process the info + ProcessInfo(engine, info); + + } + catch (AggregateException e) + when (e.InnerException is TaskCanceledException + || e.InnerException is ThreadInterruptedException) + { + throw new SzRetryableException(e.InnerException); + } + catch (ThreadInterruptedException e) + { + throw new SzRetryableException(e.InnerException); + } + catch (AggregateException e) + { + if (e.InnerException != null) { - try - { - // this will block if the task is not yet completed, - // however we only get here with a pending task if - // the blocking parameter is true - string info = task.Result; - - // if we get here then increment the success count - successCount++; - - // process the info - ProcessInfo(engine, info); - - } - catch (AggregateException e) - when (e.InnerException is TaskCanceledException - || e.InnerException is ThreadInterruptedException) - { - throw new SzRetryableException(e.InnerException); - } - catch (ThreadInterruptedException e) - { - throw new SzRetryableException(e.InnerException); - } - catch (AggregateException e) - { - if (e.InnerException != null) - { - // get the inner exception - throw e.InnerException; - } - else - { - throw; - } - } - + // get the inner exception + throw e.InnerException; } - catch (SzBadInputException e) + else { - LogFailedRecord(Error, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - + throw; } - catch (SzRetryableException e) - { - // handle thread interruption and cancellation as retries - LogFailedRecord(Warning, e, record.LineNumber, record.Line); - errorCount++; // increment the error count - retryCount++; // increment the retry count + } - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(record.Line); - } + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, record.LineNumber, record.Line); + errorCount++; // increment the error count - } - catch (Exception e) - { - // catch any other exception (incl. SzException) here - LogFailedRecord(Critical, e, record.LineNumber, record.Line); - errorCount++; - throw; // rethrow since exception is critical - } } + catch (SzRetryableException e) + { + // handle thread interruption and cancellation as retries + LogFailedRecord(Warning, e, record.LineNumber, record.Line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(record.Line); + } + + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedRecord(Critical, e, record.LineNumber, record.Line); + errorCount++; + throw; // rethrow since exception is critical + } + } } /// @@ -296,39 +296,39 @@ static void HandlePendingFutures(SzEngine engine, /// The info message static void ProcessInfo(SzEngine engine, string info) { - JsonObject? jsonObject = JsonNode.Parse(info)?.AsObject(); - if (jsonObject == null) return; - if (!jsonObject.ContainsKey(AffectedEntities)) return; + JsonObject? jsonObject = JsonNode.Parse(info)?.AsObject(); + if (jsonObject == null) return; + if (!jsonObject.ContainsKey(AffectedEntities)) return; - JsonArray? affectedArr = jsonObject[AffectedEntities]?.AsArray(); - if (affectedArr == null) return; + JsonArray? affectedArr = jsonObject[AffectedEntities]?.AsArray(); + if (affectedArr == null) return; - for (int index = 0; index < affectedArr.Count; index++) - { - JsonObject? affected = affectedArr[index]?.AsObject(); - long entityID = affected?[EntityID]?.GetValue() ?? 0L; - if (entityID == 0L) continue; + for (int index = 0; index < affectedArr.Count; index++) + { + JsonObject? affected = affectedArr[index]?.AsObject(); + long entityID = affected?[EntityID]?.GetValue() ?? 0L; + if (entityID == 0L) continue; - try - { - engine.GetEntity(entityID, null); - entityIDSet.Add(entityID); + try + { + engine.GetEntity(entityID, null); + entityIDSet.Add(entityID); - } - catch (SzNotFoundException) - { - entityIDSet.Remove(entityID); + } + catch (SzNotFoundException) + { + entityIDSet.Remove(entityID); - } - catch (SzException e) - { - // simply log the exception, do not rethrow - Console.Error.WriteLine(); - Console.Error.WriteLine("**** FAILED TO RETRIEVE ENTITY: " + entityID); - Console.Error.WriteLine(e); - Console.Error.Flush(); - } } + catch (SzException e) + { + // simply log the exception, do not rethrow + Console.Error.WriteLine(); + Console.Error.WriteLine("**** FAILED TO RETRIEVE ENTITY: " + entityID); + Console.Error.WriteLine(e); + Console.Error.Flush(); + } + } } /// @@ -346,56 +346,56 @@ static void LogFailedRecord(string errorType, int lineNumber, string recordJson) { - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " - + lineNumber + ": "); - Console.Error.WriteLine(recordJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + + lineNumber + ": "); + Console.Error.WriteLine(recordJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; + private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; - private const string RetryPrefix = "retry-"; + private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; + private const string RetrySuffix = ".jsonl"; - private const string DataSource = "DATA_SOURCE"; + private const string DataSource = "DATA_SOURCE"; - private const string RecordID = "RECORD_ID"; + private const string RecordID = "RECORD_ID"; - private const string AffectedEntities = "AFFECTED_ENTITIES"; + private const string AffectedEntities = "AFFECTED_ENTITIES"; - private const string EntityID = "ENTITY_ID"; + private const string EntityID = "ENTITY_ID"; - private const int ThreadCount = 8; + private const int ThreadCount = 8; - private const int BacklogFactor = 10; + private const int BacklogFactor = 10; - private const int MaximumBacklog = ThreadCount * BacklogFactor; + private const int MaximumBacklog = ThreadCount * BacklogFactor; - private const int PauseTimeout = 100; + private const int PauseTimeout = 100; - private const string Error = "ERROR"; + private const string Error = "ERROR"; - private const string Warning = "WARNING"; + private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string Critical = "CRITICAL"; - private static int errorCount; + private static int errorCount; - private static int successCount; + private static int successCount; - private static int retryCount; + private static int retryCount; - private static FileInfo? retryFile; + private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + private static StreamWriter? retryWriter; - private static readonly ISet entityIDSet = new HashSet(); + private static readonly ISet entityIDSet = new HashSet(); } internal sealed record Record(int LineNumber, string Line) { } diff --git a/csharp/snippets/loading/LoadWithStatsViaLoop/Program.cs b/csharp/snippets/loading/LoadWithStatsViaLoop/Program.cs index 66845c3..8db6944 100644 --- a/csharp/snippets/loading/LoadWithStatsViaLoop/Program.cs +++ b/csharp/snippets/loading/LoadWithStatsViaLoop/Program.cs @@ -14,8 +14,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -37,142 +37,142 @@ StreamReader rdr = new StreamReader(fs, Encoding.UTF8); try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - int lineNumber = 0; + int lineNumber = 0; - // loop through the example records and add them to the repository - for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) - { - // increment the line number - lineNumber++; + // loop through the example records and add them to the repository + for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) + { + // increment the line number + lineNumber++; - // trim the line - line = line.Trim(); + // trim the line + line = line.Trim(); - // skip any blank lines - if (line.Length == 0) continue; + // skip any blank lines + if (line.Length == 0) continue; - // skip any commented lines - if (line.StartsWith('#')) continue; + // skip any commented lines + if (line.StartsWith('#')) continue; + try + { + // parse the line as a JSON object + JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); + if (recordJson == null) + { + // parsed JSON null + throw new SzBadInputException("Record must be a JSON object: " + line); + } + + // extract the data source code and record ID + string? dataSourceCode = recordJson[DataSource]?.GetValue(); + string? recordID = recordJson[RecordID]?.GetValue(); + + // call the addRecord() function with no flags + engine.AddRecord(dataSourceCode, recordID, line, SzNoFlags); + + successCount++; + + // check if it is time obtain stats + if ((successCount % StatsInterval) == 0) + { try { - // parse the line as a JSON object - JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); - if (recordJson == null) - { - // parsed JSON null - throw new SzBadInputException("Record must be a JSON object: " + line); - } - - // extract the data source code and record ID - string? dataSourceCode = recordJson[DataSource]?.GetValue(); - string? recordID = recordJson[RecordID]?.GetValue(); - - // call the addRecord() function with no flags - engine.AddRecord(dataSourceCode, recordID, line, SzNoFlags); - - successCount++; - - // check if it is time obtain stats - if ((successCount % StatsInterval) == 0) - { - try - { - string stats = engine.GetStats(); - if (stats.Length > StatsTruncate) - { - stats = string.Concat(stats.AsSpan(0, StatsTruncate), " ..."); - } - Console.WriteLine("* STATS: " + stats); - - } - catch (SzException e) - { - // trap the stats exeption so it is not misinterpreted - // as an exception from engine.addRecord() - Console.WriteLine("**** FAILED TO OBTAIN STATS: " + e); - } - } + string stats = engine.GetStats(); + if (stats.Length > StatsTruncate) + { + stats = string.Concat(stats.AsSpan(0, StatsTruncate), " ..."); + } + Console.WriteLine("* STATS: " + stats); } - catch (SzBadInputException e) + catch (SzException e) { - LogFailedRecord(Error, e, lineNumber, line); - errorCount++; // increment the error count - + // trap the stats exception so it is not misinterpreted + // as an exception from engine.addRecord() + Console.WriteLine("**** FAILED TO OBTAIN STATS: " + e); } - catch (SzRetryableException e) - { - LogFailedRecord(Warning, e, lineNumber, line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(line); - } + } + + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, lineNumber, line); + errorCount++; // increment the error count - } - catch (Exception e) - { - // catch any other exception (incl. SzException) here - LogFailedRecord(Critical, e, lineNumber, line); - errorCount++; - throw; // rethrow since exception is critical - } } + catch (SzRetryableException e) + { + LogFailedRecord(Warning, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(line); + } + + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedRecord(Critical, e, lineNumber, line); + errorCount++; + throw; // rethrow since exception is critical + } + } } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - rdr.Close(); - - fs.Close(); - - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - - Console.WriteLine(); - Console.WriteLine("Records successfully added : " + successCount); - Console.WriteLine("Records failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine(retryCount + " records to be retried in " + retryFile); - } - Console.Out.Flush(); + rdr.Close(); + + fs.Close(); + + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Records successfully added : " + successCount); + Console.WriteLine("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " records to be retried in " + retryFile); + } + Console.Out.Flush(); } /// @@ -190,40 +190,40 @@ static void LogFailedRecord(string errorType, int lineNumber, string recordJson) { - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " - + lineNumber + ": "); - Console.Error.WriteLine(recordJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + + lineNumber + ": "); + Console.Error.WriteLine(recordJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; + private const string DefaultFilePath = "../../resources/data/load-500.jsonl"; - private const string RetryPrefix = "retry-"; + private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; + private const string RetrySuffix = ".jsonl"; - private const string DataSource = "DATA_SOURCE"; + private const string DataSource = "DATA_SOURCE"; - private const string RecordID = "RECORD_ID"; + private const string RecordID = "RECORD_ID"; - private const string Error = "ERROR"; + private const string Error = "ERROR"; - private const string Warning = "WARNING"; + private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string Critical = "CRITICAL"; - private const int StatsInterval = 100; + private const int StatsInterval = 100; - private const int StatsTruncate = 70; + private const int StatsTruncate = 70; - private static int errorCount; - private static int successCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + private static int errorCount; + private static int successCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; } \ No newline at end of file diff --git a/csharp/snippets/loading/README.md b/csharp/snippets/loading/README.md index 8f40059..2febf72 100644 --- a/csharp/snippets/loading/README.md +++ b/csharp/snippets/loading/README.md @@ -7,9 +7,9 @@ The loading snippets outline adding new source records. Adding source records in - **LoadRecords** - Basic iteration over a few records, adding each one - **LoadTruthSetWithInfoViaLoop** - - Read and load from multiple source files, adding a sample truth + - Read and load from multiple source files, adding a sample truth - Collect the response using the [SzWithInfo flag](../../../README.md#with-info) on the `AddRecord()` method and track the entity ID's for the records. -- **LoaeViaFutures** +- **LoadViaFutures** - Read and load source records from a file using multiple threads - **LoadViaLoop** - Basic read and add source records from a file diff --git a/csharp/snippets/redo/LoadWithRedoViaLoop/Program.cs b/csharp/snippets/redo/LoadWithRedoViaLoop/Program.cs index 308a5f8..d22cf47 100644 --- a/csharp/snippets/redo/LoadWithRedoViaLoop/Program.cs +++ b/csharp/snippets/redo/LoadWithRedoViaLoop/Program.cs @@ -14,8 +14,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -31,145 +31,145 @@ try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - // loop through the input files - foreach (string filePath in InputFiles) + // loop through the input files + foreach (string filePath in InputFiles) + { + FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read); + + StreamReader rdr = new StreamReader(fs, Encoding.UTF8); + + try { - FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read); + int lineNumber = 0; + // loop through the example records and add them to the repository + for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) + { + // increment the line number + lineNumber++; - StreamReader rdr = new StreamReader(fs, Encoding.UTF8); + // trim the line + line = line.Trim(); - try - { - int lineNumber = 0; - // loop through the example records and add them to the repository - for (string? line = rdr.ReadLine(); line != null; line = rdr.ReadLine()) - { - // increment the line number - lineNumber++; - - // trim the line - line = line.Trim(); - - // skip any blank lines - if (line.Length == 0) continue; - - // skip any commented lines - if (line.StartsWith('#')) continue; - - try - { - // parse the line as a JSON object - JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); - if (recordJson == null) - { - // parsed JSON null - throw new SzBadInputException("Record must be a JSON object: " + line); - } - - // extract the data source code and record ID - string? dataSourceCode = recordJson[DataSource]?.GetValue(); - string? recordID = recordJson[RecordID]?.GetValue(); - - // call the addRecord() function with info flags - engine.AddRecord(dataSourceCode, recordID, line, SzNoFlags); - - successCount++; - } - catch (SzBadInputException e) - { - LogFailedRecord(Error, e, filePath, lineNumber, line); - errorCount++; // increment the error count - - } - catch (SzRetryableException e) - { - LogFailedRecord(Warning, e, filePath, lineNumber, line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - TrackRetryRecord(line); - - } - catch (Exception e) - { - // catch any other exception (incl. SzException) here - LogFailedRecord(Critical, e, filePath, lineNumber, line); - errorCount++; - throw; // rethrow since exception is critical - } - } - } - finally - { - rdr.Close(); - fs.Close(); - } - } + // skip any blank lines + if (line.Length == 0) continue; - // now that we have loaded the records, check for redos and handle them - while (engine.CountRedoRecords() > 0) - { - // get the next redo record - string redo = engine.GetRedoRecord(); + // skip any commented lines + if (line.StartsWith('#')) continue; try { - // process the redo record - engine.ProcessRedoRecord(redo, SzNoFlags); - - // increment the redone count - redoneCount++; + // parse the line as a JSON object + JsonObject? recordJson = JsonNode.Parse(line)?.AsObject(); + if (recordJson == null) + { + // parsed JSON null + throw new SzBadInputException("Record must be a JSON object: " + line); + } + + // extract the data source code and record ID + string? dataSourceCode = recordJson[DataSource]?.GetValue(); + string? recordID = recordJson[RecordID]?.GetValue(); + + // call the addRecord() function with info flags + engine.AddRecord(dataSourceCode, recordID, line, SzNoFlags); + + successCount++; + } + catch (SzBadInputException e) + { + LogFailedRecord(Error, e, filePath, lineNumber, line); + errorCount++; // increment the error count } catch (SzRetryableException e) { - LogFailedRedo(Warning, e, redo); - errorCount++; - retryCount++; - TrackRetryRecord(redo); + LogFailedRecord(Warning, e, filePath, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + TrackRetryRecord(line); } catch (Exception e) { - LogFailedRedo(Critical, e, redo); - errorCount++; - throw; + // catch any other exception (incl. SzException) here + LogFailedRecord(Critical, e, filePath, lineNumber, line); + errorCount++; + throw; // rethrow since exception is critical } + } + } + finally + { + rdr.Close(); + fs.Close(); } + } -} -catch (Exception e) -{ - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + // now that we have loaded the records, check for redos and handle them + while (engine.CountRedoRecords() > 0) + { + // get the next redo record + string redo = engine.GetRedoRecord(); -} -finally -{ - // IMPORTANT: make sure to destroy the environment - env.Destroy(); + try + { + // process the redo record + engine.ProcessRedoRecord(redo, SzNoFlags); - Console.WriteLine(); - Console.WriteLine("Records successfully added : " + successCount); - Console.WriteLine("Redos successfully processed : " + redoneCount); - Console.WriteLine("Records failed with errors : " + errorCount); + // increment the redone count + redoneCount++; - // check on any retry records - if (retryWriter != null) + } + catch (SzRetryableException e) { - retryWriter.Flush(); - retryWriter.Close(); + LogFailedRedo(Warning, e, redo); + errorCount++; + retryCount++; + TrackRetryRecord(redo); + } - if (retryCount > 0) + catch (Exception e) { - Console.WriteLine(retryCount + " records to be retried in " + retryFile); + LogFailedRedo(Critical, e, redo); + errorCount++; + throw; } - Console.Out.Flush(); + } + +} +catch (Exception e) +{ + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; + +} +finally +{ + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Records successfully added : " + successCount); + Console.WriteLine("Redos successfully processed : " + redoneCount); + Console.WriteLine("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " records to be retried in " + retryFile); + } + Console.Out.Flush(); } /// @@ -177,28 +177,28 @@ /// in a retry file. /// /// -/// The JSON text definining the record to be retried +/// The JSON text defining the record to be retried /// static void TrackRetryRecord(string recordJson) { - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(recordJson); - } + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(recordJson); + } } /// @@ -218,15 +218,15 @@ static void LogFailedRecord(string errorType, int lineNumber, string recordJson) { - string fileName = Path.GetFileName(filePath); - - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName - + " AT LINE " + lineNumber + ": "); - Console.Error.WriteLine(recordJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + string fileName = Path.GetFileName(filePath); + + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName + + " AT LINE " + lineNumber + ": "); + Console.Error.WriteLine(recordJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } /// @@ -240,35 +240,35 @@ static void LogFailedRedo(string errorType, Exception exception, string redoRecord) { - Console.Error.WriteLine(); - Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); - Console.Error.WriteLine(redoRecord); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); + Console.Error.WriteLine(redoRecord); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private static readonly IList InputFiles = new ReadOnlyCollection( - new string[] { + private static readonly IList InputFiles = new ReadOnlyCollection( + new string[] { "../../resources/data/truthset/customers.jsonl", "../../resources/data/truthset/reference.jsonl", "../../resources/data/truthset/watchlist.jsonl" - }); - - private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; - private const string DataSource = "DATA_SOURCE"; - private const string RecordID = "RECORD_ID"; - private const string Error = "ERROR"; - private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; - - // setup some class-wide variables - private static int errorCount; - private static int successCount; - private static int redoneCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + }); + + private const string RetryPrefix = "retry-"; + private const string RetrySuffix = ".jsonl"; + private const string DataSource = "DATA_SOURCE"; + private const string RecordID = "RECORD_ID"; + private const string Error = "ERROR"; + private const string Warning = "WARNING"; + private const string Critical = "CRITICAL"; + + // setup some class-wide variables + private static int errorCount; + private static int successCount; + private static int redoneCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; } diff --git a/csharp/snippets/redo/RedoContinuous/Program.cs b/csharp/snippets/redo/RedoContinuous/Program.cs index 403fede..efedff1 100644 --- a/csharp/snippets/redo/RedoContinuous/Program.cs +++ b/csharp/snippets/redo/RedoContinuous/Program.cs @@ -15,8 +15,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -32,102 +32,102 @@ AppDomain.CurrentDomain.ProcessExit += (s, e) => { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - OutputRedoStatistics(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + OutputRedoStatistics(); }; try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - while (true) + while (true) + { + // get the next redo record + string redo = engine.GetRedoRecord(); + + // check if no redo records are available + if (redo == null) + { + OutputRedoStatistics(); + Console.WriteLine(); + Console.WriteLine( + "No redo records to process. Pausing for " + + RedoPauseDescription + "...."); + Console.WriteLine("Press CTRL-C to exit."); + try + { + Thread.Sleep(RedoPauseTimeout); + } + catch (ThreadInterruptedException) + { + // ignore the exception + } + continue; + } + + try + { + // process the redo record + engine.ProcessRedoRecord(redo, SzNoFlags); + + // increment the redone count + redoneCount++; + + } + catch (SzRetryableException e) + { + LogFailedRedo(Warning, e, redo); + errorCount++; + retryCount++; + TrackRetryRecord(redo); + + } + catch (Exception e) { - // get the next redo record - string redo = engine.GetRedoRecord(); - - // check if no redo reords are available - if (redo == null) - { - OutputRedoStatistics(); - Console.WriteLine(); - Console.WriteLine( - "No redo records to process. Pausing for " - + RedoPauseDescription + "...."); - Console.WriteLine("Press CTRL-C to exit."); - try - { - Thread.Sleep(RedoPauseTimeout); - } - catch (ThreadInterruptedException) - { - // ignore the exception - } - continue; - } - - try - { - // process the redo record - engine.ProcessRedoRecord(redo, SzNoFlags); - - // increment the redone count - redoneCount++; - - } - catch (SzRetryableException e) - { - LogFailedRedo(Warning, e, redo); - errorCount++; - retryCount++; - TrackRetryRecord(redo); - - } - catch (Exception e) - { - LogFailedRedo(Critical, e, redo); - errorCount++; - throw; - } + LogFailedRedo(Critical, e, redo); + errorCount++; + throw; } + } } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception } static void OutputRedoStatistics() { - Console.WriteLine(); - Console.WriteLine("Redos successfully processed : " + redoneCount); - Console.WriteLine("Total failed records/redos : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine( - retryCount + " records/redos to be retried in " + retryFile); - } - Console.Out.Flush(); + Console.WriteLine(); + Console.WriteLine("Redos successfully processed : " + redoneCount); + Console.WriteLine("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine( + retryCount + " records/redos to be retried in " + retryFile); + } + Console.Out.Flush(); } /// @@ -135,28 +135,28 @@ static void OutputRedoStatistics() /// in a retry file. /// /// -/// The JSON text definining the record to be retried +/// The JSON text defining the record to be retried /// static void TrackRetryRecord(string recordJson) { - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(recordJson); - } + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(recordJson); + } } /// @@ -170,30 +170,30 @@ static void LogFailedRedo(string errorType, Exception exception, string redoRecord) { - Console.Error.WriteLine(); - Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); - Console.Error.WriteLine(redoRecord); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); + Console.Error.WriteLine(redoRecord); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string RedoPauseDescription = "30 seconds"; + private const string RedoPauseDescription = "30 seconds"; - private const int RedoPauseTimeout = 30000; + private const int RedoPauseTimeout = 30000; - private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; - private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string RetryPrefix = "retry-"; + private const string RetrySuffix = ".jsonl"; + private const string Warning = "WARNING"; + private const string Critical = "CRITICAL"; - // setup some class-wide variables - private static int errorCount; - private static int redoneCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + // setup some class-wide variables + private static int errorCount; + private static int redoneCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; } #pragma warning restore CA1303 // Do not pass literals as localized parameters (example messages) diff --git a/csharp/snippets/redo/RedoContinuousViaFutures/Program.cs b/csharp/snippets/redo/RedoContinuousViaFutures/Program.cs index 3d4cdc9..7bd8195 100644 --- a/csharp/snippets/redo/RedoContinuousViaFutures/Program.cs +++ b/csharp/snippets/redo/RedoContinuousViaFutures/Program.cs @@ -17,8 +17,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -36,7 +36,7 @@ // execution to a specific limited pool of threads. In order to // improve performance and conserve memory we want to use the same // threads for Senzing work. The TaskScheduler implementation used -// here is directly pulled from Mirosoft's TaskScheduler documentation +// here is directly pulled from Microsoft's TaskScheduler documentation TaskScheduler taskScheduler = new LimitedConcurrencyLevelTaskScheduler(ThreadCount); @@ -49,183 +49,183 @@ TaskScheduler taskScheduler AppDomain.CurrentDomain.ProcessExit += (s, e) => { #pragma warning disable CA1031 // Need to catch all exceptions here - try - { - HandlePendingFutures(pendingFutures, true); - } - catch (Exception exception) - { - Console.Error.WriteLine(exception); - } + try + { + HandlePendingFutures(pendingFutures, true); + } + catch (Exception exception) + { + Console.Error.WriteLine(exception); + } #pragma warning restore CA1031 // Need to catch all exceptions here - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - OutputRedoStatistics(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + OutputRedoStatistics(); }; try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); - - while (true) + // get the engine from the environment + SzEngine engine = env.GetEngine(); + + while (true) + { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.Count < MaximumBacklog) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.Count < MaximumBacklog) - { - // get the next redo record - string redo = engine.GetRedoRecord(); + // get the next redo record + string redo = engine.GetRedoRecord(); - // check if no redo reords are available - if (redo == null) break; + // check if no redo records are available + if (redo == null) break; - Task task = factory.StartNew(() => - { - engine.ProcessRedoRecord(redo, SzNoFlags); - }, - CancellationToken.None, - TaskCreationOptions.None, - taskScheduler); + Task task = factory.StartNew(() => + { + engine.ProcessRedoRecord(redo, SzNoFlags); + }, + CancellationToken.None, + TaskCreationOptions.None, + taskScheduler); - // add the future to the pending future list - pendingFutures.Add((task, redo)); - } + // add the future to the pending future list + pendingFutures.Add((task, redo)); + } - do + do + { + // handle any pending futures WITHOUT blocking to reduce the backlog + HandlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.Count >= MaximumBacklog) + { + try { - // handle any pending futures WITHOUT blocking to reduce the backlog - HandlePendingFutures(pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.Count >= MaximumBacklog) - { - try - { - Thread.Sleep(HandlePauseTimeout); - - } - catch (ThreadInterruptedException) - { - // do nothing - } - } - } while (pendingFutures.Count >= MaximumBacklog); - - // check if there are no redo records right now - if (engine.CountRedoRecords() == 0) + Thread.Sleep(HandlePauseTimeout); + + } + catch (ThreadInterruptedException) { - OutputRedoStatistics(); - Console.WriteLine(); - Console.WriteLine( - "No redo records to process. Pausing for " - + RedoPauseDescription + "...."); - Console.WriteLine("Press CTRL-C to exit."); - try - { - Thread.Sleep(RedoPauseTimeout); - } - catch (ThreadInterruptedException) - { - // ignore the exception - } - continue; + // do nothing } + } + } while (pendingFutures.Count >= MaximumBacklog); + + // check if there are no redo records right now + if (engine.CountRedoRecords() == 0) + { + OutputRedoStatistics(); + Console.WriteLine(); + Console.WriteLine( + "No redo records to process. Pausing for " + + RedoPauseDescription + "...."); + Console.WriteLine("Press CTRL-C to exit."); + try + { + Thread.Sleep(RedoPauseTimeout); + } + catch (ThreadInterruptedException) + { + // ignore the exception + } + continue; } + } } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception } static void HandlePendingFutures(IList<(Task, string)> pendingFutures, bool blocking) { - // loop through the pending futures - for (int index = 0; index < pendingFutures.Count; index++) - { - // get the next pending future - (Task task, string redoRecord) = pendingFutures[index]; + // loop through the pending futures + for (int index = 0; index < pendingFutures.Count; index++) + { + // get the next pending future + (Task task, string redoRecord) = pendingFutures[index]; - // if not blocking and this one is not done then continue - if (!blocking && !task.IsCompleted) continue; + // if not blocking and this one is not done then continue + if (!blocking && !task.IsCompleted) continue; - // remove the pending future from the list - pendingFutures.RemoveAt(index--); + // remove the pending future from the list + pendingFutures.RemoveAt(index--); - try + try + { + try + { + // wait for completion -- if non-blocking then this + // task is already completed and this will just + // throw any exception that might have occurred + if (blocking && !task.IsCompleted) { - try - { - // wait for completion -- if non-bocking then this - // task is already completed and this will just - // throw any exception that might have occurred - if (blocking && !task.IsCompleted) - { - task.Wait(); - } - - // if we get here then increment the success count - redoneCount++; - - } - catch (AggregateException e) - when (e.InnerException is TaskCanceledException - || e.InnerException is ThreadInterruptedException) - { - throw new SzRetryableException(e.InnerException); - } - catch (ThreadInterruptedException e) - { - throw new SzRetryableException(e.InnerException); - } - catch (AggregateException e) - { - if (e.InnerException != null) - { - // get the inner exception - throw e.InnerException; - } - else - { - throw; - } - } - + task.Wait(); } - catch (SzRetryableException e) - { - // handle thread interruption and cancellation as retries - LogFailedRedo(Warning, e, redoRecord); - errorCount++; // increment the error count - retryCount++; // increment the retry count - // track the retry record so it can be retried later - TrackRetryRecord(redoRecord); + // if we get here then increment the success count + redoneCount++; + + } + catch (AggregateException e) + when (e.InnerException is TaskCanceledException + || e.InnerException is ThreadInterruptedException) + { + throw new SzRetryableException(e.InnerException); + } + catch (ThreadInterruptedException e) + { + throw new SzRetryableException(e.InnerException); + } + catch (AggregateException e) + { + if (e.InnerException != null) + { + // get the inner exception + throw e.InnerException; } - catch (Exception e) + else { - // catch any other exception (incl. SzException) here - LogFailedRedo(Critical, e, redoRecord); - errorCount++; - throw; // rethrow since exception is critical + throw; } + } + + } + catch (SzRetryableException e) + { + // handle thread interruption and cancellation as retries + LogFailedRedo(Warning, e, redoRecord); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + TrackRetryRecord(redoRecord); + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedRedo(Critical, e, redoRecord); + errorCount++; + throw; // rethrow since exception is critical } + } } /// @@ -233,28 +233,28 @@ static void HandlePendingFutures(IList<(Task, string)> pendingFutures, bool bloc /// in a retry file. /// /// -/// The JSON text definining the record to be retried +/// The JSON text defining the record to be retried /// static void TrackRetryRecord(string recordJson) { - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(recordJson); - } + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(recordJson); + } } /// @@ -268,58 +268,58 @@ static void LogFailedRedo(string errorType, Exception exception, string redoRecord) { - Console.Error.WriteLine(); - Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); - Console.Error.WriteLine(redoRecord); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); + Console.Error.WriteLine(redoRecord); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } static void OutputRedoStatistics() { - Console.WriteLine(); - Console.WriteLine("Redos successfully processed : " + redoneCount); - Console.WriteLine("Total failed records/redos : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine( - retryCount + " records/redos to be retried in " + retryFile); - } - Console.Out.Flush(); + Console.WriteLine(); + Console.WriteLine("Redos successfully processed : " + redoneCount); + Console.WriteLine("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine( + retryCount + " records/redos to be retried in " + retryFile); + } + Console.Out.Flush(); } public partial class Program { - private const string RedoPauseDescription = "30 seconds"; + private const string RedoPauseDescription = "30 seconds"; - private const int RedoPauseTimeout = 30000; + private const int RedoPauseTimeout = 30000; - private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; - private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string RetryPrefix = "retry-"; + private const string RetrySuffix = ".jsonl"; + private const string Warning = "WARNING"; + private const string Critical = "CRITICAL"; - // setup some class-wide variables - private static int errorCount; - private static int redoneCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + // setup some class-wide variables + private static int errorCount; + private static int redoneCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; - private const int ThreadCount = 8; + private const int ThreadCount = 8; - private const int BacklogFactor = 10; + private const int BacklogFactor = 10; - private const int MaximumBacklog = ThreadCount * BacklogFactor; + private const int MaximumBacklog = ThreadCount * BacklogFactor; - private const int HandlePauseTimeout = 100; + private const int HandlePauseTimeout = 100; } #pragma warning restore CA1303 // Do not pass literals as localized parameters (example messages) diff --git a/csharp/snippets/redo/RedoWithInfoContinuous/Program.cs b/csharp/snippets/redo/RedoWithInfoContinuous/Program.cs index ab3b831..fd6f98d 100644 --- a/csharp/snippets/redo/RedoWithInfoContinuous/Program.cs +++ b/csharp/snippets/redo/RedoWithInfoContinuous/Program.cs @@ -15,8 +15,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -32,106 +32,106 @@ AppDomain.CurrentDomain.ProcessExit += (s, e) => { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - OutputRedoStatistics(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + OutputRedoStatistics(); }; try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - while (true) + while (true) + { + // get the next redo record + string redo = engine.GetRedoRecord(); + + // check if no redo records are available + if (redo == null) + { + OutputRedoStatistics(); + Console.WriteLine(); + Console.WriteLine( + "No redo records to process. Pausing for " + + RedoPauseDescription + "...."); + Console.WriteLine("Press CTRL-C to exit."); + try + { + Thread.Sleep(RedoPauseTimeout); + } + catch (ThreadInterruptedException) + { + // ignore the exception + } + continue; + } + + try { - // get the next redo record - string redo = engine.GetRedoRecord(); - - // check if no redo reords are available - if (redo == null) - { - OutputRedoStatistics(); - Console.WriteLine(); - Console.WriteLine( - "No redo records to process. Pausing for " - + RedoPauseDescription + "...."); - Console.WriteLine("Press CTRL-C to exit."); - try - { - Thread.Sleep(RedoPauseTimeout); - } - catch (ThreadInterruptedException) - { - // ignore the exception - } - continue; - } - - try - { - // process the redo record - string info = engine.ProcessRedoRecord(redo, SzWithInfo); - - // increment the redone count - redoneCount++; - - // process the info - ProcessInfo(engine, info); - - } - catch (SzRetryableException e) - { - LogFailedRedo(Warning, e, redo); - errorCount++; - retryCount++; - TrackRetryRecord(redo); - - } - catch (Exception e) - { - LogFailedRedo(Critical, e, redo); - errorCount++; - throw; - } + // process the redo record + string info = engine.ProcessRedoRecord(redo, SzWithInfo); + + // increment the redone count + redoneCount++; + + // process the info + ProcessInfo(engine, info); + } + catch (SzRetryableException e) + { + LogFailedRedo(Warning, e, redo); + errorCount++; + retryCount++; + TrackRetryRecord(redo); + + } + catch (Exception e) + { + LogFailedRedo(Critical, e, redo); + errorCount++; + throw; + } + } } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception } static void OutputRedoStatistics() { - Console.WriteLine(); - Console.WriteLine("Redos successfully processed : " + redoneCount); - Console.WriteLine("Total entities affected : " + entityIDSet.Count); - Console.WriteLine("Total failed records/redos : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine( - retryCount + " records/redos to be retried in " + retryFile); - } - Console.Out.Flush(); + Console.WriteLine(); + Console.WriteLine("Redos successfully processed : " + redoneCount); + Console.WriteLine("Total entities affected : " + entityIDSet.Count); + Console.WriteLine("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine( + retryCount + " records/redos to be retried in " + retryFile); + } + Console.Out.Flush(); } /// @@ -139,28 +139,28 @@ static void OutputRedoStatistics() /// in a retry file. /// /// -/// The JSON text definining the record to be retried +/// The JSON text defining the record to be retried /// static void TrackRetryRecord(string recordJson) { - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(recordJson); - } + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(recordJson); + } } /// @@ -178,37 +178,37 @@ static void TrackRetryRecord(string recordJson) /// The info message static void ProcessInfo(SzEngine engine, string info) { - JsonObject? jsonObject = JsonNode.Parse(info)?.AsObject(); - if (jsonObject == null) return; - if (!jsonObject.ContainsKey(AffectedEntities)) return; + JsonObject? jsonObject = JsonNode.Parse(info)?.AsObject(); + if (jsonObject == null) return; + if (!jsonObject.ContainsKey(AffectedEntities)) return; + + JsonArray? affectedArr = jsonObject[AffectedEntities]?.AsArray(); + if (affectedArr == null) return; - JsonArray? affectedArr = jsonObject[AffectedEntities]?.AsArray(); - if (affectedArr == null) return; + for (int index = 0; index < affectedArr.Count; index++) + { + JsonObject? affected = affectedArr[index]?.AsObject(); + long entityID = affected?[EntityID]?.GetValue() ?? 0L; + if (entityID == 0L) continue; - for (int index = 0; index < affectedArr.Count; index++) + try + { + engine.GetEntity(entityID, null); + entityIDSet.Add(entityID); + } + catch (SzNotFoundException) + { + entityIDSet.Remove(entityID); + } + catch (SzException e) { - JsonObject? affected = affectedArr[index]?.AsObject(); - long entityID = affected?[EntityID]?.GetValue() ?? 0L; - if (entityID == 0L) continue; - - try - { - engine.GetEntity(entityID, null); - entityIDSet.Add(entityID); - } - catch (SzNotFoundException) - { - entityIDSet.Remove(entityID); - } - catch (SzException e) - { - // simply log the exception, do not rethrow - Console.Error.WriteLine(); - Console.Error.WriteLine("**** FAILED TO RETRIEVE ENTITY: " + entityID); - Console.Error.WriteLine(e); - Console.Error.Flush(); - } + // simply log the exception, do not rethrow + Console.Error.WriteLine(); + Console.Error.WriteLine("**** FAILED TO RETRIEVE ENTITY: " + entityID); + Console.Error.WriteLine(e); + Console.Error.Flush(); } + } } /// @@ -222,33 +222,33 @@ static void LogFailedRedo(string errorType, Exception exception, string redoRecord) { - Console.Error.WriteLine(); - Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); - Console.Error.WriteLine(redoRecord); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine("** " + errorType + " ** FAILED TO PROCESS REDO: "); + Console.Error.WriteLine(redoRecord); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string RedoPauseDescription = "30 seconds"; - - private const int RedoPauseTimeout = 30000; - - private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; - private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; - private const string AffectedEntities = "AFFECTED_ENTITIES"; - private const string EntityID = "ENTITY_ID"; - - // setup some class-wide variables - private static int errorCount; - private static int redoneCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; - private static readonly ISet entityIDSet = new HashSet(); + private const string RedoPauseDescription = "30 seconds"; + + private const int RedoPauseTimeout = 30000; + + private const string RetryPrefix = "retry-"; + private const string RetrySuffix = ".jsonl"; + private const string Warning = "WARNING"; + private const string Critical = "CRITICAL"; + private const string AffectedEntities = "AFFECTED_ENTITIES"; + private const string EntityID = "ENTITY_ID"; + + // setup some class-wide variables + private static int errorCount; + private static int redoneCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; + private static readonly ISet entityIDSet = new HashSet(); } diff --git a/csharp/snippets/searching/SearchRecords/Program.cs b/csharp/snippets/searching/SearchRecords/Program.cs index 81dec3f..5c56532 100644 --- a/csharp/snippets/searching/SearchRecords/Program.cs +++ b/csharp/snippets/searching/SearchRecords/Program.cs @@ -13,8 +13,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -30,67 +30,67 @@ try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - // loop through the example records and add them to the repository - foreach (string criteria in GetSearchCriteria()) - { - // call the searchByAttributes() function with default flags - string result = engine.SearchByAttributes( - criteria, SzSearchByAttributesDefaultFlags); + // loop through the example records and add them to the repository + foreach (string criteria in GetSearchCriteria()) + { + // call the searchByAttributes() function with default flags + string result = engine.SearchByAttributes( + criteria, SzSearchByAttributesDefaultFlags); - JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); + JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); - Console.WriteLine(); - JsonArray? jsonArr = jsonObj?["RESOLVED_ENTITIES"]?.AsArray(); - if (jsonArr == null || jsonArr.Count == 0) - { - Console.WriteLine("No results for criteria: " + criteria); - } - else + Console.WriteLine(); + JsonArray? jsonArr = jsonObj?["RESOLVED_ENTITIES"]?.AsArray(); + if (jsonArr == null || jsonArr.Count == 0) + { + Console.WriteLine("No results for criteria: " + criteria); + } + else + { + Console.WriteLine("Results for criteria: " + criteria); + for (int index = 0; index < jsonArr.Count; index++) + { + JsonObject? obj = jsonArr[index]?.AsObject(); + obj = obj?["ENTITY"]?.AsObject(); + obj = obj?["RESOLVED_ENTITY"]?.AsObject(); + if (obj == null) { - Console.WriteLine("Results for criteria: " + criteria); - for (int index = 0; index < jsonArr.Count; index++) - { - JsonObject? obj = jsonArr[index]?.AsObject(); - obj = obj?["ENTITY"]?.AsObject(); - obj = obj?["RESOLVED_ENTITY"]?.AsObject(); - if (obj == null) - { - throw new JsonException("Unexpected result format: " + result); - } - long? entityID = obj["ENTITY_ID"]?.GetValue(); - string? name = obj["ENTITY_NAME"]?.GetValue(); - Console.WriteLine(entityID + ": " + name); - } + throw new JsonException("Unexpected result format: " + result); } - Console.Out.Flush(); + long? entityID = obj["ENTITY_ID"]?.GetValue(); + string? name = obj["ENTITY_NAME"]?.GetValue(); + Console.WriteLine(entityID + ": " + name); + } } + Console.Out.Flush(); + } } catch (SzException e) { - // handle any exception that may have occurred - Console.Error.WriteLine("Senzing Error Message : " + e.Message); - Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); - Console.Error.WriteLine(e); - throw; + // handle any exception that may have occurred + Console.Error.WriteLine("Senzing Error Message : " + e.Message); + Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); + Console.Error.WriteLine(e); + throw; } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); } /// @@ -98,14 +98,14 @@ /// /// /// -/// A of JSON text values desribing the +/// A of JSON text values describing the /// sets of criteria with which to search. /// static IList GetSearchCriteria() { - IList records = new List(); - records.Add( - """ + IList records = new List(); + records.Add( + """ { "NAME_FULL": "Susan Moony", "DATE_OF_BIRTH": "15/6/1998", @@ -113,8 +113,8 @@ static IList GetSearchCriteria() } """); - records.Add( - """ + records.Add( + """ { "NAME_FIRST": "Robert", "NAME_LAST": "Smith", @@ -122,8 +122,8 @@ static IList GetSearchCriteria() } """); - records.Add( - """ + records.Add( + """ { "NAME_FIRST": "Makio", "NAME_LAST": "Yamanaka", @@ -131,5 +131,5 @@ static IList GetSearchCriteria() } """); - return records; + return records; } \ No newline at end of file diff --git a/csharp/snippets/searching/SearchViaFutures/Program.cs b/csharp/snippets/searching/SearchViaFutures/Program.cs index 5d97cfd..fb77272 100644 --- a/csharp/snippets/searching/SearchViaFutures/Program.cs +++ b/csharp/snippets/searching/SearchViaFutures/Program.cs @@ -18,8 +18,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -39,7 +39,7 @@ // execution to a specific limited pool of threads. In order to // improve performance and conserve memory we want to use the same // threads for Senzing work. The TaskScheduler implementation used -// here is directly pulled from Mirosoft's TaskScheduler documentation +// here is directly pulled from Microsoft's TaskScheduler documentation TaskScheduler taskScheduler = new LimitedConcurrencyLevelTaskScheduler(ThreadCount); @@ -56,228 +56,228 @@ TaskScheduler taskScheduler StreamReader rdr = new StreamReader(fs, Encoding.UTF8); try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - int lineNumber = 0; - bool eof = false; + int lineNumber = 0; + bool eof = false; - while (!eof) + while (!eof) + { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.Count < MaximumBacklog) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.Count < MaximumBacklog) - { - // read the next line - string? line = rdr.ReadLine(); - lineNumber++; - - // check for EOF - if (line == null) - { - eof = true; - break; - } - - // trim the line - line = line.Trim(); - - // skip any blank lines - if (line.Length == 0) continue; + // read the next line + string? line = rdr.ReadLine(); + lineNumber++; - // skip any commented lines - if (line.StartsWith('#')) continue; + // check for EOF + if (line == null) + { + eof = true; + break; + } - // construct the Record instance - Criteria criteria = new Criteria(lineNumber, line); + // trim the line + line = line.Trim(); - try - { - Task task = factory.StartNew(() => - { - // call the addRecord() function with no flags - return engine.SearchByAttributes( - criteria.Line, SzSearchByAttributesDefaultFlags); - }, - CancellationToken.None, - TaskCreationOptions.None, - taskScheduler); - - // add the future to the pending future list - pendingFutures.Add((task, criteria)); + // skip any blank lines + if (line.Length == 0) continue; - } - catch (SzBadInputException e) - { - LogFailedSearch(Error, e, lineNumber, line); - errorCount++; // increment the error count - } - } + // skip any commented lines + if (line.StartsWith('#')) continue; - do - { - // handle any pending futures WITHOUT blocking to reduce the backlog - HandlePendingFutures(pendingFutures, false); + // construct the Record instance + Criteria criteria = new Criteria(lineNumber, line); - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.Count >= MaximumBacklog) + try + { + Task task = factory.StartNew(() => { - Thread.Sleep(PauseTimeout); - } - } while (pendingFutures.Count >= MaximumBacklog); + // call the addRecord() function with no flags + return engine.SearchByAttributes( + criteria.Line, SzSearchByAttributesDefaultFlags); + }, + CancellationToken.None, + TaskCreationOptions.None, + taskScheduler); + + // add the future to the pending future list + pendingFutures.Add((task, criteria)); + + } + catch (SzBadInputException e) + { + LogFailedSearch(Error, e, lineNumber, line); + errorCount++; // increment the error count + } } - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - HandlePendingFutures(pendingFutures, true); + do + { + // handle any pending futures WITHOUT blocking to reduce the backlog + HandlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.Count >= MaximumBacklog) + { + Thread.Sleep(PauseTimeout); + } + } while (pendingFutures.Count >= MaximumBacklog); + } + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + HandlePendingFutures(pendingFutures, true); } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - rdr.Close(); - fs.Close(); - - // IMPORTANT: make sure to destroy the environment - env.Destroy(); - - Console.WriteLine(); - Console.WriteLine("Searches successfully completed : " + successCount); - Console.WriteLine("Total entities found via searches : " + foundEntities.Count); - Console.WriteLine("Searches failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) - { - retryWriter.Flush(); - retryWriter.Close(); - } - if (retryCount > 0) - { - Console.WriteLine(retryCount + " searches to be retried in " + retryFile); - } - Console.Out.Flush(); + rdr.Close(); + fs.Close(); + + // IMPORTANT: make sure to destroy the environment + env.Destroy(); + + Console.WriteLine(); + Console.WriteLine("Searches successfully completed : " + successCount); + Console.WriteLine("Total entities found via searches : " + foundEntities.Count); + Console.WriteLine("Searches failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) + { + retryWriter.Flush(); + retryWriter.Close(); + } + if (retryCount > 0) + { + Console.WriteLine(retryCount + " searches to be retried in " + retryFile); + } + Console.Out.Flush(); } static void HandlePendingFutures(IList<(Task, Criteria)> pendingFutures, bool blocking) { - // loop through the pending futures - for (int index = 0; index < pendingFutures.Count; index++) - { - // get the next pending future - (Task task, Criteria criteria) = pendingFutures[index]; + // loop through the pending futures + for (int index = 0; index < pendingFutures.Count; index++) + { + // get the next pending future + (Task task, Criteria criteria) = pendingFutures[index]; - // if not blocking and this one is not done then continue - if (!blocking && !task.IsCompleted) continue; + // if not blocking and this one is not done then continue + if (!blocking && !task.IsCompleted) continue; - // remove the pending future from the list - pendingFutures.RemoveAt(index--); + // remove the pending future from the list + pendingFutures.RemoveAt(index--); - try + try + { + try + { + // this will block if the task is not yet completed, + // however we only get here with a pending task if + // the blocking parameter is true + string results = task.Result; + + // if we get here then increment the success count + successCount++; + + // parse the search results + JsonObject? jsonObj = JsonNode.Parse(results)?.AsObject(); + JsonArray? jsonArr = jsonObj?["RESOLVED_ENTITIES"]?.AsArray(); + if (jsonArr != null) { - try - { - // this will block if the task is not yet completed, - // however we only get here with a pending task if - // the blocking parameter is true - string results = task.Result; - - // if we get here then increment the success count - successCount++; - - // parse the search results - JsonObject? jsonObj = JsonNode.Parse(results)?.AsObject(); - JsonArray? jsonArr = jsonObj?["RESOLVED_ENTITIES"]?.AsArray(); - if (jsonArr != null) - { - for (int index2 = 0; index2 < jsonArr.Count; index2++) - { - JsonObject? obj = jsonArr[index2]?.AsObject(); - obj = obj?["ENTITY"]?.AsObject(); - obj = obj?["RESOLVED_ENTITY"]?.AsObject(); - long? entityID = obj?["ENTITY_ID"]?.GetValue(); - if (entityID != null) - { - foundEntities.Add(entityID ?? 0L); - } - } - } - - } - catch (AggregateException e) - when (e.InnerException is TaskCanceledException - || e.InnerException is ThreadInterruptedException) - { - throw new SzRetryableException(e.InnerException); - } - catch (ThreadInterruptedException e) - { - throw new SzRetryableException(e.InnerException); - } - catch (AggregateException e) + for (int index2 = 0; index2 < jsonArr.Count; index2++) + { + JsonObject? obj = jsonArr[index2]?.AsObject(); + obj = obj?["ENTITY"]?.AsObject(); + obj = obj?["RESOLVED_ENTITY"]?.AsObject(); + long? entityID = obj?["ENTITY_ID"]?.GetValue(); + if (entityID != null) { - if (e.InnerException != null) - { - // get the inner exception - throw e.InnerException; - } - else - { - throw; - } + foundEntities.Add(entityID ?? 0L); } - + } } - catch (SzBadInputException e) - { - LogFailedSearch(Error, e, criteria.LineNumber, criteria.Line); - errorCount++; // increment the error count - } - catch (SzRetryableException e) + } + catch (AggregateException e) + when (e.InnerException is TaskCanceledException + || e.InnerException is ThreadInterruptedException) + { + throw new SzRetryableException(e.InnerException); + } + catch (ThreadInterruptedException e) + { + throw new SzRetryableException(e.InnerException); + } + catch (AggregateException e) + { + if (e.InnerException != null) { - // handle thread interruption and cancellation as retries - LogFailedSearch(Warning, e, criteria.LineNumber, criteria.Line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) - { - retryFile = new FileInfo( - Path.Combine( - Path.GetTempPath(), - RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); - - retryWriter = new StreamWriter( - new FileStream(retryFile.FullName, - FileMode.Open, - FileAccess.Write), - Encoding.UTF8); - } - if (retryWriter != null) - { - retryWriter.WriteLine(criteria.Line); - } - + // get the inner exception + throw e.InnerException; } - catch (Exception e) + else { - // catch any other exception (incl. SzException) here - LogFailedSearch(Critical, e, criteria.LineNumber, criteria.Line); - errorCount++; - throw; // rethrow since exception is critical + throw; } + } + + } + catch (SzBadInputException e) + { + LogFailedSearch(Error, e, criteria.LineNumber, criteria.Line); + errorCount++; // increment the error count + + } + catch (SzRetryableException e) + { + // handle thread interruption and cancellation as retries + LogFailedSearch(Warning, e, criteria.LineNumber, criteria.Line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) + { + retryFile = new FileInfo( + Path.Combine( + Path.GetTempPath(), + RetryPrefix + Path.GetRandomFileName() + RetrySuffix)); + + retryWriter = new StreamWriter( + new FileStream(retryFile.FullName, + FileMode.Open, + FileAccess.Write), + Encoding.UTF8); + } + if (retryWriter != null) + { + retryWriter.WriteLine(criteria.Line); + } + + } + catch (Exception e) + { + // catch any other exception (incl. SzException) here + LogFailedSearch(Critical, e, criteria.LineNumber, criteria.Line); + errorCount++; + throw; // rethrow since exception is critical } + } } /// @@ -295,44 +295,44 @@ static void LogFailedSearch(string errorType, int lineNumber, string criteriaJson) { - Console.Error.WriteLine(); - Console.Error.WriteLine( - "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " - + lineNumber + ": "); - Console.Error.WriteLine(criteriaJson); - Console.Error.WriteLine(exception); - Console.Error.Flush(); + Console.Error.WriteLine(); + Console.Error.WriteLine( + "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " + + lineNumber + ": "); + Console.Error.WriteLine(criteriaJson); + Console.Error.WriteLine(exception); + Console.Error.Flush(); } public partial class Program { - private const string DefaultFilePath = "../../resources/data/search-5K.jsonl"; + private const string DefaultFilePath = "../../resources/data/search-5K.jsonl"; - private const string RetryPrefix = "retry-"; + private const string RetryPrefix = "retry-"; - private const string RetrySuffix = ".jsonl"; + private const string RetrySuffix = ".jsonl"; - private const int ThreadCount = 8; + private const int ThreadCount = 8; - private const int BacklogFactor = 10; + private const int BacklogFactor = 10; - private const int MaximumBacklog = ThreadCount * BacklogFactor; + private const int MaximumBacklog = ThreadCount * BacklogFactor; - private const int PauseTimeout = 100; + private const int PauseTimeout = 100; - private const string Error = "ERROR"; + private const string Error = "ERROR"; - private const string Warning = "WARNING"; + private const string Warning = "WARNING"; - private const string Critical = "CRITICAL"; + private const string Critical = "CRITICAL"; - private static int errorCount; - private static int successCount; - private static int retryCount; - private static FileInfo? retryFile; - private static StreamWriter? retryWriter; + private static int errorCount; + private static int successCount; + private static int retryCount; + private static FileInfo? retryFile; + private static StreamWriter? retryWriter; - private static readonly HashSet foundEntities = new HashSet(); + private static readonly HashSet foundEntities = new HashSet(); } internal sealed record Criteria(int LineNumber, string Line) { } diff --git a/csharp/snippets/stewardship/ForceResolve/Program.cs b/csharp/snippets/stewardship/ForceResolve/Program.cs index 6a61f53..1e822c9 100644 --- a/csharp/snippets/stewardship/ForceResolve/Program.cs +++ b/csharp/snippets/stewardship/ForceResolve/Program.cs @@ -15,8 +15,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -32,99 +32,99 @@ try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - IDictionary<(string, string), string> records = GetRecords(); + IDictionary<(string, string), string> records = GetRecords(); - // loop through the example records and add them to the repository - foreach (KeyValuePair<(string, string), string> pair in records) - { - (string dataSourceCode, string recordID) = pair.Key; - string recordDefinition = pair.Value; + // loop through the example records and add them to the repository + foreach (KeyValuePair<(string, string), string> pair in records) + { + (string dataSourceCode, string recordID) = pair.Key; + string recordDefinition = pair.Value; - // call the addRecord() function with no flags - engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); + // call the addRecord() function with no flags + engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); - Console.WriteLine("Record " + recordID + " added"); - Console.Out.Flush(); - } + Console.WriteLine("Record " + recordID + " added"); + Console.Out.Flush(); + } - Console.WriteLine(); - foreach ((string dataSourceCode, string recordID) in records.Keys) - { - string result = engine.GetEntity( - dataSourceCode, recordID, SzEntityBriefDefaultFlags); + Console.WriteLine(); + foreach ((string dataSourceCode, string recordID) in records.Keys) + { + string result = engine.GetEntity( + dataSourceCode, recordID, SzEntityBriefDefaultFlags); - JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); - jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); - long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); + JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); + jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); + long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); - Console.WriteLine( - "Record " + dataSourceCode + ":" + recordID - + " originally resolves to entity " + entityID); - } - Console.WriteLine(); - Console.WriteLine("Updating records with TRUSTED_ID to force resolve..."); + Console.WriteLine( + "Record " + dataSourceCode + ":" + recordID + + " originally resolves to entity " + entityID); + } + Console.WriteLine(); + Console.WriteLine("Updating records with TRUSTED_ID to force resolve..."); - string record1 = engine.GetRecord(TestDataSource, "1", SzRecordDefaultFlags); - string record3 = engine.GetRecord(TestDataSource, "3", SzRecordDefaultFlags); + string record1 = engine.GetRecord(TestDataSource, "1", SzRecordDefaultFlags); + string record3 = engine.GetRecord(TestDataSource, "3", SzRecordDefaultFlags); - JsonObject?[] jsonObjects = { + JsonObject?[] jsonObjects = { JsonNode.Parse(record1)?.AsObject()?["JSON_DATA"]?.AsObject(), JsonNode.Parse(record3)?.AsObject()?["JSON_DATA"]?.AsObject() }; - foreach (JsonObject? obj in jsonObjects) - { - if (obj == null) - { - throw new JsonException("Parsed record is unexpectedly null: " - + record1 + " / " + record3); - } - obj["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R1-TEST_R3\""); - obj["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_RESOLVE\""); - } - engine.AddRecord(TestDataSource, "1", jsonObjects[0]?.ToJsonString()); - engine.AddRecord(TestDataSource, "3", jsonObjects[1]?.ToJsonString()); - - Console.WriteLine(); - - foreach ((string dataSourceCode, string recordID) in records.Keys) + foreach (JsonObject? obj in jsonObjects) + { + if (obj == null) { - string result = engine.GetEntity( - dataSourceCode, recordID, SzEntityBriefDefaultFlags); - - JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); - jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); - long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); - - Console.WriteLine( - "Record " + dataSourceCode + ":" + recordID - + " now resolves to entity " + entityID); + throw new JsonException("Parsed record is unexpectedly null: " + + record1 + " / " + record3); } - Console.WriteLine(); + obj["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R1-TEST_R3\""); + obj["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_RESOLVE\""); + } + engine.AddRecord(TestDataSource, "1", jsonObjects[0]?.ToJsonString()); + engine.AddRecord(TestDataSource, "3", jsonObjects[1]?.ToJsonString()); + + Console.WriteLine(); + + foreach ((string dataSourceCode, string recordID) in records.Keys) + { + string result = engine.GetEntity( + dataSourceCode, recordID, SzEntityBriefDefaultFlags); + + JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); + jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); + long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); + + Console.WriteLine( + "Record " + dataSourceCode + ":" + recordID + + " now resolves to entity " + entityID); + } + Console.WriteLine(); } catch (SzException e) { - // handle any exception that may have occurred - Console.Error.WriteLine("Senzing Error Message : " + e.Message); - Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); - Console.Error.WriteLine(e); - throw; + // handle any exception that may have occurred + Console.Error.WriteLine("Senzing Error Message : " + e.Message); + Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); + Console.Error.WriteLine(e); + throw; } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); } /// @@ -133,16 +133,16 @@ /// /// /// A of record key tuple keys -/// to string JSON text values desribing the records to be added. +/// to string JSON text values describing the records to be added. /// static IDictionary<(string, string), string> GetRecords() { - SortedDictionary<(string, string), string> records - = new SortedDictionary<(string, string), string>(); + SortedDictionary<(string, string), string> records + = new SortedDictionary<(string, string), string>(); - records.Add( - ("TEST", "1"), - """ + records.Add( + ("TEST", "1"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1", @@ -154,9 +154,9 @@ } """); - records.Add( - ("TEST", "2"), - """ + records.Add( + ("TEST", "2"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "2", @@ -167,9 +167,9 @@ } """); - records.Add( - ("TEST", "3"), - """ + records.Add( + ("TEST", "3"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "3", @@ -179,12 +179,12 @@ } """); - return records; + return records; } public partial class Program { - private const string TestDataSource = "Test"; + private const string TestDataSource = "Test"; } #pragma warning restore CA1303 // Do not pass literals as localized parameters (example messages) diff --git a/csharp/snippets/stewardship/ForceUnresolve/Program.cs b/csharp/snippets/stewardship/ForceUnresolve/Program.cs index 5e554b4..7050303 100644 --- a/csharp/snippets/stewardship/ForceUnresolve/Program.cs +++ b/csharp/snippets/stewardship/ForceUnresolve/Program.cs @@ -15,8 +15,8 @@ string? settings = Environment.GetEnvironmentVariable("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { - Console.Error.WriteLine("Unable to get settings."); - throw new ArgumentException("Unable to get settings"); + Console.Error.WriteLine("Unable to get settings."); + throw new ArgumentException("Unable to get settings"); } // create a descriptive instance name (can be anything) @@ -32,103 +32,103 @@ try { - // get the engine from the environment - SzEngine engine = env.GetEngine(); + // get the engine from the environment + SzEngine engine = env.GetEngine(); - IDictionary<(string, string), string> records = GetRecords(); + IDictionary<(string, string), string> records = GetRecords(); - // loop through the example records and add them to the repository - foreach (KeyValuePair<(string, string), string> pair in records) - { - (string dataSourceCode, string recordID) = pair.Key; - string recordDefinition = pair.Value; + // loop through the example records and add them to the repository + foreach (KeyValuePair<(string, string), string> pair in records) + { + (string dataSourceCode, string recordID) = pair.Key; + string recordDefinition = pair.Value; - // call the addRecord() function with no flags - engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); + // call the addRecord() function with no flags + engine.AddRecord(dataSourceCode, recordID, recordDefinition, SzNoFlags); - Console.WriteLine("Record " + recordID + " added"); - Console.Out.Flush(); - } + Console.WriteLine("Record " + recordID + " added"); + Console.Out.Flush(); + } - Console.WriteLine(); - foreach ((string dataSourceCode, string recordID) in records.Keys) - { - string result = engine.GetEntity( - dataSourceCode, recordID, SzEntityBriefDefaultFlags); + Console.WriteLine(); + foreach ((string dataSourceCode, string recordID) in records.Keys) + { + string result = engine.GetEntity( + dataSourceCode, recordID, SzEntityBriefDefaultFlags); - JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); - jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); - long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); + JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); + jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); + long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); - Console.WriteLine( - "Record " + dataSourceCode + ":" + recordID - + " originally resolves to entity " + entityID); - } - Console.WriteLine(); - Console.WriteLine("Updating records with TRUSTED_ID to force resolve..."); + Console.WriteLine( + "Record " + dataSourceCode + ":" + recordID + + " originally resolves to entity " + entityID); + } + Console.WriteLine(); + Console.WriteLine("Updating records with TRUSTED_ID to force resolve..."); - string record4 = engine.GetRecord(TestDataSource, "4", SzRecordDefaultFlags); - string record6 = engine.GetRecord(TestDataSource, "6", SzRecordDefaultFlags); + string record4 = engine.GetRecord(TestDataSource, "4", SzRecordDefaultFlags); + string record6 = engine.GetRecord(TestDataSource, "6", SzRecordDefaultFlags); - JsonObject? obj4 = JsonNode.Parse(record4)?.AsObject(); - JsonObject? obj6 = JsonNode.Parse(record6)?.AsObject(); + JsonObject? obj4 = JsonNode.Parse(record4)?.AsObject(); + JsonObject? obj6 = JsonNode.Parse(record6)?.AsObject(); - obj4 = obj4?["JSON_DATA"]?.AsObject(); - obj6 = obj6?["JSON_DATA"]?.AsObject(); + obj4 = obj4?["JSON_DATA"]?.AsObject(); + obj6 = obj6?["JSON_DATA"]?.AsObject(); - if (obj4 == null || obj6 == null) - { - throw new JsonException("The JSON_DATA parses as null: " - + record4 + " / " + record6); - } + if (obj4 == null || obj6 == null) + { + throw new JsonException("The JSON_DATA parses as null: " + + record4 + " / " + record6); + } - obj4["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R4-TEST_R6\""); - obj4["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_UNRESOLVE\""); + obj4["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R4-TEST_R6\""); + obj4["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_UNRESOLVE\""); - obj6["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R6-TEST_R4\""); - obj6["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_UNRESOLVE\""); + obj6["TRUSTED_ID_NUMBER"] = JsonNode.Parse("\"TEST_R6-TEST_R4\""); + obj6["TRUSTED_ID_TYPE"] = JsonNode.Parse("\"FORCE_UNRESOLVE\""); - engine.AddRecord(TestDataSource, "4", obj4.ToJsonString()); - engine.AddRecord(TestDataSource, "6", obj6.ToJsonString()); + engine.AddRecord(TestDataSource, "4", obj4.ToJsonString()); + engine.AddRecord(TestDataSource, "6", obj6.ToJsonString()); - Console.WriteLine(); + Console.WriteLine(); - foreach ((string dataSourceCode, string recordID) in records.Keys) - { - string result = engine.GetEntity( - dataSourceCode, recordID, SzEntityBriefDefaultFlags); + foreach ((string dataSourceCode, string recordID) in records.Keys) + { + string result = engine.GetEntity( + dataSourceCode, recordID, SzEntityBriefDefaultFlags); - JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); - jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); - long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); + JsonObject? jsonObj = JsonNode.Parse(result)?.AsObject(); + jsonObj = jsonObj?["RESOLVED_ENTITY"]?.AsObject(); + long? entityID = jsonObj?["ENTITY_ID"]?.GetValue(); - Console.WriteLine( - "Record " + dataSourceCode + ":" + recordID - + " now resolves to entity " + entityID); - } - Console.WriteLine(); + Console.WriteLine( + "Record " + dataSourceCode + ":" + recordID + + " now resolves to entity " + entityID); + } + Console.WriteLine(); } catch (SzException e) { - // handle any exception that may have occurred - Console.Error.WriteLine("Senzing Error Message : " + e.Message); - Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); - Console.Error.WriteLine(e); - throw; + // handle any exception that may have occurred + Console.Error.WriteLine("Senzing Error Message : " + e.Message); + Console.Error.WriteLine("Senzing Error Code : " + e.ErrorCode); + Console.Error.WriteLine(e); + throw; } catch (Exception e) { - Console.Error.WriteLine(); - Console.Error.WriteLine("*** Terminated due to critical error ***"); - Console.Error.WriteLine(e); - Console.Error.Flush(); - throw; + Console.Error.WriteLine(); + Console.Error.WriteLine("*** Terminated due to critical error ***"); + Console.Error.WriteLine(e); + Console.Error.Flush(); + throw; } finally { - // IMPORTANT: make sure to destroy the environment - env.Destroy(); + // IMPORTANT: make sure to destroy the environment + env.Destroy(); } /// @@ -137,16 +137,16 @@ /// /// /// A of record key tuple keys -/// to string JSON text values desribing the records to be added. +/// to string JSON text values describing the records to be added. /// static IDictionary<(string, string), string> GetRecords() { - SortedDictionary<(string, string), string> records - = new SortedDictionary<(string, string), string>(); + SortedDictionary<(string, string), string> records + = new SortedDictionary<(string, string), string>(); - records.Add( - ("TEST", "4"), - """ + records.Add( + ("TEST", "4"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "4", @@ -157,9 +157,9 @@ } """); - records.Add( - ("TEST", "5"), - """ + records.Add( + ("TEST", "5"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "5", @@ -170,9 +170,9 @@ } """); - records.Add( - ("TEST", "6"), - """ + records.Add( + ("TEST", "6"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "6", @@ -182,12 +182,12 @@ } """); - return records; + return records; } public partial class Program { - private const string TestDataSource = "Test"; + private const string TestDataSource = "Test"; } #pragma warning restore CA1303 // Do not pass literals as localized parameters (example messages) diff --git a/java/runner/java/com/senzing/runner/SnippetRunner.java b/java/runner/java/com/senzing/runner/SnippetRunner.java index 7b00011..618788a 100644 --- a/java/runner/java/com/senzing/runner/SnippetRunner.java +++ b/java/runner/java/com/senzing/runner/SnippetRunner.java @@ -14,493 +14,485 @@ import static com.senzing.sdk.SzFlag.SZ_NO_FLAGS; /** - * Helper class to run each of the snippetts. + * Helper class to run each of the snippets. */ public class SnippetRunner { - public static final String SOURCE_KEY_PREFIX = "source."; + public static final String SOURCE_KEY_PREFIX = "source."; - public static final String LOAD_KEY_PREFIX = "load."; + public static final String LOAD_KEY_PREFIX = "load."; - public static final String INPUT_KEY_PREFIX = "input."; + public static final String INPUT_KEY_PREFIX = "input."; - public static final String DESTROY_AFTER_KEY = "destroyAfter"; + public static final String DESTROY_AFTER_KEY = "destroyAfter"; - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; - private static final String TEST_SOURCE = "TEST"; + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String TEST_SOURCE = "TEST"; - private static final long ONE_MILLION = 1000000L; + private static final long ONE_MILLION = 1000000L; - private static final String JAR_PATH = getJarPath(); + private static final String JAR_PATH = getJarPath(); - private static final int SIGTERM_EXIT_CODE = 143; + private static final int SIGTERM_EXIT_CODE = 143; - private static final boolean WINDOWS; - private static final boolean MACOS; + private static final boolean WINDOWS; + private static final boolean MACOS; - static { - final String osName = System.getProperty("os.name"); + static { + final String osName = System.getProperty("os.name"); - boolean windows = false; - boolean macOS = false; + boolean windows = false; + boolean macOS = false; - String lowerOSName = osName.toLowerCase().trim(); - if (lowerOSName.startsWith("windows")) { - windows = true; - } else if (lowerOSName.startsWith("mac") || lowerOSName.indexOf("darwin") >= 0) { - macOS = true; - } - - WINDOWS = windows; - MACOS = macOS; + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") || lowerOSName.indexOf("darwin") >= 0) { + macOS = true; } - /** - * Harness for running one or more of the code snippets. - * - * @param args The command line arguments. - */ - public static void main(String[] args) { - try { - SortedMap> snippetMap = getSnippetMap(); - Set snippetOptions = new LinkedHashSet<>(); - snippetOptions.addAll(snippetMap.keySet()); - for (Set set : snippetMap.values()) { - snippetOptions.addAll(set); - } + WINDOWS = windows; + MACOS = macOS; + } + + /** + * Harness for running one or more of the code snippets. + * + * @param args The command line arguments. + */ + public static void main(String[] args) { + try { + SortedMap> snippetMap = getSnippetMap(); + Set snippetOptions = new LinkedHashSet<>(); + snippetOptions.addAll(snippetMap.keySet()); + for (Set set : snippetMap.values()) { + snippetOptions.addAll(set); + } + + if (args.length == 0) { + printUsage(snippetMap); + System.exit(1); + } + String settings = System.getProperty("senzing.settings"); + if (settings != null) { + settings = settings.trim(); + } + + // check for settings in the environment if needed + if (settings == null) { + settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings != null) { + settings = settings.trim(); + } + } - if (args.length == 0) { - printUsage(snippetMap); - System.exit(1); + // validate the settings if we have them + if (settings != null) { + JsonObject settingsJson = null; + try { + settingsJson = parseJsonObject(settings); + } catch (Exception e) { + System.err.println("The provided Senzing settings were not valid JSON:"); + System.err.println(); + System.exit(1); + } + } + + // validate the SENZING_DIR + InstallLocations installLocations = null; + try { + installLocations = InstallLocations.findLocations(); + + } catch (Exception e) { + System.exit(1); + } + + Set snippets = new LinkedHashSet<>(); + for (int index = 0; index < args.length; index++) { + String arg = args[index]; + if (arg.equals("all")) { + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); + } } - String settings = System.getProperty("senzing.settings"); - if (settings != null) { - settings = settings.trim(); + }); + continue; + } + if (!snippetOptions.contains(arg)) { + System.err.println("Unrecognized code snippet or snippet group: " + arg); + System.exit(1); + } + if (snippetMap.containsKey(arg)) { + for (String snippet : snippetMap.get(arg)) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); } + } + } else { + if (!snippets.contains(arg)) { + snippets.add(arg); + } + } + } + + // check if we do not have settings and if not setup a temporary repository + if (settings == null) { + settings = setupTempRepository(installLocations); + } + + Long defaultConfigId = null; + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + defaultConfigId = configMgr.getDefaultConfigId(); + + } catch (SzException e) { + e.printStackTrace(); + System.exit(1); + + } finally { + env.destroy(); + env = null; + } + + // execute each snippet + for (String snippet : snippets) { + System.out.println(); + long start = System.nanoTime(); + Properties properties = new Properties(); + String resourceName = "/" + snippet.replaceAll("\\.", "/") + + ".properties"; + try (InputStream is = SnippetRunner.class.getResourceAsStream(resourceName)) { + if (is != null) { + properties.load(is); + } + } - // check for settings in the environment if needed - if (settings == null) { - settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings != null) { - settings = settings.trim(); - } + System.out.println("Preparing repository for " + snippet + "..."); + env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + // first purge the repository + SzDiagnostic diagnostic = env.getDiagnostic(); + diagnostic.purgeRepository(); + + // now set the configuration + SzConfigManager configMgr = env.getConfigManager(); + // check if we need to configure sources + if (properties.containsKey(SOURCE_KEY_PREFIX + 0)) { + SzConfig config = configMgr.createConfig(); + for (int index = 0; properties.containsKey(SOURCE_KEY_PREFIX + index); index++) { + String sourceKey = SOURCE_KEY_PREFIX + index; + String source = properties.getProperty(sourceKey); + source = source.trim(); + System.out.println("Adding data source: " + source); + config.addDataSource(source); } - - // validate the settings if we have them - if (settings != null) { - JsonObject settingsJson = null; - try { - settingsJson = parseJsonObject(settings); - } catch (Exception e) { - System.err.println("The provided Senzing settings were not valid JSON:"); - System.err.println(); - System.exit(1); + String snippetConfig = config.export(); + + // register the config + configMgr.setDefaultConfig(snippetConfig, snippet); + + } else { + // set the default config to the initial default + configMgr.setDefaultConfigId(defaultConfigId); + } + + // check if there are files we need to load + if (properties.containsKey(LOAD_KEY_PREFIX + 0)) { + SzEngine engine = env.getEngine(); + for (int index = 0; properties.containsKey(LOAD_KEY_PREFIX + index); index++) { + String loadKey = LOAD_KEY_PREFIX + index; + String fileName = properties.getProperty(loadKey); + fileName = fileName.trim(); + System.out.println("Loading records from file resource: " + fileName); + try (InputStream is = SnippetRunner.class.getResourceAsStream(fileName)) { + if (is == null) { + throw new IllegalArgumentException( + "Missing resource (" + fileName + ") for load file (" + + loadKey + ") for snippet (" + snippet + ")"); } - } - - // validate the SENZING_DIR - InstallLocations installLocations = null; - try { - installLocations = InstallLocations.findLocations(); - - } catch (Exception e) { - System.exit(1); - } - - Set snippets = new LinkedHashSet<>(); - for (int index = 0; index < args.length; index++) { - String arg = args[index]; - if (arg.equals("all")) { - snippetMap.values().forEach(snippetSet -> { - for (String snippet : snippetSet) { - if (!snippets.contains(snippet)) { - snippets.add(snippet); - } - } - }); + InputStreamReader isr = new InputStreamReader(is, UTF_8); + BufferedReader br = new BufferedReader(isr); + for (String line = br.readLine(); line != null; line = br.readLine()) { + line = line.trim(); + if (line.length() == 0) continue; + if (line.startsWith("#")) + continue; + JsonObject record = Json.createReader(new StringReader(line)).readObject(); + String dataSource = record.getString(DATA_SOURCE, TEST_SOURCE); + String recordId = record.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSource, recordId); + engine.addRecord(recordKey, line, SZ_NO_FLAGS); } - if (!snippetOptions.contains(arg)) { - System.err.println("Unrecognized code snippet or snippet group: " + arg); - System.exit(1); - } - if (snippetMap.containsKey(arg)) { - for (String snippet : snippetMap.get(arg)) { - if (!snippets.contains(snippet)) { - snippets.add(snippet); - } - } - } else { - if (!snippets.contains(arg)) { - snippets.add(arg); - } - } - } - - // check if we do not have settings and if not setup a temporary repository - if (settings == null) { - settings = setupTempRepository(installLocations); + } } + } - Long defaultConfigId = null; - SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); - try { - SzConfigManager configMgr = env.getConfigManager(); - defaultConfigId = configMgr.getDefaultConfigId(); - - } catch (SzException e) { - e.printStackTrace(); - System.exit(1); - - } finally { - env.destroy(); - env = null; - } - - // execute each snippet - for (String snippet : snippets) { - System.out.println(); - long start = System.nanoTime(); - Properties properties = new Properties(); - String resourceName = "/" + snippet.replaceAll("\\.", "/") - + ".properties"; - try (InputStream is = SnippetRunner.class.getResourceAsStream(resourceName)) { - if (is != null) { - properties.load(is); - } - } - - System.out.println("Preparing repository for " + snippet + "..."); - env = SzCoreEnvironment.newBuilder().settings(settings).build(); - try { - // first purge the repository - SzDiagnostic diagnostic = env.getDiagnostic(); - diagnostic.purgeRepository(); - - // now set the configuration - SzConfigManager configMgr = env.getConfigManager(); - // check if we need to configure sources - if (properties.containsKey(SOURCE_KEY_PREFIX + 0)) { - SzConfig config = configMgr.createConfig(); - for (int index = 0; - properties.containsKey(SOURCE_KEY_PREFIX + index); - index++) - { - String sourceKey = SOURCE_KEY_PREFIX + index; - String source = properties.getProperty(sourceKey); - source = source.trim(); - System.out.println("Adding data source: " + source); - config.addDataSource(source); - } - String snippetConfig = config.export(); - - // register the config - configMgr.setDefaultConfig(snippetConfig, snippet); - - } else { - // set the default config to the initial default - configMgr.setDefaultConfigId(defaultConfigId); - } - - // check if there are files we need to load - if (properties.containsKey(LOAD_KEY_PREFIX + 0)) { - SzEngine engine = env.getEngine(); - for (int index = 0; properties.containsKey(LOAD_KEY_PREFIX + index); index++) - { - String loadKey = LOAD_KEY_PREFIX + index; - String fileName = properties.getProperty(loadKey); - fileName = fileName.trim(); - System.out.println("Loading records from file resource: " + fileName); - try (InputStream is = SnippetRunner.class.getResourceAsStream(fileName)) - { - if (is == null) { - throw new IllegalArgumentException( - "Missing resource (" + fileName + ") for load file (" - + loadKey + ") for snippet (" + snippet + ")"); - } - InputStreamReader isr = new InputStreamReader(is, UTF_8); - BufferedReader br = new BufferedReader(isr); - for (String line = br.readLine(); line != null; line = br.readLine()) { - line = line.trim(); - if (line.length() == 0) continue; - if (line.startsWith("#")) continue; - JsonObject record = Json.createReader(new StringReader(line)).readObject(); - String dataSource = record.getString(DATA_SOURCE, TEST_SOURCE); - String recordId = record.getString(RECORD_ID, null); - SzRecordKey recordKey = SzRecordKey.of(dataSource, recordId); - engine.addRecord(recordKey, line, SZ_NO_FLAGS); - } - } - } - } - - } catch (SzException e) { - e.printStackTrace(); - } finally { - env.destroy(); - } - long duration = (System.nanoTime() - start) / ONE_MILLION; - System.out.println("Prepared repository for " + snippet + ". (" + duration + "ms)"); + } catch (SzException e) { + e.printStackTrace(); + } finally { + env.destroy(); + } + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Prepared repository for " + snippet + ". (" + duration + "ms)"); - executeSnippet(snippet, installLocations, settings, properties); - } - System.out.println(); + executeSnippet(snippet, installLocations, settings, properties); + } + System.out.println(); - } catch (Exception e) { - e.printStackTrace(); - System.exit(1); + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + private static String[] createRuntimeEnv(InstallLocations senzingInstall, String settings) { + Map origEnv = System.getenv(); + List envList = new ArrayList<>(origEnv.size() + 10); + origEnv.forEach((envKey, envVal) -> { + envList.add(envKey + "=" + envVal); + }); + envList.add("SENZING_ENGINE_CONFIGURATION_JSON=" + settings); + return envList.toArray(new String[envList.size()]); + } + + private static Thread startOutputThread(InputStream stream, PrintStream ps) { + Thread thread = new Thread(() -> { + final String UTF8 = "UTF-8"; + try (InputStreamReader isr = new InputStreamReader(stream, UTF8); + BufferedReader br = new BufferedReader(isr)) { + for (String line = br.readLine(); line != null; line = br.readLine()) { + ps.println(line); + ps.flush(); } + } catch (IOException e) { + e.printStackTrace(); + } + }); + thread.start(); + return thread; + } + + private static void executeSnippet(String snippet, + InstallLocations senzingInstall, + String settings, + Properties properties) + throws Exception { + String[] cmdArray = new String[] { "java", "-cp", JAR_PATH, snippet }; + + String[] runtimeEnv = createRuntimeEnv(senzingInstall, settings); + + System.out.println(); + System.out.println("---------------------------------------"); + System.out.println("Executing " + snippet + "..."); + long start = System.nanoTime(); + Runtime runtime = Runtime.getRuntime(); + Process process = runtime.exec(cmdArray, runtimeEnv); + Thread errThread = startOutputThread(process.getErrorStream(), System.err); + Thread outThread = startOutputThread(process.getInputStream(), System.out); + if (properties != null && properties.containsKey(INPUT_KEY_PREFIX + 0)) { + try { + // sleep for 1 second to give the process a chance to start up + Thread.sleep(1000L); + } catch (InterruptedException ignore) { + // ignore interruptions + } + PrintWriter pw = new PrintWriter( + new OutputStreamWriter(process.getOutputStream(), UTF_8)); + for (int index = 0; properties.containsKey(INPUT_KEY_PREFIX + index); index++) { + String inputLine = properties.getProperty(INPUT_KEY_PREFIX + index); + System.out.println(inputLine); + System.out.flush(); + inputLine = (inputLine == null) ? "" : inputLine.trim(); + pw.println(inputLine); + pw.flush(); + } } - - private static String[] createRuntimeEnv(InstallLocations senzingInstall, String settings) { - Map origEnv = System.getenv(); - List envList = new ArrayList<>(origEnv.size() + 10); - origEnv.forEach((envKey, envVal) -> { - envList.add(envKey + "=" + envVal); - }); - envList.add("SENZING_ENGINE_CONFIGURATION_JSON=" + settings); - return envList.toArray(new String[envList.size()]); + int exitValue = 0; + int expectedExitValue = 0; + if (properties.containsKey(DESTROY_AFTER_KEY)) { + String propValue = properties.getProperty(DESTROY_AFTER_KEY); + long delay = Long.parseLong(propValue); + boolean exited = process.waitFor(delay, TimeUnit.MILLISECONDS); + if (!exited && process.isAlive()) { + expectedExitValue = (WINDOWS) ? 1 : SIGTERM_EXIT_CODE; + System.out.println(); + System.out.println("Runner destroying " + snippet + " process..."); + // NOTE: using process.destroy() does not trigger the registered + // shutdown hooks in the snippet sub-process for some reason + Process killer = runtime.exec( + ((WINDOWS) ? "taskkill /F /PID " : "kill ") + process.pid()); + killer.waitFor(); // wait for the kill process to complete + } + exitValue = process.waitFor(); + + } else { + // wait indefinitely for the process to terminate + exitValue = process.waitFor(); } - private static Thread startOutputThread(InputStream stream, PrintStream ps) { - Thread thread = new Thread(() -> { - final String UTF8 = "UTF-8"; - try (InputStreamReader isr = new InputStreamReader(stream, UTF8); - BufferedReader br = new BufferedReader(isr)) - { - for (String line = br.readLine(); line != null; line = br.readLine()) { - ps.println(line); - ps.flush(); - } - } catch (IOException e) { - e.printStackTrace(); - } - }); - thread.start(); - return thread; + errThread.join(); + outThread.join(); + if (exitValue != expectedExitValue) { + throw new Exception("Failed to execute snippet; " + snippet + + " (" + exitValue + ")"); } - - private static void executeSnippet(String snippet, - InstallLocations senzingInstall, - String settings, - Properties properties) - throws Exception - { - String[] cmdArray = new String[] { "java", "-cp", JAR_PATH, snippet }; - - String[] runtimeEnv = createRuntimeEnv(senzingInstall, settings); - - System.out.println(); - System.out.println("---------------------------------------"); - System.out.println("Executing " + snippet + "..."); - long start = System.nanoTime(); - Runtime runtime = Runtime.getRuntime(); - Process process = runtime.exec(cmdArray, runtimeEnv); - Thread errThread = startOutputThread(process.getErrorStream(), System.err); - Thread outThread = startOutputThread(process.getInputStream(), System.out); - if (properties != null && properties.containsKey(INPUT_KEY_PREFIX + 0)) { - try { - // sleep for 1 second to give the process a chance to start up - Thread.sleep(1000L); - } catch (InterruptedException ignore) { - // ignore interruptions - } - PrintWriter pw = new PrintWriter( - new OutputStreamWriter(process.getOutputStream(), UTF_8)); - for (int index = 0; - properties.containsKey(INPUT_KEY_PREFIX + index); - index++) - { - String inputLine = properties.getProperty(INPUT_KEY_PREFIX + index); - System.out.println(inputLine); - System.out.flush(); - inputLine = (inputLine == null) ? "" : inputLine.trim(); - pw.println(inputLine); - pw.flush(); - } + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Executed " + snippet + ". (" + duration + "ms)"); + } + + private static void printUsage(SortedMap> snippetMap) { + System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]*"); + System.err.println(); + System.err.println(" - Specifying no arguments will print this message"); + System.err.println(" - Specifying \"all\" will run all snippets"); + System.err.println(" - Specifying one or more groups will run all snippets in those groups"); + System.err.println(" - Specifying one or more snippets will run those snippet"); + System.err.println(); + System.err.println("Examples:"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar all"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords"); + System.err.println(); + System.err.println("Snippet Group Names:"); + snippetMap.keySet().forEach(group -> { + System.err.println(" - " + group); + }); + System.err.println(); + System.err.println("Snippet Names:"); + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + System.err.println(" - " + snippet); + } + }); + System.err.println(); + } + + private static String getJarPath() throws RuntimeException { + try { + String resourceName = SnippetRunner.class.getSimpleName() + ".class"; + String url = SnippetRunner.class.getResource(resourceName).toString(); + String jarPath = url.replaceAll("jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); + + if (WINDOWS && jarPath.startsWith("/")) { + jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); + } + + if (WINDOWS && jarPath.startsWith("/")) { + jarPath = jarPath.substring(1); + } + return jarPath; + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static SortedMap> getSnippetMap() throws Exception { + SortedMap> snippetMap = new TreeMap<>(); + File jarFile = new File(JAR_PATH); + try (FileInputStream fis = new FileInputStream(jarFile); ZipInputStream zis = new ZipInputStream(fis)) { + for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) { + String name = entry.getName(); + if (name.startsWith("com/")) { + continue; } - int exitValue = 0; - int expectedExitValue = 0; - if (properties.containsKey(DESTROY_AFTER_KEY)) { - String propValue = properties.getProperty(DESTROY_AFTER_KEY); - long delay = Long.parseLong(propValue); - boolean exited = process.waitFor(delay, TimeUnit.MILLISECONDS); - if (!exited && process.isAlive()) { - expectedExitValue = (WINDOWS) ? 1 : SIGTERM_EXIT_CODE; - System.out.println(); - System.out.println("Runner destroying " + snippet + " process..."); - // NOTE: using process.destroy() does not trigger the registered - // shutdown hooks in the snippet sub-process for some reason - Process killer = runtime.exec( - ((WINDOWS) ? "taskkill /F /PID " : "kill ") + process.pid()); - killer.waitFor(); // wait for the kill process to complete - } - exitValue = process.waitFor(); - - } else { - // wait indefinitely for the process to terminate - exitValue = process.waitFor(); + if (name.startsWith("org/")) { + continue; } - - errThread.join(); - outThread.join(); - if (exitValue != expectedExitValue) { - throw new Exception("Failed to execute snippet; " + snippet - + " (" + exitValue + ")"); + if (name.startsWith("javax/")) { + continue; } - long duration = (System.nanoTime() - start) / ONE_MILLION; - System.out.println("Executed " + snippet + ". (" + duration + "ms)"); - } - - private static void printUsage(SortedMap> snippetMap) { - System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]*"); - System.err.println(); - System.err.println(" - Specifying no arguments will print this message"); - System.err.println(" - Specifying \"all\" will run all snippets"); - System.err.println(" - Specifying one or more groups will run all snippets in those groups"); - System.err.println(" - Specifying one or more snippets will run those snippet"); - System.err.println(); - System.err.println("Examples:"); - System.err.println(); - System.err.println(" java -jar sz-sdk-snippets.jar all"); - System.err.println(); - System.err.println(" java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures"); - System.err.println(); - System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords"); - System.err.println(); - System.err.println("Snippet Group Names:"); - snippetMap.keySet().forEach(group -> { - System.err.println(" - " + group); - }); - System.err.println(); - System.err.println("Snippet Names:"); - snippetMap.values().forEach(snippetSet -> { - for (String snippet : snippetSet) { - System.err.println(" - " + snippet); - } - }); - System.err.println(); - } - - private static String getJarPath() throws RuntimeException { - try { - String resourceName = SnippetRunner.class.getSimpleName() + ".class"; - String url = SnippetRunner.class.getResource(resourceName).toString(); - String jarPath = url.replaceAll("jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); - - if (WINDOWS && jarPath.startsWith("/")) { - jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); - } - - if (WINDOWS && jarPath.startsWith("/")) { - jarPath = jarPath.substring(1); - } - return jarPath; - } catch (RuntimeException e) { - throw e; - } catch (Exception e) { - throw new RuntimeException(e); + if (name.startsWith("META-INF/")) { + continue; } - } - - private static SortedMap> getSnippetMap() throws Exception { - SortedMap> snippetMap = new TreeMap<>(); - File jarFile = new File(JAR_PATH); - try (FileInputStream fis = new FileInputStream(jarFile); ZipInputStream zis = new ZipInputStream(fis)) { - for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) { - String name = entry.getName(); - if (name.startsWith("com/")) { - continue; - } - if (name.startsWith("org/")) { - continue; - } - if (name.startsWith("javax/")) { - continue; - } - if (name.startsWith("META-INF/")) { - continue; - } - if (!name.endsWith(".class")) { - continue; - } - if (name.indexOf('$') >= 0) { - continue; - } - int index = name.indexOf('/'); - if (index < 0) { - continue; - } - String group = name.substring(0, index); - String snippet = name.substring(0, name.length() - ".class".length()).replace('/', '.'); - SortedSet snippetSet = snippetMap.get(group); - if (snippetSet == null) { - snippetSet = new TreeSet<>(); - snippetMap.put(group, snippetSet); - } - snippetSet.add(snippet); - } + if (!name.endsWith(".class")) { + continue; } - return snippetMap; + if (name.indexOf('$') >= 0) { + continue; + } + int index = name.indexOf('/'); + if (index < 0) { + continue; + } + String group = name.substring(0, index); + String snippet = name.substring(0, name.length() - ".class".length()).replace('/', '.'); + SortedSet snippetSet = snippetMap.get(group); + if (snippetSet == null) { + snippetSet = new TreeSet<>(); + snippetMap.put(group, snippetSet); + } + snippetSet.add(snippet); + } + } + return snippetMap; + } + + /** + * + */ + private static String setupTempRepository(InstallLocations senzingInstall) throws Exception { + File resourcesDir = senzingInstall.getResourceDirectory(); + File templatesDir = senzingInstall.getTemplatesDirectory(); + File configDir = senzingInstall.getConfigDirectory(); + File schemaDir = new File(resourcesDir, "schema"); + File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); + File configFile = new File(templatesDir, "g2config.json"); + + // lay down the database schema + File databaseFile = File.createTempFile("G2C-", ".db"); + String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); + + try (FileReader rdr = new FileReader(schemaFile, UTF_8_CHARSET); + BufferedReader br = new BufferedReader(rdr); + Connection conn = DriverManager.getConnection(jdbcUrl); + Statement stmt = conn.createStatement()) { + for (String sql = br.readLine(); sql != null; sql = br.readLine()) { + sql = sql.trim(); + if (sql.length() == 0) + continue; + stmt.execute(sql); + } } - /** - * - */ - private static String setupTempRepository(InstallLocations senzingInstall) throws Exception { - File resourcesDir = senzingInstall.getResourceDirectory(); - File templatesDir = senzingInstall.getTemplatesDirectory(); - File configDir = senzingInstall.getConfigDirectory(); - File schemaDir = new File(resourcesDir, "schema"); - File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); - File configFile = new File(templatesDir, "g2config.json"); - - // lay down the database schema - File databaseFile = File.createTempFile("G2C-", ".db"); - String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); - - try (FileReader rdr = new FileReader(schemaFile, UTF_8_CHARSET); - BufferedReader br = new BufferedReader(rdr); - Connection conn = DriverManager.getConnection(jdbcUrl); - Statement stmt = conn.createStatement()) { - for (String sql = br.readLine(); sql != null; sql = br.readLine()) { - sql = sql.trim(); - if (sql.length() == 0) - continue; - stmt.execute(sql); + String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath().replace("\\", "\\\\"); + String configPath = configDir.getCanonicalPath().replace("\\", "\\\\"); + String resourcePath = resourcesDir.toString().replace("\\", "\\\\"); + String databasePath = databaseFile.getCanonicalPath().replace("\\", "\\\\"); + String baseConfig = readTextFileAsString(configFile, UTF_8); + String settings = """ + { + "PIPELINE": { + "SUPPORTPATH": "%s", + "CONFIGPATH": "%s", + "RESOURCEPATH": "%s" + }, + "SQL": { + "CONNECTION": "sqlite3://na:na@%s" } } + """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); - String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath().replace("\\", "\\\\"); - String configPath = configDir.getCanonicalPath().replace("\\", "\\\\"); - String resourcePath = resourcesDir.toString().replace("\\", "\\\\"); - String databasePath = databaseFile.getCanonicalPath().replace("\\", "\\\\"); - String baseConfig = readTextFileAsString(configFile, UTF_8); - String settings = """ - { - "PIPELINE": { - "SUPPORTPATH": "%s", - "CONFIGPATH": "%s", - "RESOURCEPATH": "%s" - }, - "SQL": { - "CONNECTION": "sqlite3://na:na@%s" - } - } - """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); - - SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); - try { - env.getConfigManager().setDefaultConfig(baseConfig); - - } catch (SzException e) { - System.err.println(settings); - throw e; + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + env.getConfigManager().setDefaultConfig(baseConfig); - } finally { - env.destroy(); - } + } catch (SzException e) { + System.err.println(settings); + throw e; - return settings; + } finally { + env.destroy(); } + + return settings; + } } diff --git a/java/snippets/deleting/DeleteViaFutures.java b/java/snippets/deleting/DeleteViaFutures.java index fab9426..f8a9706 100644 --- a/java/snippets/deleting/DeleteViaFutures.java +++ b/java/snippets/deleting/DeleteViaFutures.java @@ -13,279 +13,276 @@ * Provides a simple example of deleting records from the Senzing repository. */ public class DeleteViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = DeleteViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, Record> pendingFutures = new IdentityHashMap<>(); - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - boolean eof = false; - - while (!eof) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - // read the next line - String line = br.readLine(); - lineNumber++; - - // check for EOF - if (line == null) { - eof = true; - break; - } - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try { - // parse the line as a JSON object - JsonObject recordJson - = Json.createReader(new StringReader(line)).readObject(); - - // extract the data source code and record ID - String dataSourceCode = recordJson.getString(DATA_SOURCE, null); - String recordId = recordJson.getString(RECORD_ID, null); - SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); - - Future future = executor.submit(() -> { - // call the deleteRecord() function with no flags - engine.deleteRecord(recordKey, SZ_NO_FLAGS); - - return null; - }); - - // add the futures to the pending future list - pendingFutures.put(future, record); - - } catch (JsonException e) { - logFailedRecord(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - } - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - } - - // shutdown the executor service - executor.shutdown(); - - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - handlePendingFutures(pendingFutures, true); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // check if executor service is shutdown - if (!executor.isShutdown()) { - executor.shutdown(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - - System.out.println(); - System.out.println("Successful delete operations : " + successCount); - System.out.println("Failed delete operations : " + errorCount); - - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " deletions to be retried in " + retryFile); - } - System.out.flush(); - - } - + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); } - private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) - throws Exception - { - // check for completed futures - Iterator, Record>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, Record> entry = iter.next(); - Future future = entry.getKey(); - Record record = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // create a descriptive instance name (can be anything) + String instanceName = DeleteViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) { + continue; + } + + // skip any commented lines + if (line.startsWith("#")) { + continue; + } + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the deleteRecord() function with no flags + engine.deleteRecord(recordKey, SZ_NO_FLAGS); + + return null; + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } - // remove the pending future from the map - iter.remove(); + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - try { - // get the value to see if there was an exception - future.get(); - - // if we get here then increment the success count - successCount++; - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzBadInputException e) { - logFailedRecord(ERROR, e, record.lineNumber, record.line); - errorCount++; // increment the error count - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedRecord(WARNING, e, record.lineNumber, record.line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(record.line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, record.lineNumber, record.line); - errorCount++; - throw e; // rethrow since exception is critical + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing } - } - } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param recordJson The JSON text for the failed record. - */ - private static void logFailedRecord(String errorType, - Exception exception, - int lineNumber, - String recordJson) - { - System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); - System.err.println(recordJson); - System.err.println(exception); - System.err.flush(); } - private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; - - private static final String UTF_8 = "UTF-8"; - - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; - - private static final int THREAD_COUNT = 8; - - private static final int BACKLOG_FACTOR = 10; - - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - - private static final long PAUSE_TIMEOUT = 100L; - - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; - - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - public record Record(int lineNumber, String line) { } + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + throws Exception { + // check for completed futures + Iterator, Record>> iter = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { + } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; } diff --git a/java/snippets/deleting/DeleteWithInfoViaFutures.java b/java/snippets/deleting/DeleteWithInfoViaFutures.java index 89b7de4..45c385b 100644 --- a/java/snippets/deleting/DeleteWithInfoViaFutures.java +++ b/java/snippets/deleting/DeleteWithInfoViaFutures.java @@ -13,321 +13,318 @@ * Provides a simple example of deleting records from the Senzing repository. */ public class DeleteWithInfoViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = DeleteWithInfoViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, Record> pendingFutures = new IdentityHashMap<>(); - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - boolean eof = false; - - while (!eof) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - // read the next line - String line = br.readLine(); - lineNumber++; - - // check for EOF - if (line == null) { - eof = true; - break; - } - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try { - // parse the line as a JSON object - JsonObject recordJson - = Json.createReader(new StringReader(line)).readObject(); - - // extract the data source code and record ID - String dataSourceCode = recordJson.getString(DATA_SOURCE, null); - String recordId = recordJson.getString(RECORD_ID, null); - SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); - - Future future = executor.submit(() -> { - // call the deleteRecord() function with info flags - return engine.deleteRecord(recordKey, SZ_WITH_INFO_FLAGS); - }); - - // add the futures to the pending future list - pendingFutures.put(future, record); - - } catch (JsonException e) { - logFailedRecord(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - } - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(engine, pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - } - - // shutdown the executor service - executor.shutdown(); - - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - handlePendingFutures(engine, pendingFutures, true); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // check if executor service is shutdown - if (!executor.isShutdown()) { - executor.shutdown(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - - System.out.println(); - System.out.println("Successful delete operations : " + successCount); - System.out.println("Total entities deleted : " + entityIdSet.size()); - System.out.println("Failed delete operations : " + errorCount); - - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " deletions to be retried in " + retryFile); - } - System.out.flush(); - - } - + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); } - private static void handlePendingFutures(SzEngine engine, - Map, Record> pendingFutures, - boolean blocking) - throws Exception - { - // check for completed futures - Iterator, Record>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, Record> entry = iter.next(); - Future future = entry.getKey(); - Record record = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // create a descriptive instance name (can be anything) + String instanceName = DeleteWithInfoViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) { + continue; + } + + // skip any commented lines + if (line.startsWith("#")) { + continue; + } + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the deleteRecord() function with info flags + return engine.deleteRecord(recordKey, SZ_WITH_INFO_FLAGS); + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } - // remove the pending future from the map - iter.remove(); + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - try { - // get the value to see if there was an exception - String info = future.get(); - - // if we get here then increment the success count - successCount++; - - // process the info - processInfo(engine, info); - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzBadInputException e) { - logFailedRecord(ERROR, e, record.lineNumber, record.line); - errorCount++; // increment the error count - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedRecord(WARNING, e, record.lineNumber, record.line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(record.line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, record.lineNumber, record.line); - errorCount++; - throw e; // rethrow since exception is critical + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing } - } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Total entities deleted : " + entityIdSet.size()); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + } - /** - * Example method for parsing and handling the INFO message (formatted - * as JSON). This example implementation simply tracks all entity ID's - * that appear as "AFFECTED_ENTITIES" to count the number - * of entities deleted for the records -- essentially a contrived - * data mart. - * - * @param engine the {@link SzEngine} to use. - * @param info The info message. - */ - private static void processInfo(SzEngine engine, String info) { - JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); - if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { - return; + } + + private static void handlePendingFutures(SzEngine engine, + Map, Record> pendingFutures, + boolean blocking) + throws Exception { + // check for completed futures + Iterator, Record>> iter = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); } - JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); - for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { - JsonNumber number = affected.getJsonNumber(ENTITY_ID); - long entityId = number.longValue(); - try { - engine.getEntity(entityId, null); - entityIdSet.remove(entityId); - } catch (SzNotFoundException e) { - entityIdSet.add(entityId); - } catch (SzException e) { - // simply log the exception, do not rethrow - System.err.println(); - System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); - System.err.println(e.toString()); - System.err.flush(); - } + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } } - - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param recordJson The JSON text for the failed record. - */ - private static void logFailedRecord(String errorType, - Exception exception, - int lineNumber, - String recordJson) - { + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities deleted for the records -- essentially a contrived + * data mart. + * + * @param engine the {@link SzEngine} to use. + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { + return; + } + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.remove(entityId); + } catch (SzNotFoundException e) { + entityIdSet.add(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); - System.err.println(recordJson); - System.err.println(exception); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); System.err.flush(); + } } - - private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; - - private static final String UTF_8 = "UTF-8"; - - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; - - private static final int THREAD_COUNT = 8; - - private static final int BACKLOG_FACTOR = 10; - - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - - private static final long PAUSE_TIMEOUT = 100L; - - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; - private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; - private static final String ENTITY_ID = "ENTITY_ID"; - - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - public record Record(int lineNumber, String line) { } - - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; - private static Set entityIdSet = new HashSet<>(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { + } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static Set entityIdSet = new HashSet<>(); } diff --git a/java/snippets/loading/LoadRecords.java b/java/snippets/loading/LoadRecords.java index 723842e..7cfef80 100644 --- a/java/snippets/loading/LoadRecords.java +++ b/java/snippets/loading/LoadRecords.java @@ -11,72 +11,72 @@ * Provides a simple example of adding records to the Senzing repository. */ public class LoadRecords { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = LoadRecords.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - // loop through the example records and add them to the repository - for (Map.Entry entry : getRecords().entrySet()) { - SzRecordKey recordKey = entry.getKey(); - String recordDefinition = entry.getValue(); - - // call the addRecord() function with no flags - engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); - - System.out.println("Record " + recordKey.recordId() + " added"); - System.out.flush(); - } + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } - } catch (SzException e) { - // handle any exception that may have occurred - System.err.println("Senzing Error Message : " + e.getMessage()); - System.err.println("Senzing Error Code : " + e.getErrorCode()); - e.printStackTrace(); - throw new RuntimeException(e); - - } catch (Exception e) { - e.printStackTrace(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); + // create a descriptive instance name (can be anything) + String instanceName = LoadRecords.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (Map.Entry entry : getRecords().entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); - } finally { - // IMPORTANT: make sure to destroy the environment - env.destroy(); - } + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); } - /** - * This is a support method for providing example records to add. - * - * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} - * JSON text values desribing the records to be added. - */ - public static Map getRecords() { - Map records = new LinkedHashMap<>(); - records.put( - SzRecordKey.of("TEST", "1001"), - """ + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1001"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1001", @@ -91,10 +91,10 @@ public static Map getRecords() { "EMAIL_ADDRESS": "bsmith@work.com" } """); - - records.put( - SzRecordKey.of("TEST", "1002"), - """ + + records.put( + SzRecordKey.of("TEST", "1002"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1002", @@ -112,10 +112,10 @@ public static Map getRecords() { "PHONE_NUMBER": "702-919-1300" } """); - - records.put( - SzRecordKey.of("TEST", "1003"), - """ + + records.put( + SzRecordKey.of("TEST", "1003"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1003", @@ -128,9 +128,9 @@ public static Map getRecords() { } """); - records.put( - SzRecordKey.of("TEST", "1004"), - """ + records.put( + SzRecordKey.of("TEST", "1004"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1004", @@ -146,9 +146,9 @@ public static Map getRecords() { } """); - records.put( - SzRecordKey.of("TEST", "1005"), - """ + records.put( + SzRecordKey.of("TEST", "1005"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1005", @@ -165,7 +165,7 @@ public static Map getRecords() { "ADDR_POSTAL_CODE": "89132" } """); - - return records; - } + + return records; + } } diff --git a/java/snippets/loading/LoadViaFutures.java b/java/snippets/loading/LoadViaFutures.java index 5de5175..96c39ff 100644 --- a/java/snippets/loading/LoadViaFutures.java +++ b/java/snippets/loading/LoadViaFutures.java @@ -13,280 +13,277 @@ * Provides a simple example of adding records to the Senzing repository. */ public class LoadViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = LoadViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, Record> pendingFutures = new IdentityHashMap<>(); - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - boolean eof = false; - - while (!eof) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - // read the next line - String line = br.readLine(); - lineNumber++; - - // check for EOF - if (line == null) { - eof = true; - break; - } - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try { - // parse the line as a JSON object - JsonObject recordJson - = Json.createReader(new StringReader(line)).readObject(); - - // extract the data source code and record ID - String dataSourceCode = recordJson.getString(DATA_SOURCE, null); - String recordId = recordJson.getString(RECORD_ID, null); - SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); - - Future future = executor.submit(() -> { - // call the addRecord() function with no flags - engine.addRecord(recordKey, record.line, SZ_NO_FLAGS); - - // return null since we have no "info" to return - return null; - }); - - // add the future to the pending future list - pendingFutures.put(future, record); - - } catch (JsonException e) { - logFailedRecord(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - } - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - } - - // shutdown the executor service - executor.shutdown(); - - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - handlePendingFutures(pendingFutures, true); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // check if executor service is shutdown - if (!executor.isShutdown()) { - executor.shutdown(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - - System.out.println(); - System.out.println("Records successfully added : " + successCount); - System.out.println("Records failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " records to be retried in " + retryFile); - } - System.out.flush(); - - } - + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); } - private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) - throws Exception - { - // check for completed futures - Iterator, Record>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, Record> entry = iter.next(); - Future future = entry.getKey(); - Record record = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // create a descriptive instance name (can be anything) + String instanceName = LoadViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) { + continue; + } + + // skip any commented lines + if (line.startsWith("#")) { + continue; + } + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + engine.addRecord(recordKey, record.line, SZ_NO_FLAGS); + + // return null since we have no "info" to return + return null; + }); + + // add the future to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } - // remove the pending future from the map - iter.remove(); + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - try { - // get the value to see if there was an exception - future.get(); - - // if we get here then increment the success count - successCount++; - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzBadInputException e) { - logFailedRecord(ERROR, e, record.lineNumber, record.line); - errorCount++; // increment the error count - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedRecord(WARNING, e, record.lineNumber, record.line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(record.line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, record.lineNumber, record.line); - errorCount++; - throw e; // rethrow since exception is critical + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing } - } - } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param recordJson The JSON text for the failed record. - */ - private static void logFailedRecord(String errorType, - Exception exception, - int lineNumber, - String recordJson) - { - System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); - System.err.println(recordJson); - System.err.println(exception); - System.err.flush(); } - private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; - - private static final String UTF_8 = "UTF-8"; - - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; - - private static final int THREAD_COUNT = 8; - - private static final int BACKLOG_FACTOR = 10; - - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - - private static final long PAUSE_TIMEOUT = 100L; - - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; - - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - public record Record(int lineNumber, String line) { } + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + throws Exception { + // check for completed futures + Iterator, Record>> iter = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { + } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; } diff --git a/java/snippets/loading/LoadWithInfoViaFutures.java b/java/snippets/loading/LoadWithInfoViaFutures.java index e6dcaaf..3151b69 100644 --- a/java/snippets/loading/LoadWithInfoViaFutures.java +++ b/java/snippets/loading/LoadWithInfoViaFutures.java @@ -13,321 +13,318 @@ * Provides a simple example of adding records to the Senzing repository. */ public class LoadWithInfoViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = LoadWithInfoViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, Record> pendingFutures = new IdentityHashMap<>(); - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - boolean eof = false; - - while (!eof) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - // read the next line - String line = br.readLine(); - lineNumber++; - - // check for EOF - if (line == null) { - eof = true; - break; - } - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - // construct the Record instance - Record record = new Record(lineNumber, line); - - try { - // parse the line as a JSON object - JsonObject recordJson - = Json.createReader(new StringReader(line)).readObject(); - - // extract the data source code and record ID - String dataSourceCode = recordJson.getString(DATA_SOURCE, null); - String recordId = recordJson.getString(RECORD_ID, null); - SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); - - Future future = executor.submit(() -> { - // call the addRecord() function with info flags - return engine.addRecord(recordKey, record.line, SZ_WITH_INFO_FLAGS); - }); - - // add the futures to the pending future list - pendingFutures.put(future, record); - - } catch (JsonException e) { - logFailedRecord(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - } - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(engine, pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - } - - // shutdown the executor service - executor.shutdown(); - - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - handlePendingFutures(engine, pendingFutures, true); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // check if executor service is shutdown - if (!executor.isShutdown()) { - executor.shutdown(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - - System.out.println(); - System.out.println("Records successfully added : " + successCount); - System.out.println("Total entities created : " + entityIdSet.size()); - System.out.println("Records failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " records to be retried in " + retryFile); - } - System.out.flush(); - - } - + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); } - private static void handlePendingFutures(SzEngine engine, - Map, Record> pendingFutures, - boolean blocking) - throws Exception - { - // check for completed futures - Iterator, Record>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, Record> entry = iter.next(); - Future future = entry.getKey(); - Record record = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // create a descriptive instance name (can be anything) + String instanceName = LoadWithInfoViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) { + continue; + } + + // skip any commented lines + if (line.startsWith("#")) { + continue; + } + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with info flags + return engine.addRecord(recordKey, record.line, SZ_WITH_INFO_FLAGS); + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } - // remove the pending future from the map - iter.remove(); + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - try { - // get the value to see if there was an exception - String info = future.get(); - - // if we get here then increment the success count - successCount++; - - // process the info - processInfo(engine, info); - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzBadInputException e) { - logFailedRecord(ERROR, e, record.lineNumber, record.line); - errorCount++; // increment the error count - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedRecord(WARNING, e, record.lineNumber, record.line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(record.line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, record.lineNumber, record.line); - errorCount++; - throw e; // rethrow since exception is critical + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing } - } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Total entities created : " + entityIdSet.size()); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + } - /** - * Example method for parsing and handling the INFO message (formatted - * as JSON). This example implementation simply tracks all entity ID's - * that appear as "AFFECTED_ENTITIES" to count the number - * of entities created for the records -- essentially a contrived - * data mart. - * - * @param engine The {@link SzEngine} to use. - * @param info The info message. - */ - private static void processInfo(SzEngine engine, String info) { - JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); - if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { - return; + } + + private static void handlePendingFutures(SzEngine engine, + Map, Record> pendingFutures, + boolean blocking) + throws Exception { + // check for completed futures + Iterator, Record>> iter = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); } - JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); - for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { - JsonNumber number = affected.getJsonNumber(ENTITY_ID); - long entityId = number.longValue(); - try { - engine.getEntity(entityId, null); - entityIdSet.add(entityId); - } catch (SzNotFoundException e) { - entityIdSet.remove(entityId); - } catch (SzException e) { - // simply log the exception, do not rethrow - System.err.println(); - System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); - System.err.println(e.toString()); - System.err.flush(); - } + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } } - - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param recordJson The JSON text for the failed record. - */ - private static void logFailedRecord(String errorType, - Exception exception, - int lineNumber, - String recordJson) - { + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param engine The {@link SzEngine} to use. + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { + return; + } + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); - System.err.println(recordJson); - System.err.println(exception); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); System.err.flush(); + } } - - private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; - - private static final String UTF_8 = "UTF-8"; - - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; - - private static final int THREAD_COUNT = 8; - - private static final int BACKLOG_FACTOR = 10; - - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - - private static final long PAUSE_TIMEOUT = 100L; - - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; - private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; - private static final String ENTITY_ID = "ENTITY_ID"; - - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - public record Record(int lineNumber, String line) { } - - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; - private static Set entityIdSet = new HashSet<>(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { + } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static Set entityIdSet = new HashSet<>(); } diff --git a/java/snippets/loading/LoadWithStatsViaLoop.java b/java/snippets/loading/LoadWithStatsViaLoop.java index 716e7e8..efd71ec 100644 --- a/java/snippets/loading/LoadWithStatsViaLoop.java +++ b/java/snippets/loading/LoadWithStatsViaLoop.java @@ -11,180 +11,178 @@ * Provides a simple example of adding records to the Senzing repository. */ public class LoadWithStatsViaLoop { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } - // create a descriptive instance name (can be anything) - String instanceName = LoadWithStatsViaLoop.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - // loop through the example records and add them to the repository - for (String line = br.readLine(); line != null; line = br.readLine()) { - // increment the line number - lineNumber++; - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - try { - // parse the line as a JSON object - JsonObject recordJson - = Json.createReader(new StringReader(line)).readObject(); - - // extract the data source code and record ID - String dataSourceCode = recordJson.getString(DATA_SOURCE, null); - String recordId = recordJson.getString(RECORD_ID, null); - - // call the addRecord() function with no flags - engine.addRecord( - SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); - - successCount++; - - // check if it is time obtain stats - if ((successCount % STATS_INTERVAL) == 0) { - try { - String stats = engine.getStats(); - if (stats.length() > STATS_TRUNCATE) { - stats = stats.substring(0, STATS_TRUNCATE) + " ..."; - } - System.out.println("* STATS: " + stats); - - } catch (SzException e) { - // trap the stats exeption so it is not misinterpreted - // as an exception from engine.addRecord() - System.err.println("**** FAILED TO OBTAIN STATS: " + e); - } - } - - } catch (JsonException | SzBadInputException e) { - logFailedRecord(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - - } catch (SzRetryableException e) { - logFailedRecord(WARNING, e, lineNumber, line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, lineNumber, line); - errorCount++; - throw e; // rethrow since exception is critical - } - } + // create a descriptive instance name (can be anything) + String instanceName = LoadWithStatsViaLoop.class.getSimpleName(); - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); - } finally { - // IMPORTANT: make sure to destroy the environment - env.destroy(); + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - System.out.println(); - System.out.println("Records successfully added : " + successCount); - System.out.println("Records failed with errors : " + errorCount); + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " records to be retried in " + retryFile); - } - System.out.flush(); + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; - } + // trim the line + line = line.trim(); - } + // skip any blank lines + if (line.length() == 0) { + continue; + } - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param recordJson The JSON text for the failed record. - */ - private static void logFailedRecord(String errorType, - Exception exception, - int lineNumber, - String recordJson) - { - System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); - System.err.println(recordJson); - System.err.println(exception); - System.err.flush(); - } + // skip any commented lines + if (line.startsWith("#")) { + continue; + } - private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + try { + // parse the line as a JSON object + JsonObject recordJson = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + // check if it is time obtain stats + if ((successCount % STATS_INTERVAL) == 0) { + try { + String stats = engine.getStats(); + if (stats.length() > STATS_TRUNCATE) { + stats = stats.substring(0, STATS_TRUNCATE) + " ..."; + } + System.out.println("* STATS: " + stats); + + } catch (SzException e) { + // trap the stats exception so it is not misinterpreted + // as an exception from engine.addRecord() + System.err.println("**** FAILED TO OBTAIN STATS: " + e); + } + } - private static final String UTF_8 = "UTF-8"; + } catch (JsonException | SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count - private static final int STATS_INTERVAL = 100; - private static final int STATS_TRUNCATE = 70; + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); - private static final String DATA_SOURCE = "DATA_SOURCE"; - private static final String RECORD_ID = "RECORD_ID"; + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; + } - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final int STATS_INTERVAL = 100; + private static final int STATS_TRUNCATE = 70; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; } diff --git a/java/snippets/loading/README.md b/java/snippets/loading/README.md index 035b9a8..37022e0 100644 --- a/java/snippets/loading/README.md +++ b/java/snippets/loading/README.md @@ -7,9 +7,9 @@ The loading snippets outline adding new source records. Adding source records in - **LoadRecords.java** - Basic iteration over a few records, adding each one - **LoadTruthSetWithInfoViaLoop.java** - - Read and load from multiple source files, adding a sample truth + - Read and load from multiple source files, adding a sample truth - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `addRecord()` method and track the entity ID's for the records. -- **LoaeViaFutures.java** +- **LoadViaFutures.java** - Read and load source records from a file using multiple threads - **LoadViaLoop.java** - Basic read and add source records from a file diff --git a/java/snippets/redo/RedoContinuous.java b/java/snippets/redo/RedoContinuous.java index d750fd7..69fa7a6 100644 --- a/java/snippets/redo/RedoContinuous.java +++ b/java/snippets/redo/RedoContinuous.java @@ -12,165 +12,162 @@ * the pending redo records in the Senzing repository. */ public class RedoContinuous { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuous.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo records are available + if (redo == null) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; } - // create a descriptive instance name (can be anything) - String instanceName = RedoContinuous.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - // make sure we cleanup if exiting by CTRL-C or due to an exception - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - // IMPORTANT: make sure to destroy the environment - env.destroy(); - outputRedoStatistics(); - })); - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - while (true) { - // get the next redo record - String redo = engine.getRedoRecord(); - - // check if no redo reords are available - if (redo == null) { - outputRedoStatistics(); - System.out.println(); - System.out.println( - "No redo records to process. Pausing for " - + REDO_PAUSE_DESCRIPTION + "...."); - System.out.println("Press CTRL-C to exit."); - try { - Thread.sleep(REDO_PAUSE_TIMEOUT); - } catch (InterruptedException ignore) { - // ignore the exception - } - continue; - } - - try { - // process the redo record - engine.processRedoRecord(redo, SZ_NO_FLAGS); - - // increment the redone count - redoneCount++; - - } catch (SzRetryableException e) { - logFailedRedo(WARNING, e, redo); - errorCount++; - retryCount++; - trackRetryRecord(redo); - - } catch (Exception e) { - logFailedRedo(CRITICAL, e, redo); - errorCount++; - throw e; - } - } + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // increment the redone count + redoneCount++; + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; } - + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception } - private static void outputRedoStatistics() { - System.out.println(); - System.out.println("Redos successfully processed : " + redoneCount); - System.out.println("Total failed records/redos : " + errorCount); + } - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println( - retryCount + " records/redos to be retried in " + retryFile); - } - System.out.flush(); - } + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param redoRecord The JSON text for the redo record. - */ - private static void logFailedRedo(String errorType, - Exception exception, - String redoRecord) - { - System.err.println(); - System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); - System.err.println(redoRecord); - System.err.println(exception); - System.err.flush(); + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); } - - /** - * Tracks the specified JSON record definition to be retried in a - * retry file. - * - * @param recordJson The JSON text defining the record to be retried. - * - * @throws IOException If a failure occurs in writing the record to the - * retry file. - */ - private static void trackRetryRecord(String recordJson) - throws IOException - { - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(recordJson); + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); } + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The JSON text for the redo record. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } + + private static final String UTF_8 = "UTF-8"; - private static final String UTF_8 = "UTF-8"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; + private static final long REDO_PAUSE_TIMEOUT = 30000L; - private static final long REDO_PAUSE_TIMEOUT = 30000L; + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; - private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - private static int errorCount = 0; - private static int redoneCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; } diff --git a/java/snippets/redo/RedoContinuousViaFutures.java b/java/snippets/redo/RedoContinuousViaFutures.java index 5d30d96..eae1d5d 100644 --- a/java/snippets/redo/RedoContinuousViaFutures.java +++ b/java/snippets/redo/RedoContinuousViaFutures.java @@ -15,273 +15,268 @@ * futures. */ public class RedoContinuousViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuousViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, String> pendingFutures = new IdentityHashMap<>(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // shutdown the executor service + if (!executor.isShutdown()) { + executor.shutdown(); + } + + try { + handlePendingFutures(pendingFutures, true); + } catch (Exception e) { + e.printStackTrace(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo records are available + if (redo == null) { + break; + } + + Future future = executor.submit(() -> { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // return null since we have no "info" to return + return null; + }); + + // add the future to the pending future list + pendingFutures.put(future, redo); } - // create a descriptive instance name (can be anything) - String instanceName = RedoContinuousViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, String> pendingFutures = new IdentityHashMap<>(); - - // make sure we cleanup if exiting by CTRL-C or due to an exception - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - // shutdown the executor service - if (!executor.isShutdown()) { - executor.shutdown(); - } + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - handlePendingFutures(pendingFutures, true); - } catch (Exception e) { - e.printStackTrace(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - outputRedoStatistics(); - })); + Thread.sleep(HANDLE_PAUSE_TIMEOUT); - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - while (true) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - - // get the next redo record - String redo = engine.getRedoRecord(); - - // check if no redo reords are available - if (redo == null) { - break; - } - - Future future = executor.submit(() -> { - // process the redo record - engine.processRedoRecord(redo, SZ_NO_FLAGS); - - // return null since we have no "info" to return - return null; - }); - - // add the future to the pending future list - pendingFutures.put(future, redo); - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(HANDLE_PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - - // check if there are no redo records right now - if (engine.countRedoRecords() == 0) { - outputRedoStatistics(); - System.out.println(); - System.out.println( - "No redo records to process. Pausing for " - + REDO_PAUSE_DESCRIPTION + "...."); - System.out.println("Press CTRL-C to exit."); - try { - Thread.sleep(REDO_PAUSE_TIMEOUT); - } catch (InterruptedException ignore) { - // ignore the exception - } - continue; - } + } catch (InterruptedException ignore) { + // do nothing } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + + // check if there are no redo records right now + if (engine.countRedoRecords() == 0) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception + } + } - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); + private static void handlePendingFutures(Map, String> pendingFutures, + boolean blocking) + throws Exception { + // check for completed futures + Iterator, String>> iter = pendingFutures.entrySet().iterator(); - } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception - } + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, String> entry = iter.next(); + Future future = entry.getKey(); + String redoRecord = entry.getValue(); - } + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } - private static void handlePendingFutures(Map, String> pendingFutures, - boolean blocking) - throws Exception - { - // check for completed futures - Iterator, String>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, String> entry = iter.next(); - Future future = entry.getKey(); - String redoRecord = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + redoneCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } - // remove the pending future from the map - iter.remove(); + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRedo(WARNING, e, redoRecord); + errorCount++; // increment the error count + retryCount++; // increment the retry count - try { - try { - // get the value to see if there was an exception - future.get(); - - // if we get here then increment the success count - redoneCount++; - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedRedo(WARNING, e, redoRecord); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - trackRetryRecord(redoRecord); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedRedo(CRITICAL, e, redoRecord); - errorCount++; - throw e; // rethrow since exception is critical - } - } + // track the retry record so it can be retried later + trackRetryRecord(redoRecord); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRedo(CRITICAL, e, redoRecord); + errorCount++; + throw e; // rethrow since exception is critical + } } + } - private static void outputRedoStatistics() { - System.out.println(); - System.out.println("Redos successfully processed : " + redoneCount); - System.out.println("Total failed records/redos : " + errorCount); + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println( - retryCount + " records/redos to be retried in " + retryFile); - } - System.out.flush(); + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); } - - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param redoRecord The JSON text for the redo record. - */ - private static void logFailedRedo(String errorType, - Exception exception, - String redoRecord) - { - System.err.println(); - System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); - System.err.println(redoRecord); - System.err.println(exception); - System.err.flush(); + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); } - - /** - * Tracks the specified JSON record definition to be retried in a - * retry file. - * - * @param recordJson The JSON text defining the record to be retried. - * - * @throws IOException If a failure occurs in writing the record to the - * retry file. - */ - private static void trackRetryRecord(String recordJson) - throws IOException - { - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(recordJson); + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The JSON text for the redo record. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } - - private static final String UTF_8 = "UTF-8"; + retryWriter.println(recordJson); + } + + private static final String UTF_8 = "UTF-8"; - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; - private static final int THREAD_COUNT = 8; + private static final int THREAD_COUNT = 8; - private static final int BACKLOG_FACTOR = 10; + private static final int BACKLOG_FACTOR = 10; - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - private static final long HANDLE_PAUSE_TIMEOUT = 100L; + private static final long HANDLE_PAUSE_TIMEOUT = 100L; - private static final long REDO_PAUSE_TIMEOUT = 30000L; + private static final long REDO_PAUSE_TIMEOUT = 30000L; - private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; - private static int errorCount = 0; - private static int redoneCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; } diff --git a/java/snippets/redo/RedoWithInfoContinuous.java b/java/snippets/redo/RedoWithInfoContinuous.java index 161740e..cd6b7b6 100644 --- a/java/snippets/redo/RedoWithInfoContinuous.java +++ b/java/snippets/redo/RedoWithInfoContinuous.java @@ -20,208 +20,205 @@ * the INFO messages returned from processing those redo records. */ public class RedoWithInfoContinuous { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoWithInfoContinuous.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo records are available + if (redo == null) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; } - // create a descriptive instance name (can be anything) - String instanceName = RedoWithInfoContinuous.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - // make sure we cleanup if exiting by CTRL-C or due to an exception - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - // IMPORTANT: make sure to destroy the environment - env.destroy(); - outputRedoStatistics(); - })); - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - while (true) { - // get the next redo record - String redo = engine.getRedoRecord(); - - // check if no redo reords are available - if (redo == null) { - outputRedoStatistics(); - System.out.println(); - System.out.println( - "No redo records to process. Pausing for " - + REDO_PAUSE_DESCRIPTION + "...."); - System.out.println("Press CTRL-C to exit."); - try { - Thread.sleep(REDO_PAUSE_TIMEOUT); - } catch (InterruptedException ignore) { - // ignore the exception - } - continue; - } - - try { - // process the redo record - String info = engine.processRedoRecord(redo, SZ_WITH_INFO_FLAGS); - - // increment the redone count - redoneCount++; - - // process the info - processInfo(engine, info); - - } catch (SzRetryableException e) { - logFailedRedo(WARNING, e, redo); - errorCount++; - retryCount++; - trackRetryRecord(redo); - - } catch (Exception e) { - logFailedRedo(CRITICAL, e, redo); - errorCount++; - throw e; - } - } + // process the redo record + String info = engine.processRedoRecord(redo, SZ_WITH_INFO_FLAGS); + + // increment the redone count + redoneCount++; + // process the info + processInfo(engine, info); + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // normally we would call env.destroy() here, but we have registered - // a shutdown hook to do that since termination will typically occur - // via CTRL-C being pressed, and the shutdown hook will still run if - // we get an exception + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; } - + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception } - private static void outputRedoStatistics() { - System.out.println(); - System.out.println("Redos successfully processed : " + redoneCount); - System.out.println("Total entities affected : " + entityIdSet.size()); - System.out.println("Total failed records/redos : " + errorCount); + } - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println( - retryCount + " records/redos to be retried in " + retryFile); - } - System.out.flush(); - } + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total entities affected : " + entityIdSet.size()); + System.out.println("Total failed records/redos : " + errorCount); - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param redoRecord The JSON text for the redo record. - */ - private static void logFailedRedo(String errorType, - Exception exception, - String redoRecord) - { + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The JSON text for the redo record. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param engine The {@link SzEngine} to use. + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { + return; + } + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow System.err.println(); - System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); - System.err.println(redoRecord); - System.err.println(exception); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); System.err.flush(); + } } + } - /** - * Tracks the specified JSON record definition to be retried in a - * retry file. - * - * @param recordJson The JSON text defining the record to be retried. - * - * @throws IOException If a failure occurs in writing the record to the - * retry file. - */ - private static void trackRetryRecord(String recordJson) - throws IOException - { - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(recordJson); - } - - /** - * Example method for parsing and handling the INFO message (formatted - * as JSON). This example implementation simply tracks all entity ID's - * that appear as "AFFECTED_ENTITIES" to count the number - * of entities created for the records -- essentially a contrived - * data mart. - * - * @param engine The {@link SzEngine} to use. - * @param info The info message. - */ - private static void processInfo(SzEngine engine, String info) { - JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); - if (!jsonObject.containsKey(AFFECTED_ENTITIES)) { - return; - } - JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); - for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { - JsonNumber number = affected.getJsonNumber(ENTITY_ID); - long entityId = number.longValue(); - - try { - engine.getEntity(entityId, null); - entityIdSet.add(entityId); - } catch (SzNotFoundException e) { - entityIdSet.remove(entityId); - } catch (SzException e) { - // simply log the exception, do not rethrow - System.err.println(); - System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); - System.err.println(e.toString()); - System.err.flush(); - } - } - } - - private static final String UTF_8 = "UTF-8"; + private static final String UTF_8 = "UTF-8"; - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; - private static final long REDO_PAUSE_TIMEOUT = 30000L; + private static final long REDO_PAUSE_TIMEOUT = 30000L; - private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; - private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; - private static final String ENTITY_ID = "ENTITY_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; - private static int errorCount = 0; - private static int redoneCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; - private static Set entityIdSet = new HashSet<>(); + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static Set entityIdSet = new HashSet<>(); } diff --git a/java/snippets/searching/SearchRecords.java b/java/snippets/searching/SearchRecords.java index c1e3df1..2c29828 100644 --- a/java/snippets/searching/SearchRecords.java +++ b/java/snippets/searching/SearchRecords.java @@ -10,106 +10,107 @@ import static com.senzing.sdk.SzFlag.*; /** - * Provides a simple example of searching for entities in the Senzing repository. + * Provides a simple example of searching for entities in the Senzing + * repository. */ public class SearchRecords { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } - // create a descriptive instance name (can be anything) - String instanceName = SearchRecords.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - // loop through the example records and add them to the repository - for (String criteria : getSearchCriteria()) { - // call the searchByAttributes() function with default flags - String result = engine.searchByAttributes( - criteria, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); - - JsonObject jsonObj = Json.createReader( - new StringReader(result)).readObject(); - - System.out.println(); - JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); - if (jsonArr.size() == 0) { - System.out.println("No results for criteria: " + criteria); - } else { - System.out.println("Results for criteria: " + criteria); - for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { - obj = obj.getJsonObject("ENTITY"); - obj = obj.getJsonObject("RESOLVED_ENTITY"); - long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); - String name = obj.getString("ENTITY_NAME", null); - System.out.println(entityId + ": " + name); - } - } - System.out.flush(); - } + // create a descriptive instance name (can be anything) + String instanceName = SearchRecords.class.getSimpleName(); - } catch (SzException e) { - // handle any exception that may have occurred - System.err.println("Senzing Error Message : " + e.getMessage()); - System.err.println("Senzing Error Code : " + e.getErrorCode()); - e.printStackTrace(); - throw new RuntimeException(e); - - } catch (Exception e) { - e.printStackTrace(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (String criteria : getSearchCriteria()) { + // call the searchByAttributes() function with default flags + String result = engine.searchByAttributes( + criteria, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); + + JsonObject jsonObj = Json.createReader( + new StringReader(result)).readObject(); - } finally { - // IMPORTANT: make sure to destroy the environment - env.destroy(); + System.out.println(); + JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); + if (jsonArr.size() == 0) { + System.out.println("No results for criteria: " + criteria); + } else { + System.out.println("Results for criteria: " + criteria); + for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { + obj = obj.getJsonObject("ENTITY"); + obj = obj.getJsonObject("RESOLVED_ENTITY"); + long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); + String name = obj.getString("ENTITY_NAME", null); + System.out.println(entityId + ": " + name); + } } + System.out.flush(); + } + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); } - /** - * This is a support method for providing a list of criteria to search on. - * - * @return A {@link List} {@link String} JSON text values desribing the - * sets of criteria with which to search. - */ - public static List getSearchCriteria() { - List records = new LinkedList<>(); - records.add( - """ + } + + /** + * This is a support method for providing a list of criteria to search on. + * + * @return A {@link List} {@link String} JSON text values describing the + * sets of criteria with which to search. + */ + public static List getSearchCriteria() { + List records = new LinkedList<>(); + records.add( + """ { "NAME_FULL": "Susan Moony", "DATE_OF_BIRTH": "15/6/1998", "SSN_NUMBER": "521212123" } """); - - records.add( - """ + + records.add( + """ { "NAME_FIRST": "Robert", "NAME_LAST": "Smith", "ADDR_FULL": "123 Main Street Las Vegas NV 89132" } """); - - records.add( - """ + + records.add( + """ { "NAME_FIRST": "Makio", "NAME_LAST": "Yamanaka", @@ -117,6 +118,6 @@ public static List getSearchCriteria() { } """); - return records; - } + return records; + } } diff --git a/java/snippets/searching/SearchViaFutures.java b/java/snippets/searching/SearchViaFutures.java index 7b0f501..ff2d25e 100644 --- a/java/snippets/searching/SearchViaFutures.java +++ b/java/snippets/searching/SearchViaFutures.java @@ -14,286 +14,283 @@ * using futures. */ public class SearchViaFutures { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = SearchViaFutures.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; - - // create the thread pool and executor service - ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); - - // keep track of pending futures and don't backlog too many for memory's sake - Map, Criteria> pendingFutures = new IdentityHashMap<>(); - - try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) - { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - int lineNumber = 0; - boolean eof = false; - - while (!eof) { - // loop through the example records and queue them up so long - // as we have more records and backlog is not too large - while (pendingFutures.size() < MAXIMUM_BACKLOG) { - // read the next line - String line = br.readLine(); - lineNumber++; - - // check for EOF - if (line == null) { - eof = true; - break; - } - - // trim the line - line = line.trim(); - - // skip any blank lines - if (line.length() == 0) { - continue; - } - - // skip any commented lines - if (line.startsWith("#")) { - continue; - } - - // construct the Record instance - Criteria criteria = new Criteria(lineNumber, line); - - try { - Future future = executor.submit(() -> { - // call the searchByAttributes() function with default flags - return engine.searchByAttributes( - criteria.line, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); - }); - - // add the future to the pending future list - pendingFutures.put(future, criteria); - - } catch (JsonException e) { - logFailedSearch(ERROR, e, lineNumber, line); - errorCount++; // increment the error count - } - } - - do { - // handle any pending futures WITHOUT blocking to reduce the backlog - handlePendingFutures(pendingFutures, false); - - // if we still have exceeded the backlog size then pause - // briefly before trying again - if (pendingFutures.size() >= MAXIMUM_BACKLOG) { - try { - Thread.sleep(PAUSE_TIMEOUT); - - } catch (InterruptedException ignore) { - // do nothing - } - } - } while (pendingFutures.size() >= MAXIMUM_BACKLOG); - } - - // shutdown the executor service - executor.shutdown(); - - // after we have submitted all records we need to handle the remaining - // pending futures so this time we block on each future - handlePendingFutures(pendingFutures, true); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - System.err.flush(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // check if executor service is shutdown - if (!executor.isShutdown()) { - executor.shutdown(); - } - - // IMPORTANT: make sure to destroy the environment - env.destroy(); - - System.out.println(); - System.out.println( - "Searches successfully completed : " + successCount); - System.out.println( - "Total entities found via searches : " + foundEntities.size()); - System.out.println( - "Searches failed with errors : " + errorCount); - - // check on any retry records - if (retryWriter != null) { - retryWriter.flush(); - retryWriter.close(); - } - if (retryCount > 0) { - System.out.println(retryCount + " searches to be retried in " + retryFile); - } - System.out.flush(); - - } - + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); } - private static void handlePendingFutures(Map, Criteria> pendingFutures, - boolean blocking) - throws Exception - { - // check for completed futures - Iterator, Criteria>> iter - = pendingFutures.entrySet().iterator(); - - // loop through the pending futures - while (iter.hasNext()) { - // get the next pending future - Map.Entry, Criteria> entry = iter.next(); - Future future = entry.getKey(); - Criteria criteria = entry.getValue(); - - // if not blocking and this one is not done then continue - if (!blocking && !future.isDone()) { - continue; - } + // create a descriptive instance name (can be anything) + String instanceName = SearchViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Criteria> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) { + continue; + } + + // skip any commented lines + if (line.startsWith("#")) { + continue; + } + + // construct the Record instance + Criteria criteria = new Criteria(lineNumber, line); + + try { + Future future = executor.submit(() -> { + // call the searchByAttributes() function with default flags + return engine.searchByAttributes( + criteria.line, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); + }); + + // add the future to the pending future list + pendingFutures.put(future, criteria); + + } catch (JsonException e) { + logFailedSearch(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } - // remove the pending future from the map - iter.remove(); + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { try { - try { - // get the value and check for an exception - String results = future.get(); - - // if we get here then increment the success count - successCount++; - - // parse the results - JsonObject jsonObj = Json.createReader( - new StringReader(results)).readObject(); - - JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); - for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { - obj = obj.getJsonObject("ENTITY"); - obj = obj.getJsonObject("RESOLVED_ENTITY"); - long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); - foundEntities.add(entityId); - } - - - } catch (InterruptedException e) { - // this could only happen if blocking is true, just - // rethrow as retryable and log the interruption - throw e; - - } catch (ExecutionException e) { - // if execution failed with an exception then retrhow - Throwable cause = e.getCause(); - if ((cause == null) || !(cause instanceof Exception)) { - // rethrow the execution exception - throw e; - } - // cast to an Exception and rethrow - throw ((Exception) cause); - } - - } catch (SzBadInputException e) { - logFailedSearch(ERROR, e, criteria.lineNumber, criteria.line); - errorCount++; // increment the error count - - } catch (SzRetryableException | InterruptedException | CancellationException e) { - // handle thread interruption and cancellation as retries - logFailedSearch(WARNING, e, criteria.lineNumber, criteria.line); - errorCount++; // increment the error count - retryCount++; // increment the retry count - - // track the retry record so it can be retried later - if (retryFile == null) { - retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); - retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); - } - retryWriter.println(criteria.line); - - } catch (Exception e) { - // catch any other exception (incl. SzException) here - logFailedSearch(CRITICAL, e, criteria.lineNumber, criteria.line); - errorCount++; - throw e; // rethrow since exception is critical + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing } - } - } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println( + "Searches successfully completed : " + successCount); + System.out.println( + "Total entities found via searches : " + foundEntities.size()); + System.out.println( + "Searches failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " searches to be retried in " + retryFile); + } + System.out.flush(); - /** - * Example method for logging failed records. - * - * @param errorType The error type description. - * @param exception The exception itself. - * @param lineNumber The line number of the failed record in the JSON input file. - * @param criteriaJson The JSON text for the failed search criteria. - */ - private static void logFailedSearch(String errorType, - Exception exception, - int lineNumber, - String criteriaJson) - { - System.err.println(); - System.err.println( - "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " + lineNumber + ": "); - System.err.println(criteriaJson); - System.err.println(exception); - System.err.flush(); } - private static final String DEFAULT_FILE_PATH = "../resources/data/search-5K.jsonl"; - - private static final String UTF_8 = "UTF-8"; - - private static final String RETRY_PREFIX = "retry-"; - private static final String RETRY_SUFFIX = ".jsonl"; - - private static final int THREAD_COUNT = 8; - - private static final int BACKLOG_FACTOR = 10; - - private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; - - private static final long PAUSE_TIMEOUT = 100L; - - private static final String ERROR = "ERROR"; - private static final String WARNING = "WARNING"; - private static final String CRITICAL = "CRITICAL"; - - public record Criteria(int lineNumber, String line) { } - - private static int errorCount = 0; - private static int successCount = 0; - private static int retryCount = 0; - private static File retryFile = null; - private static PrintWriter retryWriter = null; + } + + private static void handlePendingFutures(Map, Criteria> pendingFutures, + boolean blocking) + throws Exception { + // check for completed futures + Iterator, Criteria>> iter = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry, Criteria> entry = iter.next(); + Future future = entry.getKey(); + Criteria criteria = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) { + continue; + } + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value and check for an exception + String results = future.get(); + + // if we get here then increment the success count + successCount++; + + // parse the results + JsonObject jsonObj = Json.createReader( + new StringReader(results)).readObject(); + + JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); + for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { + obj = obj.getJsonObject("ENTITY"); + obj = obj.getJsonObject("RESOLVED_ENTITY"); + long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); + foundEntities.add(entityId); + } + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } - private static Set foundEntities = new HashSet<>(); + } catch (SzBadInputException e) { + logFailedSearch(ERROR, e, criteria.lineNumber, criteria.line); + errorCount++; // increment the error count + + } catch (SzRetryableException | InterruptedException | CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedSearch(WARNING, e, criteria.lineNumber, criteria.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(criteria.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedSearch(CRITICAL, e, criteria.lineNumber, criteria.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input + * file. + * @param criteriaJson The JSON text for the failed search criteria. + */ + private static void logFailedSearch(String errorType, + Exception exception, + int lineNumber, + String criteriaJson) { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " + lineNumber + ": "); + System.err.println(criteriaJson); + System.err.println(exception); + System.err.flush(); + } + + private static final String DEFAULT_FILE_PATH = "../resources/data/search-5K.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Criteria(int lineNumber, String line) { + } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + private static Set foundEntities = new HashSet<>(); } diff --git a/java/snippets/stewardship/ForceResolve.java b/java/snippets/stewardship/ForceResolve.java index 4030aa0..bb54c5e 100644 --- a/java/snippets/stewardship/ForceResolve.java +++ b/java/snippets/stewardship/ForceResolve.java @@ -14,123 +14,123 @@ * otherwise will not resolve to one another. */ public class ForceResolve { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = ForceResolve.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - Map recordMap = getRecords(); - // loop through the example records and add them to the repository - for (Map.Entry entry : recordMap.entrySet()) { - SzRecordKey recordKey = entry.getKey(); - String recordDefinition = entry.getValue(); - - // call the addRecord() function with no flags - engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); - - System.out.println("Record " + recordKey.recordId() + " added"); - System.out.flush(); - } - - System.out.println(); - for (SzRecordKey recordKey : recordMap.keySet()) { - String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); - JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); - long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") - .getJsonNumber("ENTITY_ID").longValue(); - System.out.println( - "Record " + recordKey + " originally resolves to entity " + entityId); - } - System.out.println(); - System.out.println("Updating records with TRUSTED_ID to force resolve..."); - SzRecordKey key1 = SzRecordKey.of(TEST, "1"); - SzRecordKey key3 = SzRecordKey.of(TEST, "3"); - - String record1 = engine.getRecord(key1, SZ_RECORD_DEFAULT_FLAGS); - String record3 = engine.getRecord(key3, SZ_RECORD_DEFAULT_FLAGS); - - JsonObject obj1 = Json.createReader(new StringReader(record1)).readObject(); - JsonObject obj3 = Json.createReader(new StringReader(record3)).readObject(); - - obj1 = obj1.getJsonObject("JSON_DATA"); - obj3 = obj3.getJsonObject("JSON_DATA"); - - JsonObjectBuilder job1 = Json.createObjectBuilder(obj1); - JsonObjectBuilder job3 = Json.createObjectBuilder(obj3); - - for (JsonObjectBuilder job : List.of(job1, job3)) { - job.add("TRUSTED_ID_NUMBER", "TEST_R1-TEST_R3"); - job.add("TRUSTED_ID_TYPE", "FORCE_RESOLVE"); - } - - record1 = job1.build().toString(); - record3 = job3.build().toString(); - - engine.addRecord(key1, record1, SZ_NO_FLAGS); - engine.addRecord(key3, record3, SZ_NO_FLAGS); - - System.out.println(); - for (SzRecordKey recordKey : recordMap.keySet()) { - String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); - JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); - long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") - .getJsonNumber("ENTITY_ID").longValue(); - System.out.println( - "Record " + recordKey + " now resolves to entity " + entityId); - } - System.out.println(); - - } catch (SzException e) { - // handle any exception that may have occurred - System.err.println("Senzing Error Message : " + e.getMessage()); - System.err.println("Senzing Error Code : " + e.getErrorCode()); - e.printStackTrace(); - throw new RuntimeException(e); - - } catch (Exception e) { - System.err.println(); - System.err.println("*** Terminated due to critical error ***"); - e.printStackTrace(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // IMPORTANT: make sure to destroy the environment - env.destroy(); - } + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + // create a descriptive instance name (can be anything) + String instanceName = ForceResolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID to force resolve..."); + SzRecordKey key1 = SzRecordKey.of(TEST, "1"); + SzRecordKey key3 = SzRecordKey.of(TEST, "3"); + + String record1 = engine.getRecord(key1, SZ_RECORD_DEFAULT_FLAGS); + String record3 = engine.getRecord(key3, SZ_RECORD_DEFAULT_FLAGS); + + JsonObject obj1 = Json.createReader(new StringReader(record1)).readObject(); + JsonObject obj3 = Json.createReader(new StringReader(record3)).readObject(); + + obj1 = obj1.getJsonObject("JSON_DATA"); + obj3 = obj3.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job1 = Json.createObjectBuilder(obj1); + JsonObjectBuilder job3 = Json.createObjectBuilder(obj3); + + for (JsonObjectBuilder job : List.of(job1, job3)) { + job.add("TRUSTED_ID_NUMBER", "TEST_R1-TEST_R3"); + job.add("TRUSTED_ID_TYPE", "FORCE_RESOLVE"); + } + + record1 = job1.build().toString(); + record3 = job3.build().toString(); + + engine.addRecord(key1, record1, SZ_NO_FLAGS); + engine.addRecord(key3, record3, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); } - /** - * This is a support method for providing example records to add. - * - * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} - * JSON text values desribing the records to be added. - */ - public static Map getRecords() { - Map records = new LinkedHashMap<>(); - records.put( - SzRecordKey.of("TEST", "1"), - """ + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "1", @@ -141,10 +141,10 @@ public static Map getRecords() { "DATE_OF_BIRTH": "1/12/1990" } """); - - records.put( - SzRecordKey.of("TEST", "2"), - """ + + records.put( + SzRecordKey.of("TEST", "2"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "2", @@ -154,10 +154,10 @@ public static Map getRecords() { "DATE_OF_BIRTH": "5/4/1994" } """); - - records.put( - SzRecordKey.of("TEST", "3"), - """ + + records.put( + SzRecordKey.of("TEST", "3"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "3", @@ -166,9 +166,9 @@ public static Map getRecords() { "PHONE_NUMBER": "787-767-2688" } """); - - return records; - } - private static final String TEST = "TEST"; + return records; + } + + private static final String TEST = "TEST"; } diff --git a/java/snippets/stewardship/ForceUnresolve.java b/java/snippets/stewardship/ForceUnresolve.java index a887314..df2b5ae 100644 --- a/java/snippets/stewardship/ForceUnresolve.java +++ b/java/snippets/stewardship/ForceUnresolve.java @@ -14,122 +14,122 @@ * otherwise will not resolve to one another. */ public class ForceUnresolve { - public static void main(String[] args) { - // get the senzing repository settings - String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); - if (settings == null) { - System.err.println("Unable to get settings."); - throw new IllegalArgumentException("Unable to get settings"); - } - - // create a descriptive instance name (can be anything) - String instanceName = ForceUnresolve.class.getSimpleName(); - - // initialize the Senzing environment - SzEnvironment env = SzCoreEnvironment.newBuilder() - .settings(settings) - .instanceName(instanceName) - .verboseLogging(false) - .build(); - - try { - // get the engine from the environment - SzEngine engine = env.getEngine(); - - Map recordMap = getRecords(); - // loop through the example records and add them to the repository - for (Map.Entry entry : recordMap.entrySet()) { - SzRecordKey recordKey = entry.getKey(); - String recordDefinition = entry.getValue(); - - // call the addRecord() function with no flags - engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); - - System.out.println("Record " + recordKey.recordId() + " added"); - System.out.flush(); - } - - System.out.println(); - for (SzRecordKey recordKey : recordMap.keySet()) { - String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); - JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); - long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") - .getJsonNumber("ENTITY_ID").longValue(); - System.out.println( - "Record " + recordKey + " originally resolves to entity " + entityId); - } - System.out.println(); - System.out.println("Updating records with TRUSTED_ID to force unresolve..."); - SzRecordKey key4 = SzRecordKey.of(TEST, "4"); - SzRecordKey key6 = SzRecordKey.of(TEST, "6"); - - String record4 = engine.getRecord(key4, SZ_RECORD_DEFAULT_FLAGS); - String record6 = engine.getRecord(key6, SZ_RECORD_DEFAULT_FLAGS); - - JsonObject obj4 = Json.createReader(new StringReader(record4)).readObject(); - JsonObject obj6 = Json.createReader(new StringReader(record6)).readObject(); - - obj4 = obj4.getJsonObject("JSON_DATA"); - obj6 = obj6.getJsonObject("JSON_DATA"); - - JsonObjectBuilder job4 = Json.createObjectBuilder(obj4); - JsonObjectBuilder job6 = Json.createObjectBuilder(obj6); - - job4.add("TRUSTED_ID_NUMBER", "TEST_R4-TEST_R6"); - job4.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); - - job6.add("TRUSTED_ID_NUMBER", "TEST_R6-TEST_R4"); - job6.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); - - record4 = job4.build().toString(); - record6 = job6.build().toString(); - - engine.addRecord(key4, record4, SZ_NO_FLAGS); - engine.addRecord(key6, record6, SZ_NO_FLAGS); - - System.out.println(); - for (SzRecordKey recordKey : recordMap.keySet()) { - String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); - JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); - long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") - .getJsonNumber("ENTITY_ID").longValue(); - System.out.println( - "Record " + recordKey + " now resolves to entity " + entityId); - } - System.out.println(); - - } catch (SzException e) { - // handle any exception that may have occurred - System.err.println("Senzing Error Message : " + e.getMessage()); - System.err.println("Senzing Error Code : " + e.getErrorCode()); - e.printStackTrace(); - throw new RuntimeException(e); - - } catch (Exception e) { - e.printStackTrace(); - if (e instanceof RuntimeException) { - throw ((RuntimeException) e); - } - throw new RuntimeException(e); - - } finally { - // IMPORTANT: make sure to destroy the environment - env.destroy(); - } + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + // create a descriptive instance name (can be anything) + String instanceName = ForceUnresolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID to force unresolve..."); + SzRecordKey key4 = SzRecordKey.of(TEST, "4"); + SzRecordKey key6 = SzRecordKey.of(TEST, "6"); + + String record4 = engine.getRecord(key4, SZ_RECORD_DEFAULT_FLAGS); + String record6 = engine.getRecord(key6, SZ_RECORD_DEFAULT_FLAGS); + + JsonObject obj4 = Json.createReader(new StringReader(record4)).readObject(); + JsonObject obj6 = Json.createReader(new StringReader(record6)).readObject(); + + obj4 = obj4.getJsonObject("JSON_DATA"); + obj6 = obj6.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job4 = Json.createObjectBuilder(obj4); + JsonObjectBuilder job6 = Json.createObjectBuilder(obj6); + + job4.add("TRUSTED_ID_NUMBER", "TEST_R4-TEST_R6"); + job4.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + job6.add("TRUSTED_ID_NUMBER", "TEST_R6-TEST_R4"); + job6.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + record4 = job4.build().toString(); + record6 = job6.build().toString(); + + engine.addRecord(key4, record4, SZ_NO_FLAGS); + engine.addRecord(key6, record6, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); } - /** - * This is a support method for providing example records to add. - * - * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} - * JSON text values desribing the records to be added. - */ - public static Map getRecords() { - Map records = new LinkedHashMap<>(); - records.put( - SzRecordKey.of("TEST", "4"), - """ + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "4"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "4", @@ -139,10 +139,10 @@ public static Map getRecords() { "DATE_OF_BIRTH": "1/12/1990" } """); - - records.put( - SzRecordKey.of("TEST", "5"), - """ + + records.put( + SzRecordKey.of("TEST", "5"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "5", @@ -152,10 +152,10 @@ public static Map getRecords() { "DATE_OF_BIRTH": "1/12/1990" } """); - - records.put( - SzRecordKey.of("TEST", "6"), - """ + + records.put( + SzRecordKey.of("TEST", "6"), + """ { "DATA_SOURCE": "TEST", "RECORD_ID": "6", @@ -164,9 +164,9 @@ public static Map getRecords() { "PHONE_NUMBER": "202-787-7678" } """); - - return records; - } - private static final String TEST = "TEST"; + return records; + } + + private static final String TEST = "TEST"; }