From a12b77883efca79ff92adf3fead6bebeb62bc3f3 Mon Sep 17 00:00:00 2001 From: Javier Ferrer Date: Wed, 18 Sep 2024 10:13:37 +0100 Subject: [PATCH] Fix esbulk errors esbulk seems to have been causing errors with the previous settings. We did not notice them before because they do not halt the image-building process, but the database becomes inconsistent, as documents fail to import. According to an issue in the library's GitHub repository (https://github.com/miku/esbulk/issues/33) and its README (https://github.com/miku/esbulk?tab=readme-ov-file#esbulk), running esbulk with its default settings (i.e. without explicit -size and -w values) might solve those errors. So far, it seems to be working. --- .../posvm/postproduction/elasticsearch/run.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/terraform_create_images/scripts/posvm/postproduction/elasticsearch/run.sh b/terraform_create_images/scripts/posvm/postproduction/elasticsearch/run.sh index eca60e4..56a7dcc 100644 --- a/terraform_create_images/scripts/posvm/postproduction/elasticsearch/run.sh +++ b/terraform_create_images/scripts/posvm/postproduction/elasticsearch/run.sh @@ -45,7 +45,7 @@ function run_elasticsearch() { -e "bootstrap.memory_lock=true" \ -e "search.max_open_scroll_context=5000" \ -e ES_JAVA_OPTS="-Xms${JVM_SIZE_HALF}g -Xmx${JVM_SIZE_HALF}g" \ - -e "thread_pool.write.queue_size=1000" \ + -e "thread_pool.write.queue_size=-1" \ -v ${pos_es_docker_vol_data}:/usr/share/elasticsearch/data \ -v ${pos_es_docker_vol_logs}:/usr/share/elasticsearch/logs \ --ulimit memlock=-1:-1 \ @@ -88,19 +88,19 @@ function load_data_into_es_index() { #log "[INFO][${index_name}] Loading data file '${file}' with id '${id}'" for ((i = 1; i <= max_retries; i++)); do log "[INFO][${index_name}] Loading data file '${file}' with id '${id}' - Attempt #$i" - gsutil cp "${file}" - | esbulk -size 2000 -w 4 -index "${index_name}" -type _doc -server http://localhost:9200 -id "${id}" && break || log "[ERROR][${index_name}] Loading data file '${file}' with id '${id}' - FAILED Attempt #$i, 
retrying..." + gsutil cp "${file}" - | esbulk -index "${index_name}" -type _doc -server http://localhost:9200 -id "${id}" && break || log "[ERROR][${index_name}] Loading data file '${file}' with id '${id}' - FAILED Attempt #$i, retrying..." sleep 1 done if [ $i -gt $max_retries ]; then log "[ERROR][${index_name}] Loading data file '${file}' with id '${id}' - ALL ATTEMPTS FAILED." return 1 fi - #gsutil cp ${file} - | esbulk -size 2000 -w 8 -index ${index_name} -type _doc -server http://localhost:9200 -id ${id} + #gsutil cp ${file} - | esbulk -size 2000 -w 8 -index ${index_name} -type _doc -server http://localhost:9200 -id ${id} else #log "[INFO][${index_name}] Loading data file '${file}' WITHOUT id" for ((i = 1; i <= max_retries; i++)); do log "[INFO][${index_name}] Loading data file '${file}' WITHOUT id - Attempt #$i" - gsutil cp ${file} - | esbulk -size 2000 -w 4 -index ${index_name} -type _doc -server http://localhost:9200 && break || log "[ERROR][${index_name}] Loading data file '${file}' WITHOUT id - FAILED Attempt #$i, retrying..." + gsutil cp "${file}" - | esbulk -index "${index_name}" -type _doc -server http://localhost:9200 && break || log "[ERROR][${index_name}] Loading data file '${file}' WITHOUT id - FAILED Attempt #$i, retrying..." 
sleep 1 done if [ $i -gt $max_retries ]; then @@ -174,7 +174,7 @@ function do_load_etl_data_into_es_parallel() { ) & fi done - + # Wait for all background jobs to complete wait @@ -218,13 +218,13 @@ function do_load_etl_data_into_es_sequential() { fi fi done - + if $all_jobs_done; then break else sleep 1 fi - done + done } @@ -265,7 +265,7 @@ function load_etl_data_into_es() { job_param_index_settings["$index_name"]=${index_settings} job_param_id["$index_name"]=${id} done < "${pos_es_path_etl_ingestion_config}" - + # Add the jobs for loading Evidence data into Elastic Search for evidence_path in $( gsutil ls ${pos_data_release_path_etl_json}/evidence | grep sourceId ); do # Extract the sourceId from the path @@ -327,4 +327,4 @@ load_etl_data_into_es # Print Elastic Search summary print_es_summary # Stop Elastic Search -stop_elasticsearch \ No newline at end of file +stop_elasticsearch