Skip to content

Commit

Permalink
Merge branch 'release-10.1' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
demiankatz committed Feb 12, 2025
2 parents 688e772 + 15cfaba commit eb27517
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 39 deletions.
58 changes: 31 additions & 27 deletions harvest/batch-import-marc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fi

# Find harvest directory for future use
HARVEST_DIR="$VUFIND_LOCAL_DIR/harvest"
if [ ! -d $HARVEST_DIR ]
if [ ! -d "$HARVEST_DIR" ]
then
HARVEST_DIR="$VUFIND_HOME/harvest"
fi
Expand All @@ -32,13 +32,13 @@ function usage {
cat <<EOF
This script processes a batch of harvested MARC records.
Usage: $(basename $0) [-dhmz] [-p properties_file] _harvest_subdirectory_
Usage: $(basename "$0") [-dhmz] [-p properties_file] _harvest_subdirectory_
_harvest_subdirectory_ is a directory name created by the OAI-PMH harvester.
This script will search the harvest subdirectories of the directories defined
by the VUFIND_LOCAL_DIR or VUFIND_HOME environment variables.
Example: $(basename $0) oai_source
Example: $(basename "$0") oai_source
Options:
-d: Use the directory path as-is, do not append it to $HARVEST_DIR.
Expand Down Expand Up @@ -91,7 +91,7 @@ then
else
BASEPATH="$HARVEST_DIR/$1"
fi
if [ ! -d $BASEPATH ]
if [ ! -d "$BASEPATH" ]
then
echo "Directory $BASEPATH does not exist!"
exit 1
Expand All @@ -100,16 +100,16 @@ fi
# Create log/processed directories as needed:
if [ $LOGGING == true ]
then
if [ ! -d $BASEPATH/log ]
if [ ! -d "$BASEPATH"/log ]
then
mkdir $BASEPATH/log
mkdir "$BASEPATH"/log
fi
fi
if [ $MOVE_DATA == true ]
then
if [ ! -d $BASEPATH/processed ]
if [ ! -d "$BASEPATH"/processed ]
then
mkdir $BASEPATH/processed
mkdir "$BASEPATH"/processed
fi
fi

Expand All @@ -127,28 +127,32 @@ else
local LOGFILE
if [ $# -eq 1 ]
then
LOGFILE=$BASEPATH/log/`basename $1`.log
> $LOGFILE
# Assign the log path first, then truncate the file in a separate command.
# When an assignment and a redirection share one command that has no command
# name, the shell may perform them in either order, so "$LOGFILE" could still
# be empty when the redirection target is expanded.
LOGFILE="$BASEPATH/log/$(basename "$1").log"
> "$LOGFILE"
else
LOGFILE=$BASEPATH/log/`basename $1`_and_more.log
echo -e "This log is for the following files: \n$FILES\n" > $LOGFILE
LOGFILE="$BASEPATH/log/$(basename "$1")_and_more.log"
echo -e "This log is for the following files: \n$FILES\n" > "$LOGFILE"
fi
cat -u - >> $LOGFILE
cat -u - >> "$LOGFILE"
}
fi

# Process all the files in the target directory:
find -L $BASEPATH -maxdepth 1 \( -iname "*.xml" -o -iname "*.mrc" -o -iname "*.marc" \) -type f -print0 | sort -z | xargs -0 -r -n $MAX_BATCH_COUNT | \
while read -d $'\n' files
do
# Logging output handled by log() function
# PROPERTIES_FILE passed via environment
$VUFIND_HOME/import-marc.sh $files 2> >(log $files)
if [ "$?" -eq "0" ] && [ $MOVE_DATA == true ]
then
for file in $files
do
mv $file $BASEPATH/processed/`basename $file`
done
fi
# Collect all matching files into an array using a null-separated list
mapfile -d '' files < <(find -L "$BASEPATH" -maxdepth 1 \( -iname "*.xml" -o -iname "*.mrc" -o -iname "*.marc" \) -type f -print0 | sort -z)
total_files=${#files[@]}

# Iterate over the files in batches
for ((i = 0; i < total_files; i += MAX_BATCH_COUNT)); do
# Slice off a batch of files
batch=("${files[@]:i:MAX_BATCH_COUNT}")

# Execute the import command with the batch of files
if "$VUFIND_HOME"/import-marc.sh "${batch[@]}" 2> >(log "${batch[@]}"); then
if [ "$MOVE_DATA" == true ]; then
for file in "${batch[@]}"; do
mv "$file" "$BASEPATH/processed/$(basename "$file")"
done
fi
else
echo "Failed to process batch starting with ${batch[0]}" >&2
fi
done
41 changes: 29 additions & 12 deletions import-marc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,14 @@ fi
#####################################################
# Normalize file paths to absolute paths
#####################################################
NORMALIZED_PATHS=""
for f in $*; do
MARC_PATH=`dirname $f`
MARC_PATH=`cd $MARC_PATH && pwd`
MARC_FILE=`basename $f`
NORMALIZED_PATHS="${NORMALIZED_PATHS} $MARC_PATH/$MARC_FILE"
NORMALIZED_PATHS=()

for f in "$@"; do
MARC_PATH=$(dirname "$f")
MARC_PATH=$(cd "$MARC_PATH" && pwd) # Resolve the full path to prevent relative path issues
MARC_FILE=$(basename "$f")
# Add the full path to the array
NORMALIZED_PATHS+=("$MARC_PATH/$MARC_FILE")
done

#####################################################
Expand All @@ -153,16 +155,31 @@ then
mkdir -p $SOLRJ_DIR
for file in $VUFIND_HOME/solr/vendor/server/solr-webapp/webapp/WEB-INF/lib/solr*.jar $VUFIND_HOME/solr/vendor/server/solr-webapp/webapp/WEB-INF/lib/http*.jar
do
ln -s $file $SOLRJ_DIR/`basename $file`
ln -s $file $SOLRJ_DIR/`basename "$file"`
done
fi

#####################################################
# Execute Importer
#####################################################

RUN_CMD="$JAVA $INDEX_OPTIONS -Duser.timezone=UTC -Dlog4j.configuration=file://$LOG4J_CONFIG $EXTRA_SOLRMARC_SETTINGS -jar $JAR_FILE $PROPERTIES_FILE -solrj $SOLRJ_DIR -lib_local "$VUFIND_HOME/import/lib_local\;$VUFIND_HOME/solr/vendor/modules/analysis-extras/lib" $NORMALIZED_PATHS"
echo "Now Importing $NORMALIZED_PATHS ..."
# solrmarc writes log messages to stderr, write RUN_CMD to the same place
echo "`date '+%h %d, %H:%M:%S'` $RUN_CMD" >&2
exec $RUN_CMD
# Build the command as an array
RUN_CMD=(
"$JAVA"
$INDEX_OPTIONS
-Duser.timezone=UTC
-Dlog4j.configuration="file://$LOG4J_CONFIG"
$EXTRA_SOLRMARC_SETTINGS
-jar "$JAR_FILE"
"$PROPERTIES_FILE"
-solrj "$SOLRJ_DIR"
-lib_local "$VUFIND_HOME/import/lib_local;$VUFIND_HOME/solr/vendor/modules/analysis-extras/lib"
"${NORMALIZED_PATHS[@]}"
)

# Debugging output
echo "Now Importing: ${NORMALIZED_PATHS[*]}"
echo "$(date '+%h %d, %H:%M:%S') ${RUN_CMD[*]}" >&2

# Execute the command using the array
exec "${RUN_CMD[@]}"

0 comments on commit eb27517

Please sign in to comment.