-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPIPELINE.sh
executable file
·77 lines (64 loc) · 2.63 KB
/
PIPELINE.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
# Abort on the first unhandled command failure.
# NOTE(review): `-u` is intentionally not enabled; the virtualenv `activate`
# script sourced later in this pipeline may read unset variables -- confirm
# against the venv version in use before tightening the options.
set -e

# Step 1: Leave any active Python virtual environment.
# `deactivate` is a shell function defined by sourcing a venv's `activate`
# script. It exists only when this file is sourced (or the function was
# exported); when the script runs as a child process the function is missing
# and calling it would abort the pipeline under `set -e`.
if [[ -n "${VIRTUAL_ENV:-}" ]]; then
  if declare -F deactivate > /dev/null; then
    deactivate
  else
    # Manual fallback: drop the venv's bin directory from PATH and forget
    # the environment so later `python3` calls resolve outside of it.
    venv_bin="${VIRTUAL_ENV}/bin"
    stripped_path=":${PATH}:"
    # The inner quotes make the pattern literal (no glob interpretation).
    stripped_path="${stripped_path//":${venv_bin}:"/:}"
    stripped_path="${stripped_path#:}"
    stripped_path="${stripped_path%:}"
    export PATH="$stripped_path"
    unset VIRTUAL_ENV venv_bin stripped_path
    # Discard cached command locations that may still point into the venv.
    hash -r
  fi
fi
# Step 2: Enter the Grobid client checkout and install it.
# The directory is resolved relative to where the script is started from.
cd grobid_client_python/ || {
  echo "Error: cannot enter grobid_client_python/." >&2
  exit 1
}
echo "Installing Grobid Client..."
# Installer output stays hidden on success (as before) but is replayed on
# stderr when the install fails, so the failure is not silent.
install_status=0
install_log=$(python3 setup.py install 2>&1) || install_status=$?
if (( install_status != 0 )); then
  printf '%s\n' "$install_log" >&2
  echo "Error: Grobid Client installation failed (exit status ${install_status})." >&2
  exit "$install_status"
fi
# Step 3: Run the Grobid client until the user accepts its result.
# Each pass processes ./data/pdf_files/ into ./data/xml_grobid/ and prints the
# client's captured stdout. Answering "n" runs the client again; any other
# answer only repeats the question (it no longer triggers a costly re-run).
while true; do
  echo "Running Grobid Client"
  grobid_status=0
  result_grobid=$(grobid_client --input ./data/pdf_files/ --output ./data/xml_grobid/ processFulltextDocument) || grobid_status=$?
  printf '%s\n' "$result_grobid"
  if (( grobid_status != 0 )); then
    # Show whatever was captured before stopping, then keep the client's status.
    echo "Error: Grobid Client failed (exit status ${grobid_status})." >&2
    exit "$grobid_status"
  fi

  # Keep asking until a valid answer arrives; stop cleanly if stdin is closed.
  while true; do
    read -r -p "Are you satisfied with the Grobid result? (y/n): " grobid_input || {
      echo >&2
      echo "Error: no input available; aborting." >&2
      exit 1
    }
    case "$grobid_input" in
      y | n) break ;;
      *) echo "Invalid input. Please type 'y' or 'n'." ;;
    esac
  done

  if [[ "$grobid_input" == "y" ]]; then
    echo "Proceeding with the next step."
    break
  fi
  echo "Re-running Grobid Client..."
done
# Step 4: Copy the Grobid XML output into both downstream input directories.
echo "Organizing XML files..."
# Expand the glob into an array first so an empty result is detected here
# instead of reaching `cp` as the literal pattern.
shopt -s nullglob
xml_files=(./data/xml_grobid/*.xml)
shopt -u nullglob
if (( ${#xml_files[@]} == 0 )); then
  echo "Error: no XML files found in ./data/xml_grobid/." >&2
  exit 1
fi
# Create the destinations if they are missing so the copies cannot fail on a
# fresh checkout.
mkdir -p ./data/xml_files/ ./data/json_files/from_xml/ || exit 1
cp -- "${xml_files[@]}" ./data/xml_files/ || exit 1
cp -- "${xml_files[@]}" ./data/json_files/from_xml/ || exit 1
# Step 5: Activate the virtual environment of the Software Mentions Client.
echo "Activating virtual environment for Software Mentions Client..."
venv_activate=./software_mentions_client/venv/bin/activate
if [[ ! -f "$venv_activate" ]]; then
  echo "Error: virtual environment not found at ${venv_activate}." >&2
  exit 1
fi
# shellcheck disable=SC1090 -- the path is fixed; it is held in a variable only for the check above.
source "$venv_activate"

# Step 6: Enter the Software Mentions Client directory and install it in
# editable mode.
cd software_mentions_client/ || {
  echo "Error: cannot enter software_mentions_client/." >&2
  exit 1
}
# pip output stays hidden on success (as before) but is replayed on stderr
# when the install fails, so the failure is not silent.
pip_status=0
pip_log=$(python3 -m pip install -e . 2>&1) || pip_status=$?
if (( pip_status != 0 )); then
  printf '%s\n' "$pip_log" >&2
  echo "Error: Software Mentions Client installation failed (exit status ${pip_status})." >&2
  exit "$pip_status"
fi
# Tracks whether the client has already completed one pass: "n" before the
# first run, "y" afterwards.
processed="n"

# Step 7: Run the Software Mentions Client until the user accepts its result.
# The first pass runs without --reprocess; every follow-up pass requested by
# the user adds --reprocess. Previously the flag was reset on "n", so the
# --reprocess branch was only reachable through an invalid answer.
while true; do
  echo "Running Software Mentions Client..."
  softcite_args=(--repo-in ../data/json_files/from_xml/ --scorched-earth)
  if [[ "$processed" == "y" ]]; then
    softcite_args+=(--reprocess)
  fi

  softcite_status=0
  result_softcite=$(python3 -m software_mentions_client.client "${softcite_args[@]}") || softcite_status=$?
  printf '%s\n' "$result_softcite"
  if (( softcite_status != 0 )); then
    # Show whatever was captured before stopping, then keep the client's status.
    echo "Error: Software Mentions Client failed (exit status ${softcite_status})." >&2
    exit "$softcite_status"
  fi
  processed="y" # Any later pass is a re-run.

  # Keep asking until a valid answer arrives; an invalid answer only repeats
  # the question and does not start another run.
  while true; do
    read -r -p "Are you satisfied with the Software Mentions Client result? (y/n): " softcite_input || {
      echo >&2
      echo "Error: no input available; aborting." >&2
      exit 1
    }
    case "$softcite_input" in
      y | n) break ;;
      *) echo "Invalid input. Please type 'y' or 'n'." ;;
    esac
  done

  if [[ "$softcite_input" == "y" ]]; then
    echo "Proceeding to the end of the pipeline."
    break
  fi
  echo "Re-running Software Mentions Client..."
done

echo "Pipeline completed successfully."