@@ -109,7 +109,7 @@ def clone_git_repo(data_source, destination_dir):
109
109
110
110
# Download all files from an S3 compatible storage service.
111
111
def download_from_s3 (url , destination_dir , access_key_id = None ,
112
- secret_access_key = None , session_token = None ):
112
+ secret_access_key = None , session_token = None , debug = False ):
113
113
# Get the access key ID and secret access key from the environment variables
114
114
if access_key_id is None :
115
115
access_key_id = os .environ .get ('AWS_ACCESS_KEY_ID' )
@@ -118,16 +118,21 @@ def download_from_s3(url, destination_dir, access_key_id = None,
118
118
if session_token is None :
119
119
session_token = os .environ .get ('AWS_SESSION_TOKEN' )
120
120
121
- print (f"access_key_id={ access_key_id } " )
122
- print (f"secret_access_key={ secret_access_key } " )
123
-
124
121
# Create an S3 client
125
122
parsed_url = url .split ('/' )
126
123
endpoint_url = f"{ parsed_url [0 ]} //{ parsed_url [2 ]} "
127
124
bucket_name = parsed_url [3 ]
128
125
key_prefix = "/" .join (parsed_url [4 :- 1 ])
129
126
base_file = parsed_url [- 1 ] if not url .endswith ('/' ) else ""
127
+
130
128
print (f"endpoint_url={ endpoint_url } ..." )
129
+ if debug :
130
+ print (f"access_key_id={ access_key_id } " )
131
+ print (f"secret_access_key={ secret_access_key } " )
132
+ print (f"bucket_name={ bucket_name } " )
133
+ print (f"key_prefix={ key_prefix } " )
134
+ print (f"base_file={ base_file } " )
135
+
131
136
s3 = boto3 .resource ('s3' ,
132
137
endpoint_url = endpoint_url ,
133
138
aws_access_key_id = access_key_id ,
@@ -158,7 +163,7 @@ def download_from_s3(url, destination_dir, access_key_id = None,
158
163
destination_file = os .path .join (destination_dir , base_file )
159
164
if not os .path .exists (destination_file ):
160
165
print (f"Downloading { base_file } ..." )
161
- bucket .download_file (f'{ key_prefix } /{ base_file } ' , destination_file )
166
+ bucket .download_file (f'/ { key_prefix } /{ base_file } ' , destination_file )
162
167
else :
163
168
print (f"File already exists, skipping { base_file } " )
164
169
@@ -210,7 +215,7 @@ def download_from_url(url, destination_dir):
210
215
211
216
# Prepare data will clone the git repository given by data_source into the
212
217
# destination_dir.
213
- def prepare_data (data_source , destination_dir , access_key_id = None , secret_access_key = None ):
218
+ def prepare_data (data_source , destination_dir , access_key_id = None , secret_access_key = None , debug = False ):
214
219
215
220
# Check that destination_dir is a directory. If it does not exist, then
216
221
# create it.
@@ -229,7 +234,8 @@ def prepare_data(data_source, destination_dir, access_key_id=None, secret_access
229
234
clone_git_repo (data_source , destination_dir )
230
235
elif is_s3_url (data_source ):
231
236
# Handle the case where the data source is an S3 URL
232
- download_from_s3 (data_source , destination_dir , access_key_id , secret_access_key )
237
+ download_from_s3 (url = data_source , destination_dir = destination_dir , access_key_id = access_key_id ,
238
+ secret_access_key = secret_access_key , debug = debug )
233
239
elif data_source .startswith ('http://' ) or data_source .startswith ('https://' ):
234
240
# Handle the case where the data source is a URL
235
241
download_from_url (data_source , destination_dir )
@@ -250,9 +256,10 @@ def main():
250
256
parser .add_argument ("-d" , "--dest" , required = True , help = "Destination directory to clone the repository and extract files" )
251
257
parser .add_argument ("-a" , "--access-key-id" , required = False , help = "AWS access key ID" )
252
258
parser .add_argument ("-k" , "--secret-access-key" , required = False , help = "AWS secret access key" )
259
+ parser .add_argument ("--debug" , action = 'store_true' , help = "Enable debug mode" )
253
260
254
261
args = parser .parse_args ()
255
- prepare_data (args .data_source , args .dest , args .access_key_id , args .secret_access_key )
262
+ prepare_data (args .data_source , args .dest , args .access_key_id , args .secret_access_key , args . debug )
256
263
257
264
258
265
if __name__ == "__main__" :
0 commit comments