-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload.py
56 lines (42 loc) · 1.78 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#2023/03/23
'''
This script reads two command-line arguments, the path to a JSON data file and the MongoDB connection URI
(e.g. of a MongoDB running on EC2), and stores the JSON data (e.g. "La_Veranda_Reviews.json") in the
MongoDB database "La_Veranda" as the collection "reviews".
Example: python3 load.py La_Veranda_Reviews.json mongodb://localhost:27017/
'''
'''How to know the port:
1. sudo service mongod start --- start your service
2. sudo service mongod status --- check whether your service started successfully
3. cat /etc/mongod.conf --- you will see mongoDB config including your port
'''
# Please run on an EC2 instance with MongoDB properly installed, and know your MongoDB username plus (optional) password
import sys
import json
from pymongo import MongoClient #if not installed, try $ pip install pymongo
import ijson
# Target database and collection. The names are fixed by this script
# (module-level names kept as in the original for compatibility).
mongoDB = "La_Veranda"
mongoCollection = "reviews"


def parse_json_documents(text):
    """Parse a string holding one or more concatenated JSON values.

    The input data may contain documents written back to back with no
    separator ("}{"), one per line, or separated by blank lines. Instead of
    rewriting the text and splitting on newlines (which breaks when a string
    value itself contains "}{" and fails on empty lines), each value is
    decoded in place and decoding resumes right after it.

    Args:
        text: The raw file content.

    Returns:
        A list with one decoded value per top-level JSON value, in file
        order. An empty or whitespace-only input yields an empty list.

    Raises:
        json.JSONDecodeError: If the text contains malformed JSON.
    """
    decoder = json.JSONDecoder()
    documents = []
    pos = 0
    end = len(text)
    while True:
        # raw_decode does not accept leading whitespace, so skip the JSON
        # whitespace characters between documents ourselves.
        while pos < end and text[pos] in " \t\r\n":
            pos += 1
        if pos >= end:
            break
        document, pos = decoder.raw_decode(text, pos)
        documents.append(document)
    return documents


def main(argv=None):
    """Load a JSON file into the MongoDB collection, replacing its contents.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout
            (``[program, json_file, mongo_host]``). Defaults to ``sys.argv``.

    Raises:
        ValueError: If the argument count is wrong, the file contains
            malformed JSON, or the file contains no JSON documents.
        OSError: If the JSON file cannot be opened.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        raise ValueError("Command Incorrect!")
    json_file = args[1]
    mongo_host = args[2]

    print("Input Json data:", json_file)
    print("Input MongoDB Host:", mongo_host)

    # Read and parse the input BEFORE touching the database, so a missing or
    # malformed file can never wipe the existing collection.
    with open(json_file, "r", encoding="utf-8") as file:
        json_data = parse_json_documents(file.read())
    if not json_data:
        # insert_many rejects an empty list; fail before dropping anything.
        raise ValueError(f"No JSON documents found in {json_file}")

    client = MongoClient(mongo_host)
    try:
        collection = client[mongoDB][mongoCollection]
        # If the collection already exists, drop it and reload from scratch.
        collection.drop()
        collection.insert_many(json_data)
        print(f"Inserted {json_file} into MongoDB database '{mongoDB}' in table '{mongoCollection}'")
    finally:
        # Always release the connection, even if the load fails midway.
        client.close()
    print('Client Closed. Execution Completed.')


if __name__ == "__main__":
    main()