3
3
import logging
4
4
import re
5
5
6
- import pymongo
7
6
import requests
8
7
from gnews .utils .constants import AVAILABLE_COUNTRIES , AVAILABLE_LANGUAGES , GOOGLE_NEWS_REGEX
9
- from pymongo import MongoClient
10
8
11
9
12
10
def lang_mapping (lang ):
@@ -17,55 +15,6 @@ def country_mapping(country):
17
15
return AVAILABLE_COUNTRIES .get (country )
18
16
19
17
20
def connect_database(db_user, db_pw, db_name, collection_name):
    """Open a connection to the MongoDB cluster and return one collection.

    The values are expected to come from a ``.env`` file shaped like::

        DB_USER="..."
        DB_PW="..."
        DB_NAME="..."
        COLLECTION_NAME="..."

    The name of the MongoDB cluster as well as the database name should be
    "gnews" (the cluster host is hard-coded below).

    :param db_user: raw (not percent-escaped) database user name.
    :param db_pw: raw (not percent-escaped) database password.
    :param db_name: database name; used both in the URI and for the lookup.
    :param collection_name: name of the collection inside ``db_name``.
    :return: the collection object, or ``None`` if anything went wrong
        while building the URI or creating the client.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from urllib.parse import quote_plus

    try:
        # Reserved characters (e.g. "@", ":", "/", "%") in the credentials
        # must be percent-escaped or the URI cannot be parsed.
        uri = (
            "mongodb+srv://{user}:{password}"
            "@gnews.stjap.mongodb.net/{database}"
            "?retryWrites=true&w=majority"
        ).format(
            user=quote_plus(db_user),
            password=quote_plus(db_pw),
            database=db_name,
        )
        cluster = MongoClient(uri)
        return cluster[db_name][collection_name]
    except Exception:
        # Deliberately best-effort, as before: report the failure and hand
        # back None instead of propagating. The traceback is logged so the
        # cause is not lost.
        logging.exception("Connection Error.")
        return None
50
-
51
-
52
def post_database(collection, news):
    """Upsert one news article into a MongoDB collection.

    The document ``_id`` is the SHA-256 hex digest of the JSON dump of
    ``news``, so posting the same article again targets the same document
    instead of creating a duplicate.

    :param collection: object exposing ``update_one(filter, update, upsert=...)``.
    :param news: mapping with the keys ``title``, ``description``,
        ``published date``, ``url`` and ``publisher``; it must be
        JSON-serializable because it is hashed via ``json.dumps``.
    :return: ``None``.
    :raises KeyError: if ``news`` lacks one of the keys listed above.
    """
    # Content hash acts as the primary key (json.dumps already returns str).
    article_id = hashlib.sha256(json.dumps(news).encode('utf-8')).hexdigest()

    fields = {
        "title": news['title'],
        "description": news['description'],
        "published_date": news['published date'],
        "url": news['url'],
        "publisher": news['publisher'],
    }

    try:
        # Match on the primary key only; on insert MongoDB takes ``_id``
        # from the filter, so it does not need to appear in ``$set``.
        collection.update_one({"_id": article_id}, {'$set': fields}, upsert=True)
    except pymongo.errors.DuplicateKeyError:
        # NOTE(review): presumably reachable only when concurrent upserts
        # race on the same _id - confirm. Traceback is kept for diagnosis.
        logging.exception("Posting to database failed.")
67
-
68
-
69
18
def process_url (item , exclude_websites ):
70
19
source = item .get ('source' ).get ('href' )
71
20
if not all ([not re .match (website , source ) for website in
0 commit comments