-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNewsSummaryNLP-Update.py
167 lines (126 loc) · 4.89 KB
/
NewsSummaryNLP-Update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import tkinter as tk
import nltk
from textblob import TextBlob
from newspaper import Article
from transformers import BertTokenizer, BertModel
import torch
# Function to Summarize
def summarize():
    """Fetch the article at the URL in ``utext`` and fill in the result fields.

    Reads the URL from the ``utext`` widget, downloads and parses it with
    newspaper's ``Article``, then writes the title, authors, publication
    date, summary, TextBlob polarity and BERT keywords into their read-only
    text widgets.  Does nothing when the URL box is empty.

    Exceptions raised by the download, parsing or keyword extraction are not
    swallowed; they propagate to the caller.  The result widgets are always
    put back into their read-only state before that happens.
    """
    url = utext.get("1.0", "end").strip()
    if not url:
        # Nothing to fetch; do not hand an empty URL to newspaper.
        return

    # Article.nlp() relies on NLTK's 'punkt' tokenizer data.  Download it
    # only when it is missing rather than contacting the server on each click.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

    article = Article(url)
    article.download()
    article.parse()
    article.nlp()

    # authors is a list and publish_date may be None (per newspaper's docs);
    # turn both into plain strings before handing them to Tk.
    authors_text = ", ".join(article.authors)
    if article.publish_date is None:
        published_text = ""
    else:
        published_text = str(article.publish_date)

    # Sentiment - polarity of the article text
    polarity = TextBlob(article.text).polarity
    if polarity > 0:
        mood = "positive"
    elif polarity < 0:
        mood = "negative"
    else:
        mood = "neutral"
    sentiment_text = f'Polarity: {polarity}, Sentiment: {mood}'

    fields = (title, author, publication, summary, sentiment, keywords)
    for field in fields:
        field.config(state="normal")
    try:
        # Clear everything first so no stale value from a previous article
        # survives if a later step fails.
        for field in fields:
            field.delete("1.0", "end")

        title.insert("1.0", article.title)
        author.insert("1.0", authors_text)
        publication.insert("1.0", published_text)
        summary.insert("1.0", article.summary)
        sentiment.insert("1.0", sentiment_text)

        # Keyword extraction runs last: it is the slowest step, and the other
        # fields stay filled in even if it raises.
        keywords_list = extract_keywords_bert(article.text)
        keywords.insert("1.0", "\n".join(keywords_list))
    finally:
        # Re-lock the result fields no matter what happened above.  Uses the
        # full "disabled" spelling, matching where the widgets are created.
        for field in fields:
            field.config(state="disabled")
# Function to Refresh
def refresh():
    """Empty all result fields and the URL box.

    The result widgets are temporarily switched to the editable state so
    their contents can be deleted, then switched back afterwards.
    """
    result_fields = (title, author, publication, summary, sentiment, keywords)

    # Unlock the result widgets so their text can be removed.
    for widget in result_fields:
        widget.config(state="normal")

    # Wipe the result widgets first, then the URL entry box.
    for widget in (*result_fields, utext):
        widget.delete("1.0", "end")

    # Lock the result widgets again.
    for widget in result_fields:
        widget.config(state="disable")
# Function to extract keywords using BERT embeddings
# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the weights are
# read once per process instead of on every call.
_BERT_CACHE = {}


def _load_bert(name='bert-base-uncased'):
    """Return the ``(tokenizer, model)`` pair for *name*, loading it only once."""
    if name not in _BERT_CACHE:
        tokenizer = BertTokenizer.from_pretrained(name)
        model = BertModel.from_pretrained(name)
        model.eval()  # inference only
        _BERT_CACHE[name] = (tokenizer, model)
    return _BERT_CACHE[name]


def extract_keywords_bert(text, top_k=5):
    """Return up to *top_k* keyword tokens from *text*, ranked by BERT attention.

    Each token is scored by the total attention it receives in the model's
    last layer, summed over all heads and all query positions.  Tokens are
    returned highest score first.  Special tokens, word-piece fragments and
    anything that is not purely alphabetic are skipped, and duplicates are
    reported once.

    Args:
        text: The text to analyse.  Input longer than the model's maximum
            sequence length is truncated.
        top_k: Maximum number of keywords to return (default 5).

    Returns:
        A list of at most *top_k* lower-cased tokens; empty when *text* is
        empty/blank or *top_k* is not positive.
    """
    if top_k <= 0 or not text or not text.strip():
        return []

    tokenizer, model = _load_bert()

    # Truncate so long articles do not exceed the model's position limit.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        add_special_tokens=True,
        truncation=True,
    )
    with torch.no_grad():
        # Attention weights are only returned when explicitly requested.
        outputs = model(**inputs, output_attentions=True)

    # Map the exact ids fed to the model back to tokens so that score index i
    # always refers to tokens[i].
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())

    # Last layer, first (only) batch item; per the transformers docs this is
    # laid out as (heads, query position, key position).
    last_layer = outputs.attentions[-1][0]
    received = last_layer.sum(dim=(0, 1))  # one score per key token
    ranked = torch.argsort(received, descending=True).tolist()

    keywords_list = []
    for index in ranked:
        token = tokens[index]
        # isalpha() rejects "[CLS]"/"[SEP]", "##piece" fragments, digits and
        # punctuation in one check.
        if not token.isalpha() or token in keywords_list:
            continue
        keywords_list.append(token)
        if len(keywords_list) == top_k:
            break
    return keywords_list
# GUI - Graphical User Interface
def _labelled_text(parent, caption, rows, read_only=True):
    """Create and pack a caption label followed by a text box.

    Read-only boxes are disabled and given a grey background.  Returns the
    ``(label, text)`` widget pair.
    """
    label = tk.Label(parent, text=caption)
    label.pack()
    box = tk.Text(parent, height=rows, width=140)
    if read_only:
        box.config(state="disabled", bg="#dddddd")
    box.pack()
    return label, box


# Main window
root = tk.Tk()
root.title("News Summarizer")
root.geometry("1200x600")

# Read-only result fields, stacked top to bottom
tlabel, title = _labelled_text(root, "Title", 1)
alabel, author = _labelled_text(root, "Author", 1)
plabel, publication = _labelled_text(root, "Publication Date", 1)
slabel, summary = _labelled_text(root, "Summary", 20)
selabel, sentiment = _labelled_text(root, "Sentiment Analysis", 1)
klabel, keywords = _labelled_text(root, "Keywords", 5)

# Editable URL input
ulabel, utext = _labelled_text(root, "URL", 1, read_only=False)

# Action buttons, placed side by side
btn_summarize = tk.Button(root, text="Summarize", command=summarize)
btn_summarize.pack(side="left")
btn_refresh = tk.Button(root, text="Refresh", command=refresh)
btn_refresh.pack(side="left")

root.mainloop()