Skip to content

Commit

Permalink
closed #109
Browse files Browse the repository at this point in the history
  • Loading branch information
Samyu Comandur committed Apr 9, 2020
1 parent 457d116 commit 47ff00c
Showing 1 changed file with 35 additions and 33 deletions.
68 changes: 35 additions & 33 deletions Main/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def button(request):
return render(request, 'home.html')

# runs tf-idf algorithm, returns ranked list
def tfidf(txt, sw):
def tfidf(txt, present_txt, sw):
print(present_txt)
tokens = []
s = ''
for elem in txt:
Expand Down Expand Up @@ -76,7 +77,7 @@ def tfidf(txt, sw):

txt_hl = ''
outputstring = ''
for para in txt:
for para in present_txt:
for word in para.split():
if word.startswith('<strong>'):
word = '<br><br>' + word
Expand All @@ -92,6 +93,7 @@ def tfidf(txt, sw):
word = '<span style="background-color:' + colors[top15_freqs_sort.index(freq)] + '">' + word + '</span>'
txt_hl += word + ' '
outputstring += "<table style='padding:15px;margin-left:auto;margin-right:auto;'>"
txt_hl += '<br>'

top15 = ranking[['feat','rank']][0:15]
for i in range(len(top15)):
Expand All @@ -102,10 +104,10 @@ def tfidf(txt, sw):
outputstring += '<tr> <td style="background-color:' + colors[top15_freqs_sort.index(top15.iloc[i,1])] + '">' + top15.iloc[i,0] + '</td>'
outputstring += '<td style="background-color:' + colors[top15_freqs_sort.index(top15.iloc[i,1])] + '">' +str(round(top15.iloc[i,1],4)) + '</td></tr>'
outputstring += "</table>"

return ranking[['feat','rank']][0:15], outputstring, txt_hl

def lda(txt, sw, noOfTopics):
def lda(txt, present_txt, sw, noOfTopics):
outputstring = ""
documents = []
ignoreList = []
Expand Down Expand Up @@ -176,7 +178,7 @@ def lda(txt, sw, noOfTopics):
outputstring += "</table>"

txt_hl = ''
for para in txt:
for para in present_txt:
for word in para.split():
if word.startswith('<strong>'):
word = '<br><br>' + word
Expand All @@ -189,7 +191,7 @@ def lda(txt, sw, noOfTopics):
txt_hl += word + ' '
return outputstring, file_string, txt_hl

def pos(txt, sw):
def pos(txt, present_txt, sw):
cnt = 1
outputstring = ""
file_string = ''
Expand Down Expand Up @@ -258,7 +260,8 @@ def pos(txt, sw):
output_string += "<span style=color:white;background-color:" + colors[2] + ">adjectives</span>, and "
output_string += "<span style=color:white;background-color:" + colors[3] + ">adverbs</span>, respectively."
output_string += "<table style='margin-left:auto;margin-right:auto;'>"
txt_hl = doc
txt_hl = present_txt

for i in tokenized:
wordsList = nltk.word_tokenize(i)
wordsList = [w for w in wordsList if not w in stop_words]
Expand Down Expand Up @@ -342,7 +345,7 @@ def result(request):
print("Error while deleting file")
if algorithm == 'tfidf':
try:
textout, newtext = tfidfprocess(txt, sw)
textout, newtext = tfidfprocess(txt, txt, sw)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input likely contains only stopwords. Try again.",
Expand All @@ -356,7 +359,7 @@ def result(request):
}
return render(request, 'result.html', context = context)
if algorithm == 'pos':
outputstring, file_string, textout = posprocess(txt, sw)
outputstring, file_string, textout = posprocess(txt, txt, sw)
#change outputstring to formatted with txt file
file1 = open(filename,"w+")
file1.write(file_string)
Expand All @@ -372,7 +375,7 @@ def result(request):
return render(request, 'result.html', context= context)
if algorithm == 'lda':
try:
outputstring, file_string, textout = ldaprocess(txt, sw, num_of_topics)
outputstring, file_string, textout = ldaprocess(txt, txt, sw, num_of_topics)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input does not contain enough unique terms for LDA!",
Expand Down Expand Up @@ -537,7 +540,7 @@ def analyze_doc_tfidf(request, document_id):
return render(request, 'result.html', context = context)
sw = request.POST.get('sws')
try:
textout, newtext = tfidfprocess(txt, sw)
textout, newtext = tfidfprocess(txt, txt, sw)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input likely contains only stopwords. Try again.",
Expand Down Expand Up @@ -569,7 +572,7 @@ def analyze_doc_pos(request, document_id):
}
return render(request, 'result.html', context = context)
sw = request.POST.get('sws')
outputstring, file_string, textout = posprocess(txt, sw)
outputstring, file_string, textout = posprocess(txt, txt, sw)
#change outputstring to formatted with txt file
file1 = open(filename,"w+")
file1.write(file_string)
Expand Down Expand Up @@ -604,7 +607,7 @@ def analyze_doc_lda(request, document_id):
return render(request, 'result.html', context = context)
sw = request.POST.get('sws')
try:
outputstring, file_string, textout = ldaprocess(txt, sw, num_of_topics)
outputstring, file_string, textout = ldaprocess(txt, txt, sw, num_of_topics)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input does not contain enough unique terms for LDA!",
Expand Down Expand Up @@ -704,7 +707,7 @@ def multi_tfidf(request, project_id):
i = i + 1
text = doc.text
present_text = present_text + "<strong>Document " + str(i) + "</strong>\r\n" + text + "\r\n"
text = text.replace("\r\n", "")
#text = text.replace("\r\n", "")
entire_text = entire_text + text + "\r\n"
check_txt = entire_text.replace(' ', '')
if check_txt == '':
Expand All @@ -714,8 +717,7 @@ def multi_tfidf(request, project_id):
return render(request, 'result.html', context = context)
sw = request.POST.get('sws')
try:
newtext = tfidfprocess(entire_text, sw)[1]
textout = tfidfprocess(entire_text, sw)[0]
textout, newtext = tfidfprocess(entire_text, present_text, sw)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input likely contains only stopwords. Try again.",
Expand All @@ -728,8 +730,8 @@ def multi_tfidf(request, project_id):
except:
print('file not found exception')
'''
txt = clean_up(present_text)
present_text = '<br><br>'.join(txt)
#txt = clean_up(present_text)
#present_text = '<br><br>'.join(txt)
context = {
'text': textout,
'newtext': newtext,
Expand Down Expand Up @@ -757,24 +759,22 @@ def multi_pos(request, project_id):
for doc in docs:
i = i + 1
text = doc.text
#present_text += "<br><br> <strong>Document " + str(i) + "</strong> <br> "
#out = posprocess(text, sw)[1]
#present_text += out
text = text.replace("\r\n", "")
present_text += "<br><br><strong>Document " + str(i) + "</strong><br> "
present_text += text
#text = text.replace("\r\n", "")
entire_text = entire_text + text + "\r\n"
check_txt = entire_text.replace(' ', '')
if check_txt == '':
context = {
'output_error_text': "<br>The document is empty!<br><br>"
}
return render(request, 'result.html', context = context)
outputstring, file_string, textout = posprocess(entire_text, sw)
outputstring, file_string, textout = posprocess(entire_text, present_text, sw)
#change outputstring to formatted with txt file
file1 = open(filename,"w+")
file1.write(file_string)
file1.close()
freq_display_str = outputstring.replace("\n", "<br>")
#textout = present_text
context = {
'text': textout,
'outputstring': outputstring,
Expand Down Expand Up @@ -814,7 +814,7 @@ def multi_lda(request, project_id):
sw = request.POST.get('sws')
num_of_topics = request.POST.get('numoftopics')
try:
outputstring, file_string, textout = ldaprocess(entire_text, sw, num_of_topics)
outputstring, file_string, textout = ldaprocess(entire_text, present_text, sw, num_of_topics)
except ValueError:
context = {
'output_error_text': "<br><br>The text you input does not contain enough unique terms for LDA!",
Expand Down Expand Up @@ -853,25 +853,27 @@ def make_sw_list(sw):
stopwords = text.ENGLISH_STOP_WORDS.union(user_stopwords)
return stopwords

def tfidfprocess(txt, present_txt, sw):
    """Run TF-IDF over the analysis text and build the display artifacts.

    txt         -- raw text to analyze (cleaned into paragraphs here)
    present_txt -- text used for on-screen highlighting; may differ from
                   txt (e.g. the multi-document view prepends <strong>
                   "Document N" headers) -- TODO(review) confirm callers
    sw          -- user-supplied stopword string, expanded via make_sw_list

    Side effect: writes the top-15 term ranking to 'output-<today>.txt'.
    Returns (textout, newtext): the highlighted HTML text and the HTML
    ranking table produced by tfidf().
    """
    txt = clean_up(txt)
    present_txt = clean_up(present_txt)
    sws = make_sw_list(sw)
    filename = 'output-' + str(date.today()) + '.txt'
    # Call tfidf() once and unpack all three results; the previous code
    # invoked tfidf() three times with identical arguments, repeating the
    # entire vectorization for each element of the returned tuple.
    ranking, newtext, textout = tfidf(txt, present_txt, sws)
    ranking.to_csv(filename, header=None, index=None, sep=' ', mode='w')
    return textout, newtext

#needs work
def posprocess(txt, present_txt, sw):
    """Clean the analysis text and run part-of-speech tagging via pos().

    txt         -- raw text to analyze (cleaned here)
    present_txt -- display text handed to pos() for highlighting.
                   NOTE(review): unlike tfidfprocess/ldaprocess this does
                   NOT run clean_up on present_txt -- pos() assigns it to
                   txt_hl directly, so this looks intentional; confirm.
    sw          -- user-supplied stopword string

    Returns (outputstring, file_string, textout) straight from pos().
    """
    txt = clean_up(txt)
    return pos(txt, present_txt, sw)

def ldaprocess(txt, present_txt, sw, numberoftopics):
    """Clean both text inputs and run LDA topic modelling via lda().

    txt            -- raw text to analyze (cleaned into paragraphs here)
    present_txt    -- display text used for highlighting (also cleaned)
    sw             -- user-supplied stopword string
    numberoftopics -- number of LDA topics requested by the caller

    Returns (outputstring, file_string, newtext) straight from lda().
    """
    txt = clean_up(txt)
    present_txt = clean_up(present_txt)
    return lda(txt, present_txt, sw, numberoftopics)

#TODO (Ainsley):
Expand Down

0 comments on commit 47ff00c

Please sign in to comment.