-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWord_Cloud.py
42 lines (39 loc) · 1.69 KB
/
Word_Cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
"""
@author: Vinoj
"""
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
print('Start!')
# Read the whole CSV into a DataFrame. The location is an absolute,
# machine-specific path, so the script only runs where that file exists.
df = pd.read_csv(
    'C:/Users/Vinoj/OneDrive/Desktop/Ashoka_PEDP/Project 1/KCC12-22_updated.csv'
)
print('read')
# Define the 'QueryType' values you're interested in
# Only rows whose 'QueryType' is one of these values are kept.
query_types = [
    'Nutrient Management',
    'Fertilizer Use and Availability',
]  # Replace this with your actual QueryType values
print('filter loaded')
# List of words to be removed
# Entries are matched as exact, case-sensitive phrases, which is why upper-
# and lower-case variants are listed separately.
# Every entry needs its own comma: adjacent string literals are silently
# concatenated by Python, which previously fused 'Information control' and
# 'INFORMATION REGARDING' into a single phrase that could never match.
remove_words = [
    'NUTRIENT MANAGEMENT',
    'nutrient management',
    'Information regarding',
    'information regarding',
    'fertilizer management',
    'FARMER ASKED',
    'Farmer want',
    'know',
    'crop',
    'asked',
    'weather',
    'FARMER ASKED',
    'Farmer want',
    'Information control',
    'INFORMATION REGARDING',
]
#print('word filter loaded')
# Filter the DataFrame for the specific 'QueryType' values
# Take an explicit copy so the column assignment below writes to an
# independent frame rather than to a slice of `df`; assigning into the slice
# is what makes pandas emit SettingWithCopyWarning.
df_filtered = df[df['QueryType'].isin(query_types)].copy()
print('filter applied')
# Replace NaN values with an empty string
# (otherwise astype(str) below would turn each missing value into 'nan').
df_filtered['QueryText'] = df_filtered['QueryText'].fillna('')
print('NaN filled')
# Convert 'QueryText' values to string and join
# into one space-separated string.
text = ' '.join(df_filtered['QueryText'].astype(str))
print('text ready')
# Remove the specified words
# Each phrase is matched only when it has a space on both sides, so that
# substrings of longer words are left alone. Pad the text with one space at
# each end so a phrase at the very start or very end is matched as well.
text = ' ' + text + ' '
for word in remove_words:
    target = ' ' + word + ' '
    # str.replace() works on non-overlapping matches: in "know know" the
    # first match consumes the space the second one needs. Repeat until no
    # occurrence is left; every pass shortens the text, so this terminates.
    while target in text:
        text = text.replace(target, ' ')
# Drop the padding added above.
text = text.strip(' ')
print('word filtered')
# Build a 1000x500 word cloud from the cleaned text
wordcloud = WordCloud(
    width=1000,
    height=500,
).generate(text)
print('WordCloud Ready')
# Draw the cloud on a 15x8 inch figure with the axes hidden, then write the
# figure to disk as a PNG (absolute, machine-specific output path).
plt.figure(figsize=(15, 8))
plt.imshow(
    wordcloud,
    interpolation='bilinear',
)
plt.axis('off')
plt.savefig(
    'C:/Users/Vinoj/OneDrive/Desktop/Ashoka_PEDP/Project 1/Nutrient_wordcloud.png'
)
print('Done')