Create 6.py

fatalcoder524 · web-flow · commit cf8ef9762775 · 2019-11-20T09:16:34.000+05:30
Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and recall for your data set.
diff --git a/6.py b/6.py
@@ -0,0 +1,26 @@
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn import metrics
+msg=pd.read_csv('naivetext.csv',names=['message','label'])
+print('The dimensions of the dataset',msg.shape)
+msg['labelnum']=msg.label.map({'pos':1,'neg':0})
+X=msg.message
+y=msg.labelnum
+xtrain,xtest,ytrain,ytest=train_test_split(X,y)
+print ('\n the total number of Training Data :',ytrain.shape)
+print ('\n the total number of Test Data :',ytest.shape)
+cv = CountVectorizer()
+xtrain_dtm = cv.fit_transform(xtrain)
+xtest_dtm=cv.transform(xtest)
+print('\n The words or Tokens in the text documents \n')
+print(cv.get_feature_names())
+df=pd.DataFrame(xtrain_dtm.toarray(),columns=cv.get_feature_names())
+clf = MultinomialNB().fit(xtrain_dtm,ytrain)
+predicted = clf.predict(xtest_dtm)
+print('\n Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))
+print('\n Confusion matrix')
+print(metrics.confusion_matrix(ytest,predicted))
+print('\n The value of Precision', metrics.precision_score(ytest,predicted))
+print('\n The value of Recall', metrics.recall_score(ytest,predicted))