-
Notifications
You must be signed in to change notification settings - Fork 0
/
ML_Proj
33 lines (32 loc) · 1.45 KB
/
ML_Proj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def predict(test_data, model_path='model_num.hdf5',
            output_path="category_prediction.csv"):
    """Classify every row of a CSV file and write the predictions to disk.

    The input CSV is read with ISO-8859-1 encoding and must contain a
    ``text`` column (model input) and an ``IndustryCategory`` column (used
    to build the integer-id -> label mapping). The output CSV contains every
    input column except ``text``, followed by:

    * ``prediction`` -- the label of the highest-scoring class
    * ``precision``  -- that class's score rounded to two decimals (this is
      the model's top score for the row, not the precision metric; the
      column name is kept for compatibility with existing consumers)

    Args:
        test_data: Path or file-like object accepted by ``pandas.read_csv``.
        model_path: Path of the saved Keras model to load.
        output_path: Destination of the resulting CSV file.

    Returns:
        None. ``DataFrame.to_csv`` returns None when given a path; the
        result is delivered through the file at ``output_path``.

    Raises:
        AttributeError: If the ``text`` or ``IndustryCategory`` column is
            missing from the input.
        ValueError: From ``LabelEncoder.inverse_transform`` if the model
            predicts a class id that has no label in the input file.
    """
    frame = pd.read_csv(test_data, header=0, delimiter=',',
                        encoding="iso-8859-1")

    # NOTE(review): the tokenizer is fitted on the data being scored, so its
    # word indices only match the ones the model was trained with if the
    # vocabulary ranking happens to coincide. The tokenizer fitted at
    # training time should be persisted and loaded here -- confirm and fix
    # alongside the training code.
    tokenizer = Tokenizer(num_words=5000)
    tokenizer.fit_on_texts(frame.text)
    sequences = tokenizer.texts_to_sequences(frame.text)
    features = pad_sequences(sequences, padding='post', maxlen=100)

    # NOTE(review): the encoder is likewise re-fitted on this file's labels.
    # LabelEncoder sorts its classes, so the id -> label mapping matches
    # training only when this file contains the same set of categories.
    encoder = LabelEncoder()
    encoder.fit(frame.IndustryCategory)

    model = load_model(model_path)

    # A single forward pass provides both the winning class and its score.
    # This replaces predict_classes()/predict_proba(), which were removed
    # from Keras in TensorFlow 2.6, and works for any number of classes
    # (the previous code unpacked exactly four scores per row).
    probabilities = np.asarray(model.predict(features, verbose=0))
    class_ids = probabilities.argmax(axis=-1)
    top_scores = np.round(probabilities.max(axis=-1), 2)

    scores = pd.DataFrame({
        'prediction': encoder.inverse_transform(class_ids),
        'precision': top_scores,
    })
    # Reset the index so the column-wise concat aligns rows by position.
    passthrough = frame.drop(columns=['text']).reset_index(drop=True)
    result = pd.concat([passthrough, scores], axis=1)

    return result.to_csv(output_path, sep=',', encoding='iso-8859-1',
                         index=False)