decodedFile.py
import os
import pickle as pkl

import numpy as np
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input  # use tf.keras throughout; mixing keras and tf.keras layers breaks the graph
from tensorflow.keras.preprocessing.sequence import pad_sequences
print(os.getcwd())  # confirm the working directory; the model and tokenizer files are loaded from here
model_loaded = load_model('Model.h5')
latent_dim = 50  # number of LSTM units used during training
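# The hard-coded layer indices below (layers[3]..layers[6]) assume this exact
# training-time architecture; if in doubt, verify them first with:
# model_loaded.summary()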
# Inference encoder
encoder_inputs_inf = model_loaded.input[0]  # trained encoder input layer
# Restore the encoder LSTM's output and its final hidden/cell states
encoder_outputs_inf, inf_state_h, inf_state_c = model_loaded.layers[4].output
encoder_inf_states = [inf_state_h, inf_state_c]
encoder_model = Model(encoder_inputs_inf, encoder_inf_states)
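# A minimal sanity-check sketch (assumes the training-time maxlen of 30 used below):
# dummy = np.zeros((1, 30))
# h, c = encoder_model.predict(dummy)  # each state has shape (1, latent_dim)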
# Inference decoder
# These tensors carry the previous timestep's states; at the first step they
# are seeded with the encoder's final states.
decoder_state_h_input = Input(shape=(latent_dim,))  # latent_dim = 50, matching the trained LSTM
decoder_state_c_input = Input(shape=(latent_dim,))
decoder_state_input = [decoder_state_h_input, decoder_state_c_input]
# Inference decoder input
decoder_input_inf = model_loaded.input[1]  # trained decoder input layer
decoder_emb_inf = model_loaded.layers[3](decoder_input_inf)  # reuse the trained embedding layer
decoder_lstm_inf = model_loaded.layers[5]
decoder_output_inf, decoder_state_h_inf, decoder_state_c_inf = decoder_lstm_inf(
    decoder_emb_inf, initial_state=decoder_state_input)
decoder_state_inf = [decoder_state_h_inf, decoder_state_c_inf]
# Inference dense layer: softmax over the target vocabulary
dense_inf = model_loaded.layers[6]
decoder_output_final = dense_inf(decoder_output_inf)
decoder_model = Model([decoder_input_inf] + decoder_state_input,
                      [decoder_output_final] + decoder_state_inf)
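# A one-step decoding sketch (hypothetical token index; h, c are the current states):
# token = np.zeros((1, 1)); token[0, 0] = 1  # e.g. the index of the 'start' token
# probs, h, c = decoder_model.predict([token, h, c])
# next_index = np.argmax(probs[0, -1, :])  # probs has shape (1, 1, target_vocab_size)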
# Load the tokenizers fitted during training
with open('tokenizer_input.pkl', 'rb') as f:
    tokenizer_input = pkl.load(f)
with open('tokenizer_target.pkl', 'rb') as f:
    tokenizer_target = pkl.load(f)
# Reverse mappings: recover a word from its index in a sequence
reverse_word_map_input = dict(map(reversed, tokenizer_input.word_index.items()))
reverse_word_map_target = dict(map(reversed, tokenizer_target.word_index.items()))
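# Note: Keras Tokenizer indices start at 1; index 0 is reserved for padding and
# has no entry in these maps, so lookups should only be done with indices >= 1.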
# Greedy decoding: translate one padded input sequence
def decode_seq(input_seq):
    # Encode the input; the encoder's final states seed the decoder
    state_values = encoder_model.predict(input_seq)
    # Begin decoding with the 'start' token
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer_target.word_index['start']
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, decoder_h, decoder_c = decoder_model.predict([target_seq] + state_values)
        # Greedily pick the most probable word at the last timestep
        sample_word_index = np.argmax(output_tokens[0, -1, :])
        decoded_word = reverse_word_map_target[sample_word_index]
        # Stop on the 'end' token (without emitting it) or once the sentence grows too long
        if decoded_word == 'end' or len(decoded_sentence) > 80:
            stop_condition = True
        else:
            decoded_sentence += ' ' + decoded_word
        # Feed the sampled word and the updated states into the next step
        target_seq[0, 0] = sample_word_index
        state_values = [decoder_h, decoder_c]
    return decoded_sentence
# Example:
# sentence = 'नाव'
# input_seq = tokenizer_input.texts_to_sequences([sentence])
# pad_sequence = pad_sequences(input_seq, maxlen=30, padding='post')
# prediction = decode_seq(pad_sequence)  # decode the padded sequence, not the raw one
# print("sentence: ", sentence)
# print("predicted translation:", prediction)
def EncodeAndDecode(text):
    # Tokenize and pad the raw text, then run greedy decoding.
    # decode_seq never emits the 'end' token, so no stripping is needed here;
    # the original rstrip('end') would chop letters off words ending in e/n/d.
    x = tokenizer_input.texts_to_sequences([text])
    pad_sequence = pad_sequences(x, maxlen=30, padding='post')
    return decode_seq(pad_sequence).strip()
# Example:
# list1 = ['पळा', 'कोण', 'वाह']
# for text in list1:
#     print(EncodeAndDecode(text))