Skip to content

Commit c3928a1

Browse files
committed
first push
1 parent 7b65b06 commit c3928a1

File tree

11 files changed

+2910
-0
lines changed

11 files changed

+2910
-0
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,17 @@
11
# QnA-Bot
22
QnA bot powered by CoQA + BERT
3+
4+
## Installation
5+
6+
pip3 install torch
7+
pip3 install flask
8+
pip3 install git+https://github.com/huggingface/pytorch-pretrained-BERT.git
9+
10+
Download the pretrained model from: https://drive.google.com/file/d/15HOJmRizBrgoPPVDHKpvSO2tNf0k-d8f/view?usp=sharing
11+
12+
python3 server.py
13+
14+
15+
## Running
16+
17+
![alt text](https://raw.githubusercontent.com/username/projectname/branch/path/to/img.png)

infer.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import argparse
2+
import collections
3+
import json
4+
import logging
5+
import math
6+
import os
7+
import random
8+
import sys
9+
from io import open
10+
import time
11+
12+
import numpy as np
13+
import torch
14+
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,TensorDataset)
15+
from torch.utils.data.distributed import DistributedSampler
16+
from tqdm import tqdm, trange
17+
18+
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
19+
from modeling import BertForQuestionAnswering, BertConfig
20+
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
21+
from pytorch_pretrained_bert.tokenization import (BasicTokenizer,BertTokenizer,whitespace_tokenize)
22+
23+
if sys.version_info[0] == 2:
24+
import cPickle as pickle
25+
else:
26+
import pickle
27+
28+
from infer_utils import *
29+
import spacy
30+
# Load the spaCy 'en_core_web_md' pipeline once, when this module is imported.
# NOTE(review): `nlp` is not referenced by any code visible in this file —
# confirm whether another module imports it before removing this load.
nlp = spacy.load('en_core_web_md')
31+
32+
def is_whitespace(c):
    """Tell whether the single character ``c`` counts as whitespace.

    Space, tab, carriage return, newline and the code point U+202F are
    treated as whitespace; any other character is not.
    """
    # Tuple membership (not substring search) keeps the comparison exact.
    return c in (" ", "\t", "\r", "\n") or ord(c) == 0x202F
36+
37+
def is_punc(c):
    """Report whether ``c`` occurs in this module's punctuation set.

    The set holds: question mark, comma, period, exclamation mark, round and
    square brackets, hyphen, underscore, and the single and double quote.
    """
    punctuation = '?,.!()[]-_\'"'
    return c in punctuation
41+
42+
def punc_sep(s):
    """Return ``s`` rebuilt as space-joined tokens with punctuation split off.

    Characters for which ``is_whitespace`` is true only separate tokens.
    Each character for which ``is_punc`` is true becomes a token of its own.
    Any other run of characters is accumulated into a single token.
    """
    pieces = []
    # True whenever the next ordinary character must open a new token.
    start_new = True
    for ch in s:
        if is_whitespace(ch):
            start_new = True
            continue
        if is_punc(ch):
            # Punctuation stands alone and also terminates the current token.
            pieces.append(ch)
            start_new = True
            continue
        if start_new:
            pieces.append(ch)
            start_new = False
        else:
            pieces[-1] += ch
    return ' '.join(pieces)
59+
60+
def str_to_coqa_example(contenxt, question, prev_ques, prev_answ):
    """Wrap a raw passage and question in a ``CoQAExample`` for inference.

    The passage is split into tokens at every character for which
    ``is_whitespace`` is true. No gold answer exists at inference time, so
    the answer-related fields are filled with empty/zero/False placeholders.

    Args:
        contenxt: Passage text to answer from (parameter spelling is kept
            as-is so existing keyword callers keep working).
        question: Text of the current question.
        prev_ques: Forwarded unchanged as the example's ``prev_ques``.
        prev_answ: Forwarded unchanged as the example's ``prev_answ``.

    Returns:
        The constructed ``CoQAExample``.
    """
    doc_tokens = []
    prev_is_whitespace = True
    for c in contenxt:
        if is_whitespace(c):
            prev_is_whitespace = True
        elif prev_is_whitespace:
            # First non-whitespace character after a gap starts a new token.
            doc_tokens.append(c)
            prev_is_whitespace = False
        else:
            doc_tokens[-1] += c

    return CoQAExample(
        # Fixed id: InferCoQA.predict looks the prediction up under 'random'.
        qas_id='random',
        question_text=question,
        doc_tokens=doc_tokens,
        orig_answer_text="",
        start_position=0,
        end_position=0,
        is_impossible=False,
        is_yes=False,
        is_no=False,
        answer_span="",
        prev_ques=prev_ques,
        prev_answ=prev_answ)
93+
94+
class InferCoQA:
    """Load a fine-tuned BERT question-answering model and answer questions.

    The tokenizer and model are loaded once in ``__init__``; ``predict`` then
    runs a single (passage, question) pair through the model.
    """

    def __init__(self, model_path, lower_case = True):
        """Load tokenizer and model weights from ``model_path``.

        Args:
            model_path: Location handed to both ``BertTokenizer.from_pretrained``
                and ``BertForQuestionAnswering.from_pretrained``.
            lower_case: Forwarded to the tokenizer as ``do_lower_case``.
        """
        self.model_path = model_path
        self.tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=lower_case)
        self.model = BertForQuestionAnswering.from_pretrained(model_path)
        # Use the GPU when one is present, otherwise fall back to the CPU
        # instead of failing inside an unconditional .cuda() call.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

    def predict(self, contenxt, question, prev_ques, prev_answ):
        """Answer ``question`` about the passage ``contenxt``.

        Args:
            contenxt: Passage text (parameter spelling kept for compatibility).
            question: Current question text.
            prev_ques: Previous question, forwarded to the example builder.
            prev_answ: Previous answer, forwarded to the example builder.

        Returns:
            The value stored under the key ``'random'`` in the predictions
            JSON produced by ``write_predictions``.
        """
        # Local imports keep this block self-contained; both are stdlib.
        import shutil
        import tempfile

        t = time.time()
        coqa_example = str_to_coqa_example(contenxt, question, prev_ques, prev_answ)
        coqa_features = convert_examples_to_features(
            [coqa_example], self.tokenizer, max_seq_length=512,
            doc_stride=128, max_query_length=100, is_training=False)

        all_input_ids = torch.tensor([f.input_ids for f in coqa_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in coqa_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in coqa_features], dtype=torch.long)
        # Index tensor lets each batch be mapped back to its feature object.
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        coqa_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)

        coqa_sampler = SequentialSampler(coqa_data)
        coqa_dataloader = DataLoader(coqa_data, sampler=coqa_sampler, batch_size=1)

        all_results = []
        for input_ids, input_mask, segment_ids, example_indices in coqa_dataloader:
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                score = self.model(input_ids, segment_ids, input_mask)

            # batch_size is 1, so the first index identifies the only feature.
            coqa_feature = coqa_features[example_indices[0].item()]
            unique_id = int(coqa_feature.unique_id)
            all_results.append(RawResult(unique_id=unique_id,
                                         score=score[0].cpu(),
                                         length=input_ids.size(1)))

        # write_predictions reports through files. Use a private scratch
        # directory so concurrent calls cannot overwrite each other's output,
        # and remove it even when something below raises.
        work_dir = tempfile.mkdtemp(prefix="coqa_infer_")
        try:
            output_prediction_file = os.path.join(work_dir, "predictions.json")
            output_nbest_file = os.path.join(work_dir, "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(work_dir, "null_odds.json")
            # Positional arguments are forwarded exactly as before; their
            # meaning is defined by write_predictions in infer_utils.
            write_predictions([coqa_example], coqa_features, all_results,
                              1, 100,
                              True, output_prediction_file,
                              output_nbest_file, output_null_log_odds_file, False,
                              False, 0.0)
            with open(output_prediction_file) as prediction_fh:
                res = json.load(prediction_fh)['random']
        finally:
            shutil.rmtree(work_dir, ignore_errors=True)

        print('inference time :',time.time() - t )
        return res
142+
143+
# iq = InferCoQA('coqa_ynu_history_1')
144+
# print('done loading model ..')
145+
# context = input("Context : ")
146+
147+
148+
# prev_q = ""
149+
# prev_a = ""
150+
# while True:
151+
# q = input("Question : ")
152+
# a = iq.predict(context,q,prev_q,prev_a)
153+
# print("Answer :",a)
154+
# prev_q = q
155+
# prev_a = a
156+
157+

0 commit comments

Comments
 (0)