Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a4b296e

Browse files
committed Sep 1, 2023
better output messages.. this should really use logging
1 parent fa75bab commit a4b296e

File tree

1 file changed

+94
-9
lines changed

1 file changed

+94
-9
lines changed
 

‎client.py

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
11
#!/usr/bin/env python3
22
# github.com/deadbits/vector-embedding-api
33
# client.py
4+
import os
5+
import sys
46
import json
57
import argparse
68
import requests
9+
from pydantic import BaseModel
10+
from typing import List, Optional
11+
from datetime import datetime
712

813

9-
def send_request(text, model_type='local'):
14+
def timestamp_str():
    """Return the current UTC time as a naive ISO-8601 string.

    The value is used as the timestamp in this script's console messages.

    Returns:
        str: e.g. ``2023-09-01T12:34:56.789012`` (no UTC offset suffix).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
    # UTC "now" and drop tzinfo so the text keeps the same form as before
    # (no ``+00:00`` suffix).
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
16+
17+
18+
class Embedding(BaseModel):
    """One input text paired with its embedding vector and metadata."""

    # The text that was submitted for embedding.
    text: str = ''
    # The vector stored for ``text``.
    embedding: List[float] = []
    # Extra details; this script stores 'status', 'elapsed' and 'model' here.
    metadata: Optional[dict] = {}
22+
23+
24+
def send_request(text_batch, model_type='local'):
1025
url = 'http://127.0.0.1:5000/submit'
1126
headers = {'Content-Type': 'application/json'}
1227
payload = {
13-
'text': text,
28+
'text': text_batch,
1429
'model': model_type
1530
}
1631

@@ -20,14 +35,33 @@ def send_request(text, model_type='local'):
2035
headers=headers,
2136
data=json.dumps(payload)
2237
)
23-
38+
2439
response.raise_for_status()
2540
return response.json()
2641
except requests.RequestException as err:
2742
print(f'[error] exception sending http request: {err}')
2843
return None
2944

3045

46+
def process_batch(text_batch, model_type, embeddings_list, chunk_num, total_chunks):
    """Embed one batch of texts and append the results to ``embeddings_list``.

    Args:
        text_batch: List of strings sent to the server in a single request.
        model_type: Model name forwarded to ``send_request``.
        embeddings_list: Output list, extended in place with one dict per
            embedded text (the result of ``Embedding(...).dict()``).
        chunk_num: Number of this batch, used only in progress output.
        total_chunks: Total number of batches, used only in progress output.

    Returns:
        None. Nothing is appended when the request fails or the server
        reports an error for the batch.
    """
    print(f'[status] {timestamp_str()} - Processing chunk {chunk_num} of {total_chunks}')

    result = send_request(text_batch, model_type)
    if not result:
        # send_request prints its own message and returns None on failure.
        return

    # Only the first element of the response is consulted for the batch.
    reply = result[0]

    if reply['status'] == 'error':
        # .get() so an error reply without a 'message' key is still reported
        # instead of raising KeyError.
        print(f'[error] {timestamp_str()} - Received error: {reply.get("message")}')
        return

    vectors = reply['embeddings']
    print(f'[status] {timestamp_str()} - Received embeddings: {len(vectors)} ')

    if len(vectors) != len(text_batch):
        # zip() below stops at the shorter input; without this message the
        # unmatched texts would be dropped without any trace.
        print(
            f'[error] {timestamp_str()} - Expected {len(text_batch)} embeddings '
            f'but received {len(vectors)}; unmatched entries are skipped'
        )

    for text, vector in zip(text_batch, vectors):
        # A fresh dict per record so entries never share one metadata object.
        metadata = {
            'status': reply['status'],
            'elapsed': reply['elapsed'],
            'model': reply['model'],
        }
        record = Embedding(text=text, embedding=vector, metadata=metadata)
        embeddings_list.append(record.dict())
64+
3165
if __name__ == '__main__':
3266
parser = argparse.ArgumentParser()
3367

@@ -40,7 +74,7 @@ def send_request(text, model_type='local'):
4074
group.add_argument(
4175
'-f', '--file',
4276
type=argparse.FileType('r'),
43-
help='text file to embed'
77+
help='text file to embed (one text per line)'
4478
)
4579

4680
parser.add_argument(
@@ -50,14 +84,65 @@ def send_request(text, model_type='local'):
5084
default='local'
5185
)
5286

87+
parser.add_argument(
88+
'-o', '--output',
89+
help='output file',
90+
default='embeddings.json'
91+
)
92+
5393
args = parser.parse_args()
94+
model_type = args.model
95+
output_file = args.output
96+
embeddings_list = []
97+
98+
if os.path.exists(output_file):
99+
print(f'[error] {timestamp_str()} - Output file already exists')
100+
sys.exit(1)
54101

55102
if args.file:
56-
text = args.file.read()
103+
if not os.path.exists(args.file.name):
104+
print(f'[error] {timestamp_str()} - File does not exist')
105+
sys.exit(1)
106+
107+
print(f'[status] {timestamp_str()} - Processing file: {args.file.name}')
108+
109+
text_batch = []
110+
chunk_size = 100
111+
total_lines = sum(1 for _ in args.file)
112+
args.file.seek(0)
113+
total_chunks = (total_lines + chunk_size - 1) // chunk_size
114+
115+
print(f'[info] {timestamp_str()} - Total chunks: {total_chunks}')
116+
117+
chunk_num = 1
118+
119+
for line in args.file:
120+
text = line.strip()
121+
text_batch.append(text)
122+
if len(text_batch) == chunk_size:
123+
process_batch(text_batch, model_type, embeddings_list, chunk_num, total_chunks)
124+
text_batch = []
125+
chunk_num += 1
126+
127+
if text_batch:
128+
process_batch(text_batch, model_type, embeddings_list, chunk_num, total_chunks)
57129
else:
130+
print(f'[status] {timestamp_str()} - Processing text input')
58131
text = args.text
132+
result = send_request([text], model_type)
133+
if result:
134+
for res in result:
135+
metadata = {'status': res['status'], 'elapsed': res['elapsed'], 'model': res['model']}
136+
embedding = Embedding(text=text, embedding=res['embedding'], metadata=metadata)
137+
embeddings_list.append(embedding.dict())
138+
139+
# Write every collected embedding to the output file as a single JSON array.
try:
    print(f'[status] {timestamp_str()} - Saving embeddings to {output_file}')
    with open(output_file, 'w') as f:
        json.dump(embeddings_list, f)
except Exception as err:
    # Top-level boundary of the script: report any failure and exit non-zero.
    print(f'[error] {timestamp_str()} - exception saving embeddings: {err}')
    sys.exit(1)
else:
    # Report the path actually written; it differs from the default
    # 'embeddings.json' whenever -o/--output is given.
    print(f'[status] {timestamp_str()} - Embeddings saved to {output_file}')
59148

60-
model_type = args.model
61-
result = send_request(text, model_type)
62-
if result is not None:
63-
print(result)

0 commit comments

Comments
 (0)
Please sign in to comment.