-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_batch.py
90 lines (65 loc) · 2.18 KB
/
create_batch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import csv
import json
import os
from dotenv import load_dotenv
from pydantic import BaseModel
# Load environment variables via python-dotenv (by default from a .env file);
# CHAT_COMPLETION_MODEL is later read from os.environ when requests are built.
load_dotenv()

# System prompt attached to every request. It lists the predefined categories
# (names wrapped in ** **) with a few example items each; the item to
# categorize is supplied separately as the user message.
CATEGORIES_PROMPT = '''
You are an expert assistant that specializes in categorizing items based on their description.
You will be provided with an item description and your job is to assign it to one of the predefined categories.
The category name is listed between ** ** and below the name you will find a few example items that you should use for categorization.
Here are the categories:
**Groceries**
- Albert Heijn
- Lidl
- Supermarkt
- Markt
- Jumbo
**Shopping**
- Zara
- Primark
**Eating out**
- McDonalds
- Kebab
Answer in JSON in the specified format.
---------
Categorize the following item
'''
# Pydantic model for the JSON object the chat model is asked to return; its
# JSON schema is embedded in the response_format of every request.
# NOTE: documented with comments on purpose -- pydantic copies a class
# docstring into the generated schema as "description", which would change
# the request payload.
class CategorizedItem(BaseModel):
    # The item that was categorized -- presumably the input description
    # echoed back; confirm against real responses.
    item: str
    # The assigned category -- presumably one of the names listed in
    # CATEGORIES_PROMPT; the schema itself only requires a string.
    category: str
def extract_items(
    path: str = 'dummy-transactions.csv',
    column: str = 'Name / Description',
) -> list[str]:
    """Return one column of a transactions CSV as a list, one entry per row.

    Args:
        path: CSV file to read. Defaults to the file name this script has
            always used.
        column: Header of the column holding the item descriptions.

    Returns:
        The ``column`` value of every data row, in file order. A file that
        contains only a header row yields an empty list.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the CSV header does not contain ``column``.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back unchanged.
    with open(path, newline='') as csv_file:
        return [row[column] for row in csv.DictReader(csv_file)]
def create_batch_transactions(items: list[str]) -> list[str]:
    """Build one JSON-encoded batch request per item.

    Every request is a POST to ``/v1/chat/completions`` that sends
    ``CATEGORIES_PROMPT`` as the system message and the item as the user
    message, and asks for a reply matching the ``CategorizedItem`` JSON
    schema.

    Args:
        items: Item descriptions; each becomes the user message of one
            request.

    Returns:
        JSON strings in the order of ``items``, with ``custom_id`` values
        ``category-1``, ``category-2``, ...

    Raises:
        ValueError: If the ``CHAT_COMPLETION_MODEL`` environment variable is
            unset or empty. Previously this silently produced
            ``"model": null`` in every request.
    """
    # Both values are identical for every request, so resolve them once
    # instead of once per item.
    model = os.environ.get('CHAT_COMPLETION_MODEL')
    if not model:
        raise ValueError(
            'CHAT_COMPLETION_MODEL environment variable must be set to the '
            'model used for the batch requests'
        )
    response_format = {
        'type': 'json_schema',
        'json_schema': {
            'description': 'Categorized item JSON schema',
            'name': 'CategorizedItem',
            'schema': CategorizedItem.model_json_schema(),
        },
    }

    transactions = []
    for index, item in enumerate(items, start=1):
        batch_transaction = {
            # custom_id is 1-based so it lines up with the CSV data rows.
            'custom_id': f'category-{index}',
            'method': 'POST',
            'url': '/v1/chat/completions',
            'body': {
                'model': model,
                'messages': [
                    {'role': 'system', 'content': CATEGORIES_PROMPT},
                    {'role': 'user', 'content': item},
                ],
                'response_format': response_format,
            },
        }
        # Serialized immediately, so sharing response_format between the
        # request dicts cannot leak mutations from one request to another.
        transactions.append(json.dumps(batch_transaction))
    return transactions
# Script body: read the item descriptions, turn each into a batch request,
# and write the requests to my-batch.jsonl, one JSON document per line.
items = extract_items()
batch_transactions = create_batch_transactions(items)

with open('my-batch.jsonl', 'w') as batch_file:
    # JSONL format: every request is followed by exactly one newline.
    batch_file.writelines(f'{request_line}\n' for request_line in batch_transactions)