llm_service.py
import json
import random

import boto3
from botocore.exceptions import EventStreamError
from cornsnake import util_color, util_print, util_wait

import config


def _build_prompt(application_name, functions, user_prompt):
    # Build the first-turn Claude prompt: describe the task, list the available
    # functions, and prime the model to answer inside a ```json block.
    return f"""
Human: Take the user prompt and generate a list of functions for the '{application_name}' application to perform.
User prompt: '{user_prompt}'.

The output MUST be as *valid* JSON only.
DO NOT write any text outside the ```json``` block.

FUNCTIONS:
```
{functions}
```

PROCESS:
1. Take the user prompt and use the given functions to generate appropriate function calls.
2. Add appropriate values to match the function parameters.
3. Output the function calls in JSON format.

EXAMPLE OUTPUT: ```json
{{
    "functionCalls": [
        {{ "name": "my-function",
           "parameters": [ {{"name": "p1", "value": 5 }} ]
        }}
    ],
    "explanation": "These actions will ..."
}}
```

Assistant:
```json
"""


def _prompt_llm(prompt):
    # Stream a completion from Claude on Amazon Bedrock, echoing tokens as they arrive.
    generated_text = ""
    util_print.print_section("RESPONSE")
    body = json.dumps({
        "prompt": prompt,
        "max_tokens_to_sample": config.MAX_OUTPUT_TOKENS,
        "temperature": config.TEMPERATURE,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": ["\n\nHuman:"],  # actual newlines; json.dumps adds the escaping
        "anthropic_version": "bedrock-2023-05-31"
    })

    session = boto3.Session(region_name=config.REGION_NAME, profile_name=config.AWS_PROFILE_NAME)
    brt = session.client('bedrock-runtime')

    response = brt.invoke_model_with_response_stream(
        modelId=config.ANTHROPIC_CLAUDE_MODEL,
        body=body
    )

    stream = response.get('body')
    if stream:
        for event in stream:
            chunk = event.get('chunk')
            if chunk:
                rsp = json.loads(chunk.get('bytes').decode())
                generated_line_part = rsp['completion']
                generated_text += generated_line_part
                print(generated_line_part, end='')

    def _clean_text__assuming_gave_json_start_at_end_of_prompt(text):  # assumes prompt ended with ```json
        # Keep only the JSON body: drop anything before a repeated ```json fence
        # and anything after the closing ``` fence.
        PRELIM_IF_CHAT = "```json"
        if PRELIM_IF_CHAT in text:
            text = text.split(PRELIM_IF_CHAT)[1]
        END = "```"
        if END in text:
            text = text.split(END)[0]
        return text

    generated_text = _clean_text__assuming_gave_json_start_at_end_of_prompt(generated_text)
    return generated_text


def _prompt_llm_with_retry_if_throttled(active_prompt):
    # Retry up to 3 times with a randomised wait when Bedrock throttles the request.
    retries = 3
    while retries > 0:
        retries -= 1
        try:
            return _prompt_llm(active_prompt)
        except EventStreamError as ese:
            if "Too many requests" in str(ese) and retries > 0:
                print(" (throttled) retrying...")
                util_wait.wait_seconds(random.randint(3, 10))
            else:
                raise


def _generate(application_name, functions, user_prompt, history_text):
    util_print.print_section("PROMPT")
    active_prompt = ""
    if not history_text:
        # First turn: build the full prompt including the function definitions.
        active_prompt = _build_prompt(application_name, functions, user_prompt)
        if config.IS_DEBUG:
            util_print.print_result(active_prompt)
        history_text = active_prompt
    else:
        # Follow-up turn: append the new user message to the running conversation.
        history_text += "\n\nHuman: " + user_prompt + "\n\nAssistant:\n"
        active_prompt = history_text
    response = _prompt_llm_with_retry_if_throttled(active_prompt)
    util_print.print_with_color("\n\nUSER >>" + user_prompt, util_color.QUESTION_COLOR)
    util_print.print_result("\nPOST PROC >>" + response)
    history_text += response
    return (response, history_text)


def _user_wants_to_exit(user_prompt):
    BYE = ['quit', 'exit', 'close']
    for bye in BYE:
        if bye in user_prompt:
            return True
    return False


def call_llm(application_name, functions, user_prompt, is_chat):
    generated_text = ""
    if not is_chat:
        (generated_text, new_history) = _generate(application_name, functions, user_prompt, "")
    else:
        history_text = ""
        last_generated = ""
        while user_prompt:
            print(">> USER: " + user_prompt)
            (generated_text, new_history) = _generate(application_name, functions, user_prompt, history_text)
            history_text = new_history  # _generate returns the full updated history, so replace rather than append
            if _user_wants_to_exit(user_prompt):
                break
            last_generated = generated_text
            user_prompt = input("\n\nHow can I help? >>")
        generated_text = last_generated  # we don't want to capture the 'bye' text of the LLM
    return generated_text
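

# Usage sketch (illustrative only, not part of the original module). It assumes
# config supplies REGION_NAME, AWS_PROFILE_NAME, ANTHROPIC_CLAUDE_MODEL,
# MAX_OUTPUT_TOKENS, TEMPERATURE and IS_DEBUG, and that AWS credentials exist
# for the configured profile. The application name and function definition below
# are hypothetical.
if __name__ == "__main__":
    example_functions = """[
        { "name": "create-invoice", "parameters": [ { "name": "amount" } ] }
    ]"""
    result = call_llm(
        application_name="billing-app",
        functions=example_functions,
        user_prompt="Create an invoice for 50 dollars",
        is_chat=False,
    )
    print(result)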