@@ -40,39 +40,72 @@ def render_response_payload(messages, tools:, temperature:, model:, cache_prompt
4040 payload
4141 end
4242
# Converts a list of conversation messages into a flat array of input item
# hashes.
#
# Every tool call found on any message is collected up front so that tool
# result messages can be matched back to the call they answer.
#
# @param messages [Array] messages responding to +tool_calls+ (a hash-like
#   collection of tool calls, or nil)
# @return [Array<Hash>] one or more input items per message, in message order
def format_input(messages)
  all_tool_calls = messages.flat_map { |message| message.tool_calls&.values || [] }

  # flat_map already splices in the arrays returned for tool-call and
  # image-generation messages, so no additional flatten pass is required.
  messages.flat_map { |message| format_message_input(message, all_tool_calls) }
end
49+
# Picks the formatter matching the kind of message. Checks run in order:
# tool-call messages, tool results, assistant messages whose first attachment
# is an image, and finally everything else.
#
# @param msg [Object] message responding to +tool_call?+ and +role+
# @param all_tool_calls [Array] every tool call in the conversation
# @return [Hash, Array<Hash>] a single input item or a list of them
def format_message_input(msg, all_tool_calls)
  return format_tool_call_message(msg) if msg.tool_call?
  return format_tool_response_message(msg, all_tool_calls) if msg.role == :tool
  return format_image_generation_message(msg) if assistant_message_with_image_attachment?(msg)

  format_regular_message(msg)
end
7561
# Builds one 'function_call' item for each tool call carried by the message.
#
# @param msg [Object] message whose +tool_calls+ is a hash of tool calls
# @return [Array<Hash>] function_call items, with arguments serialized as JSON
def format_tool_call_message(msg)
  calls = msg.tool_calls.values
  calls.map do |call|
    serialized_arguments = JSON.generate(call.arguments)
    {
      type: 'function_call',
      call_id: call.id,
      name: call.name,
      arguments: serialized_arguments,
      status: 'completed'
    }
  end
end
73+
# Builds the 'function_call_output' item for a tool result message.
#
# The call_id is taken from the known tool call whose id equals the message's
# +tool_call_id+; it is nil when no such call exists.
#
# @param msg [Object] message responding to +tool_call_id+ and +content+
# @param all_tool_calls [Array] every tool call in the conversation
# @return [Hash] the function_call_output item
def format_tool_response_message(msg, all_tool_calls)
  matching_call = all_tool_calls.find { |call| call.id == msg.tool_call_id }

  {
    type: 'function_call_output',
    call_id: matching_call&.id,
    output: msg.content,
    status: 'completed'
  }
end
82+
# Builds the items that refer back to a previously generated image.
#
# Only the first attachment of the message content is considered. When that
# attachment carries a reasoning id, a 'reasoning' item with an empty summary
# is emitted ahead of the 'image_generation_call' item.
#
# @param msg [Object] message whose content exposes +attachments+
# @return [Array<Hash>] one or two items
def format_image_generation_message(msg)
  image = msg.content.attachments.first
  image_call = { type: 'image_generation_call', id: image.id }
  return [image_call] unless image.reasoning_id

  reasoning = { type: 'reasoning', id: image.reasoning_id, summary: [] }
  [reasoning, image_call]
end
99+
# Builds a plain 'message' item; entries whose value is nil are left out.
#
# @param msg [Object] message responding to +role+ and +content+
# @return [Hash] the message item
def format_regular_message(msg)
  item = {
    type: 'message',
    role: format_role(msg.role),
    content: ResponseMedia.format_content(msg.content),
    status: 'completed'
  }
  item.reject { |_key, value| value.nil? }
end
108+
76109 def format_role ( role )
77110 case role
78111 when :system
@@ -93,23 +126,81 @@ def parse_respond_response(response)
93126
94127 Message . new (
95128 role : :assistant ,
96- content : all_output_text ( outputs ) ,
129+ content : all_output_content ( outputs ) ,
97130 tool_calls : parse_response_tool_calls ( outputs ) ,
98131 input_tokens : data [ 'usage' ] [ 'input_tokens' ] ,
99132 output_tokens : data [ 'usage' ] [ 'output_tokens' ] ,
100133 cached_tokens : data . dig ( 'usage' , 'input_tokens_details' , 'cached_tokens' ) ,
101134 model_id : data [ 'model' ] ,
135+ reasoning_id : extract_reasoning_id ( outputs ) ,
102136 raw : response
103137 )
104138 end
105139
# Builds the content of a parsed response from its output items.
#
# Returns the joined text when no image was generated; otherwise returns a
# RubyLLM::Content holding the text with every generated image attached.
#
# @param outputs [Array<Hash>] output items, each with a 'type' key
# @return [String, RubyLLM::Content]
def all_output_content(outputs)
  text_content = extract_text_content(outputs)
  image_outputs = outputs.select { |output| output['type'] == 'image_generation_call' }
  return text_content if image_outputs.empty?

  # The reasoning id is handed over explicitly rather than stashed in an
  # instance variable, so this call leaves no state behind.
  build_content_with_images(text_content, image_outputs, extract_reasoning_id(outputs))
end

private

# Joins, one per line, the text of every 'output_text' part found in the
# 'message' outputs.
#
# @param outputs [Array<Hash>] output items
# @return [String] the joined text, empty when there is none
def extract_text_content(outputs)
  message_outputs = outputs.select { |output| output['type'] == 'message' }
  texts = message_outputs.flat_map do |output|
    # A message without a content list contributes no text.
    (output['content'] || []).filter_map do |part|
      part['text'] if part['type'] == 'output_text'
    end
  end
  texts.join("\n")
end

# Wraps the text in a RubyLLM::Content and attaches each generated image.
#
# @param text_content [String] text of the response
# @param image_outputs [Array<Hash>] 'image_generation_call' output items
# @param reasoning_id [String, nil] id recorded on every attachment
# @return [RubyLLM::Content]
def build_content_with_images(text_content, image_outputs, reasoning_id = nil)
  content = RubyLLM::Content.new(text_content)
  image_outputs.each { |output| attach_image_to_content(content, output, reasoning_id) }
  content
end
168+
# Attaches one generated image to the given content object.
#
# The MIME type is "image/<output_format>", using 'png' when the output item
# does not state a format.
#
# @param content [Object] receiver of +attach+
# @param output [Hash] 'image_generation_call' item; 'result', 'id' and
#   'output_format' are read from it
# @param reasoning_id [String, nil] id stored on the attachment
# @return [Object] whatever +content.attach+ returns
def attach_image_to_content(content, output, reasoning_id)
  output_format = output['output_format'] || 'png'

  attachment = RubyLLM::ImageAttachment.new(
    data: output['result'],
    # No padding around the subtype: the MIME type must be exactly "image/png" etc.
    mime_type: "image/#{output_format}",
    model_id: nil,
    id: output['id'],
    reasoning_id: reasoning_id
  )
  content.attach(attachment)
end
184+
# Joins, one per line, the text of every 'output_text' part found in the
# 'message' outputs.
#
# @param outputs [Array<Hash>] output items, each with a 'type' key
# @return [String] the joined text, empty when there is none
def all_output_text(outputs)
  message_outputs = outputs.select { |output| output['type'] == 'message' }
  texts = message_outputs.flat_map do |output|
    # A message without a content list contributes no text.
    (output['content'] || []).filter_map do |part|
      part['text'] if part['type'] == 'output_text'
    end
  end
  # Plain newline separator, so no line starts with a stray space.
  texts.join("\n")
end
192+
# Tells whether the message is an assistant message whose content is a
# RubyLLM::Content with at least one attachment, the first of which is a
# RubyLLM::ImageAttachment.
#
# @param msg [Object] message responding to +role+ and +content+
# @return [Boolean]
def assistant_message_with_image_attachment?(msg)
  return false unless msg.role == :assistant

  content = msg.content
  return false unless content.is_a?(RubyLLM::Content)

  attachments = content.attachments
  attachments.any? && attachments.first.is_a?(RubyLLM::ImageAttachment)
end
199+
# Returns the id of the first 'reasoning' output item.
#
# @param outputs [Array<Hash>] output items, each with a 'type' key
# @return [String, nil] the id, or nil when there is no reasoning item
def extract_reasoning_id(outputs)
  outputs.each do |output|
    return output['id'] if output['type'] == 'reasoning'
  end
  nil
end
113204 end
114205 end
115206 end
0 commit comments