Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions lib/ruby_llm/image_attachment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
module RubyLLM
# A class representing a file attachment that is an image generated by an LLM.
class ImageAttachment < Attachment
attr_reader :image, :content
attr_reader :image, :content, :id, :reasoning_id

def initialize(data:, mime_type:, model_id:)
def initialize(data:, mime_type:, model_id:, id: nil, reasoning_id: nil)
super(nil, filename: nil)
@image = Image.new(data:, mime_type:, model_id:)
@content = Base64.strict_decode64(data)
@mime_type = mime_type
@id = id
@reasoning_id = reasoning_id
end

def image?
Expand Down
6 changes: 4 additions & 2 deletions lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw,
:cached_tokens, :cache_creation_tokens
:cached_tokens, :cache_creation_tokens, :reasoning_id
attr_writer :content

def initialize(options = {})
Expand All @@ -19,6 +19,7 @@ def initialize(options = {})
@tool_call_id = options[:tool_call_id]
@cached_tokens = options[:cached_tokens]
@cache_creation_tokens = options[:cache_creation_tokens]
@reasoning_id = options[:reasoning_id]
@raw = options[:raw]

ensure_valid_role
Expand Down Expand Up @@ -54,7 +55,8 @@ def to_h
output_tokens: output_tokens,
model_id: model_id,
cache_creation_tokens: cache_creation_tokens,
cached_tokens: cached_tokens
cached_tokens: cached_tokens,
reasoning_id: reasoning_id
}.compact
end

Expand Down
147 changes: 119 additions & 28 deletions lib/ruby_llm/providers/openai/response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,39 +40,72 @@ def render_response_payload(messages, tools:, temperature:, model:, cache_prompt
payload
end

def format_input(messages) # rubocop:disable Metrics/PerceivedComplexity
def format_input(messages)
all_tool_calls = messages.flat_map do |m|
m.tool_calls&.values || []
end
messages.flat_map do |msg|
if msg.tool_call?
msg.tool_calls.map do |_, tc|
{
type: 'function_call',
call_id: tc.id,
name: tc.name,
arguments: JSON.generate(tc.arguments),
status: 'completed'
}
end
elsif msg.role == :tool
{
type: 'function_call_output',
call_id: all_tool_calls.detect { |tc| tc.id == msg.tool_call_id }&.id,
output: msg.content,
status: 'completed'
}
else
{
type: 'message',
role: format_role(msg.role),
content: ResponseMedia.format_content(msg.content),
status: 'completed'
}.compact
end
messages.flat_map { |msg| format_message_input(msg, all_tool_calls) }.flatten
end

# Routes a single conversation message to the formatter for its kind.
#
# Precedence: messages carrying tool calls, then tool results (role :tool),
# then assistant messages whose first attachment is a generated image;
# everything else is formatted as a regular message.
#
# @param msg [Message] the message to convert
# @param all_tool_calls [Array] every tool call in the conversation, used to
#   resolve the call id of tool results
# @return [Hash, Array<Hash>] one or more Responses API input items
def format_message_input(msg, all_tool_calls)
  return format_tool_call_message(msg) if msg.tool_call?
  return format_tool_response_message(msg, all_tool_calls) if msg.role == :tool
  return format_image_generation_message(msg) if assistant_message_with_image_attachment?(msg)

  format_regular_message(msg)
end

# Turns each tool call stored on the message into a completed
# `function_call` input item, with the arguments serialized as JSON.
#
# @param msg [Message] a message whose `tool_calls` maps keys to tool calls
# @return [Array<Hash>] one `function_call` item per tool call
def format_tool_call_message(msg)
  msg.tool_calls.values.map do |call|
    serialized_arguments = JSON.generate(call.arguments)

    {
      type: 'function_call',
      call_id: call.id,
      name: call.name,
      arguments: serialized_arguments,
      status: 'completed'
    }
  end
end

# Formats a tool result as a completed `function_call_output` input item.
#
# The call id is taken from the known tool call whose id equals the
# message's `tool_call_id`; it is nil when no such call exists.
#
# @param msg [Message] the tool-role message holding the tool output
# @param all_tool_calls [Array] every tool call seen in the conversation
# @return [Hash] the `function_call_output` item
def format_tool_response_message(msg, all_tool_calls)
  matching_call = all_tool_calls.find { |call| call.id == msg.tool_call_id }

  {
    type: 'function_call_output',
    call_id: matching_call&.id,
    output: msg.content,
    status: 'completed'
  }
end

# Replays an assistant message that carries generated images as Responses
# API input items.
#
# Every ImageAttachment on the message yields an `image_generation_call`
# item referencing its id. Previously only the first attachment was
# replayed, so additional images generated in the same turn were dropped
# from the conversation history. A `reasoning` item is emitted immediately
# before the first image that refers to a given reasoning id, and only once
# per id.
#
# NOTE(review): any text on the message is not replayed here (same as
# before) — confirm whether that is intended.
#
# @param msg [Message] assistant message whose content has image attachments
# @return [Array<Hash>] reasoning and image_generation_call items, in order
def format_image_generation_message(msg)
  images = msg.content.attachments.grep(RubyLLM::ImageAttachment)
  emitted_reasoning_ids = []

  images.flat_map do |image|
    items = []
    reasoning_id = image.reasoning_id

    # Images built from one response share a reasoning id; reference it once.
    if reasoning_id && !emitted_reasoning_ids.include?(reasoning_id)
      emitted_reasoning_ids << reasoning_id
      items << { type: 'reasoning', id: reasoning_id, summary: [] }
    end

    items << { type: 'image_generation_call', id: image.id }
  end
end

# Formats a plain message as a completed `message` input item.
#
# Entries whose value is nil (for example a message without content) are
# removed from the resulting hash.
#
# @param msg [Message] the message to convert
# @return [Hash] the `message` item
def format_regular_message(msg)
  role = format_role(msg.role)
  content = ResponseMedia.format_content(msg.content)

  { type: 'message', role: role, content: content, status: 'completed' }.compact
end

def format_role(role)
case role
when :system
Expand All @@ -93,23 +126,81 @@ def parse_respond_response(response)

Message.new(
role: :assistant,
content: all_output_text(outputs),
content: all_output_content(outputs),
tool_calls: parse_response_tool_calls(outputs),
input_tokens: data['usage']['input_tokens'],
output_tokens: data['usage']['output_tokens'],
cached_tokens: data.dig('usage', 'input_tokens_details', 'cached_tokens'),
model_id: data['model'],
reasoning_id: extract_reasoning_id(outputs),
raw: response
)
end

# Builds the message content for a response's output items.
#
# Returns the joined output text on its own when the response contains no
# `image_generation_call` items; otherwise returns the result of
# build_content_with_images for the text plus those items.
#
# @param outputs [Array<Hash>] the response's output items
# @return [String, Object] plain text, or content with image attachments
def all_output_content(outputs)
  # Stored on the instance; build_content_with_images reads it back.
  @current_outputs = outputs

  text = extract_text_content(outputs)
  generated_images = outputs.select { |item| item['type'] == 'image_generation_call' }

  generated_images.empty? ? text : build_content_with_images(text, generated_images)
end

private

# Collects the text of every `output_text` part found in the `message`
# output items and joins the pieces with newlines.
#
# Parts of other types, and parts without text, are skipped.
#
# @param outputs [Array<Hash>] the response's output items
# @return [String] the joined text ("" when there is none)
def extract_text_content(outputs)
  outputs
    .select { |item| item['type'] == 'message' }
    .flat_map { |item| item['content'] }
    .select { |part| part['type'] == 'output_text' && part['text'] }
    .map { |part| part['text'] }
    .join("\n")
end

# Wraps the text in a RubyLLM::Content and attaches one image per
# `image_generation_call` output item.
#
# The reasoning id is looked up from the outputs previously stored in
# @current_outputs and is passed along with every attached image.
#
# @param text_content [String] text extracted from the response
# @param image_outputs [Array<Hash>] the `image_generation_call` items
# @return [RubyLLM::Content] the content with its image attachments
def build_content_with_images(text_content, image_outputs)
  RubyLLM::Content.new(text_content).tap do |content|
    reasoning_id = extract_reasoning_id(@current_outputs)
    image_outputs.each do |image_output|
      attach_image_to_content(content, image_output, reasoning_id)
    end
  end
end

# Attaches the image held by one `image_generation_call` output item to the
# given content.
#
# The MIME type is derived from the item's `output_format`, falling back to
# "png" when the field is absent.
#
# @param content [RubyLLM::Content] content receiving the attachment
# @param output [Hash] the `image_generation_call` output item
# @param reasoning_id [String, nil] reasoning id stored on the attachment
# @return [Object] whatever `content.attach` returns
def attach_image_to_content(content, output, reasoning_id)
  extension = output['output_format'] || 'png'

  attachment = RubyLLM::ImageAttachment.new(
    data: output['result'],
    mime_type: "image/#{extension}",
    model_id: nil,
    id: output['id'],
    reasoning_id: reasoning_id
  )

  content.attach(attachment)
end

# Joins, with newlines, the text of all `output_text` parts contained in
# the `message` output items. Other item and part types are ignored, as are
# parts without text.
#
# @param outputs [Array<Hash>] the response's output items
# @return [String] the joined text ("" when there is none)
def all_output_text(outputs)
  pieces = outputs.each_with_object([]) do |item, collected|
    next unless item['type'] == 'message'

    item['content'].each do |part|
      collected << part['text'] if part['type'] == 'output_text' && part['text']
    end
  end

  pieces.join("\n")
end

# Tells whether the message is an assistant message whose content is a
# RubyLLM::Content with at least one attachment, the first of which is a
# RubyLLM::ImageAttachment.
#
# @param msg [Message] the message to inspect
# @return [Boolean]
def assistant_message_with_image_attachment?(msg)
  return false unless msg.role == :assistant

  content = msg.content
  return false unless content.is_a?(RubyLLM::Content)

  attachments = content.attachments
  attachments.any? && attachments.first.is_a?(RubyLLM::ImageAttachment)
end

# Returns the id of the first `reasoning` output item, or nil when the
# outputs contain no such item.
#
# @param outputs [Array<Hash>] the response's output items
# @return [String, nil]
def extract_reasoning_id(outputs)
  first_reasoning = outputs.find { |item| item['type'] == 'reasoning' }
  first_reasoning ? first_reasoning['id'] : nil
end
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions lib/ruby_llm/providers/openai/response_media.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ class OpenAI
module ResponseMedia
module_function

def format_content(content)
def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
return content unless content.is_a?(Content)

parts = []
parts << format_text(content.text) if content.text
parts << format_text(content.text) if content.text && !content.text.empty?

content.attachments.each do |attachment|
case attachment.type
Expand Down
131 changes: 105 additions & 26 deletions lib/ruby_llm/providers/openai/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,64 @@ def build_chunk(data)
def build_responses_chunk(data)
case data['type']
when 'response.output_text.delta'
Chunk.new(
role: :assistant,
model_id: nil,
content: data['delta'],
tool_calls: nil,
input_tokens: nil,
output_tokens: nil
)
build_text_delta_chunk(data)
when 'response.function_call_arguments.delta'
build_tool_call_delta_chunk(data)
when 'response.image_generation_call.partial_image'
build_partial_image_chunk(data)
when 'response.output_item.added'
if data.dig('item', 'type') == 'function_call'
build_tool_call_start_chunk(data)
else
build_empty_chunk(data)
end
handle_output_item_added(data)
when 'response.output_item.done'
if data.dig('item', 'type') == 'function_call'
build_tool_call_complete_chunk(data)
else
build_empty_chunk(data)
end
handle_output_item_done(data)
when 'response.completed'
Chunk.new(
role: :assistant,
model_id: data.dig('response', 'model'),
content: nil,
tool_calls: nil,
input_tokens: data.dig('response', 'usage', 'input_tokens'),
output_tokens: data.dig('response', 'usage', 'output_tokens')
)
build_completion_chunk(data)
else
build_empty_chunk(data)
end
end

# Builds an assistant chunk whose content is the event's `delta` text.
# Model id, tool calls and token counts are left nil.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def build_text_delta_chunk(data)
  delta_text = data['delta']

  Chunk.new(
    role: :assistant,
    content: delta_text,
    model_id: nil,
    tool_calls: nil,
    input_tokens: nil,
    output_tokens: nil
  )
end

# Picks the chunk builder for an output-item-added event based on the
# item's type: function calls start a tool-call chunk, reasoning items
# produce a reasoning chunk, and any other type yields an empty chunk.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def handle_output_item_added(data)
  case data.dig('item', 'type')
  when 'function_call' then build_tool_call_start_chunk(data)
  when 'reasoning' then build_reasoning_chunk(data)
  else build_empty_chunk(data)
  end
end

# Picks the chunk builder for an output-item-done event based on the item's
# type: function calls complete a tool-call chunk, image generation calls
# produce a completed-image chunk, and any other type yields an empty chunk.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def handle_output_item_done(data)
  case data.dig('item', 'type')
  when 'function_call' then build_tool_call_complete_chunk(data)
  when 'image_generation_call' then build_completed_image_chunk(data)
  else build_empty_chunk(data)
  end
end

# Builds the assistant chunk carrying the model id and token usage read
# from the event's `response` object. Content and tool calls are nil, and
# any value missing from the payload is nil as well.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def build_completion_chunk(data)
  model = data.dig('response', 'model')
  usage = data.dig('response', 'usage') || {}

  Chunk.new(
    role: :assistant,
    model_id: model,
    content: nil,
    tool_calls: nil,
    input_tokens: usage['input_tokens'],
    output_tokens: usage['output_tokens']
  )
end

def build_chat_completions_chunk(data)
Chunk.new(
role: :assistant,
Expand Down Expand Up @@ -145,6 +167,63 @@ def build_empty_chunk(_data)
)
end

# Builds an assistant chunk for a partial-image event. The base64 payload in
# `partial_image_b64` is wrapped as image content with MIME type
# "image/png"; no model id or revised prompt is supplied.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def build_partial_image_chunk(data)
  partial_image = build_image_content(data['partial_image_b64'], 'image/png', nil, nil)

  Chunk.new(
    role: :assistant,
    content: partial_image,
    model_id: nil,
    tool_calls: nil,
    input_tokens: nil,
    output_tokens: nil
  )
end

# Builds an assistant chunk for a finished `image_generation_call` item.
#
# The image's MIME type comes from the item's `output_format` (default
# "png") and the item's `revised_prompt`, when present, becomes the text of
# the content. The item's id is stored on the attachment, as the
# non-streaming path does; previously it was dropped, leaving streamed
# images with a nil id.
#
# @param data [Hash] the streaming event payload; `data['item']` is the
#   completed output item
# @return [Chunk]
def build_completed_image_chunk(data)
  item = data['item']
  output_format = item['output_format'] || 'png'

  content = build_image_content(
    item['result'],
    "image/#{output_format}",
    nil,
    item['revised_prompt'],
    id: item['id']
  )

  Chunk.new(
    role: :assistant,
    model_id: nil,
    content: content,
    tool_calls: nil,
    input_tokens: nil,
    output_tokens: nil
  )
end

# Builds an otherwise empty assistant chunk that carries the id of the
# reasoning item found in the event.
#
# @param data [Hash] the streaming event payload
# @return [Chunk]
def build_reasoning_chunk(data)
  Chunk.new(
    role: :assistant,
    model_id: nil,
    content: nil,
    tool_calls: nil,
    input_tokens: nil,
    output_tokens: nil,
    reasoning_id: data.dig('item', 'id')
  )
end

# Wraps base64 image data in a RubyLLM::Content with a single image
# attachment.
#
# @param base64_data [String] base64-encoded image bytes
# @param mime_type [String] MIME type of the image
# @param model_id [String, nil] model id passed to the attachment
# @param revised_prompt [String, nil] used as the content text; "" when nil
# @param id [String, nil] image generation call id stored on the attachment
#   (optional; defaults to nil so existing positional callers still work)
# @return [RubyLLM::Content]
def build_image_content(base64_data, mime_type, model_id, revised_prompt = nil, id: nil)
  text_content = revised_prompt || ''
  content = RubyLLM::Content.new(text_content)
  content.attach(
    RubyLLM::ImageAttachment.new(
      data: base64_data,
      mime_type: mime_type,
      model_id: model_id,
      id: id
    )
  )
  content
end

def create_streaming_tool_call(tool_call_data)
ToolCall.new(
id: tool_call_data['id'],
Expand Down
Loading