Skip to content

Commit 3aa8c73

Browse files
authored
Support passing images to factorial-agent (#29)
Adds support for forwarding images to factorial-agent. Images can be passed either as binary blobs (sent base64-encoded) or as remote URLs.
1 parent 7567d71 commit 3aa8c73

File tree

10 files changed

+578
-2
lines changed

10 files changed

+578
-2
lines changed

README.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,68 @@ messages = [
8989
]
9090
```
9191

92+
**Sending Images:**
93+
94+
For vision-capable agents, you can include images in your messages:
95+
96+
```ruby
97+
# Read image from file
98+
image_data = File.binread('path/to/image.png')
99+
100+
# Create a message with text and image
101+
message = Ai.user_message_with_image(
102+
"What objects are in this image?",
103+
image_data,
104+
"image/png"
105+
)
106+
107+
messages = [message]
108+
```
109+
110+
For multiple images in one message, use manual construction:
111+
112+
```ruby
113+
image1 = File.binread('photo1.jpg')
114+
image2 = File.binread('photo2.png')
115+
116+
message = Ai::Message.new(
117+
role: Ai::MessageRole::User,
118+
content: [
119+
Ai::TextPart.new(text: "Compare these images:"),
120+
Ai::ImagePart.new(image_data: image1, media_type: "image/jpeg"),
121+
Ai::ImagePart.new(image_data: image2, media_type: "image/png")
122+
]
123+
)
124+
```
125+
126+
**Using Image URLs:**
127+
128+
Instead of sending image data, you can send a URL for the agent to fetch the image:
129+
130+
```ruby
131+
# Create a message with text and image URL
132+
message = Ai.user_message_with_image_url(
133+
"What objects are in this image?",
134+
"https://example.com/photo.jpg",
135+
"image/jpeg"
136+
)
137+
138+
messages = [message]
139+
```
140+
141+
For multiple image URLs or mixing URLs with text:
142+
143+
```ruby
144+
message = Ai::Message.new(
145+
role: Ai::MessageRole::User,
146+
content: [
147+
Ai::TextPart.new(text: "Compare these images from the web:"),
148+
Ai::ImagePart.new(image_url: "https://example.com/image1.jpg", media_type: "image/jpeg"),
149+
Ai::ImagePart.new(image_url: "https://example.com/image2.png", media_type: "image/png")
150+
]
151+
)
152+
```
153+
92154
#### Step 3.3: Call the Agent
93155

94156
```ruby

lib/ai.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class Error < StandardError
2727
autoload :LanguageModelUsage, 'ai/types/language_model_usage'
2828
autoload :MessageRole, 'ai/types/message_role'
2929
autoload :Message, 'ai/types/message'
30+
autoload :TextPart, 'ai/types/text_part'
31+
autoload :ImagePart, 'ai/types/image_part'
3032
autoload :ReasoningDetail, 'ai/types/reasoning_detail'
3133
autoload :ResponseMessage, 'ai/types/response_message'
3234
autoload :ResponseMetadata, 'ai/types/response_metadata'
@@ -57,6 +59,22 @@ def self.system_message(content)
5759
Ai::Message.new(role: Ai::MessageRole::System, content: content)
5860
end
5961

62+
# Builds a user-role message made of a text part followed by an image part
# constructed from the given image bytes and media type.
sig { params(text: String, image_data: String, media_type: String).returns(Ai::Message) }
def self.user_message_with_image(text, image_data, media_type)
  text_part = Ai::TextPart.new(text: text)
  image_part = Ai::ImagePart.new(image_data: image_data, media_type: media_type)

  Ai::Message.new(role: Ai::MessageRole::User, content: [text_part, image_part])
end
69+
70+
# Builds a user-role message made of a text part followed by an image part
# that references the image by URL instead of carrying its bytes.
sig { params(text: String, image_url: String, media_type: String).returns(Ai::Message) }
def self.user_message_with_image_url(text, image_url, media_type)
  text_part = Ai::TextPart.new(text: text)
  image_part = Ai::ImagePart.new(image_url: image_url, media_type: media_type)

  Ai::Message.new(role: Ai::MessageRole::User, content: [text_part, image_part])
end
77+
6078
sig { returns(Ai::Client) }
6179
def self.client
6280
@client ||=

lib/ai/clients/mastra.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ def response(url:, messages:, options:)
244244

245245
# convert to camelCase and unpacking for API compatibility
246246
camelized_options = deep_camelize_keys(options)
247-
request.body = { messages: messages, **camelized_options }.to_json
247+
serialized_messages = messages.map(&:as_json)
248+
request.body = { messages: serialized_messages, **camelized_options }.to_json
248249

249250
response = http.request(request)
250251

lib/ai/types/image_part.rb

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# typed: strict
2+
3+
require 'base64'
4+
5+
module Ai
  # One image segment of a multi-part message.
  #
  # The image is supplied either as a string of image bytes (+image_data+) or
  # as a URL (+image_url+). Exactly one of the two must be given; construction
  # raises ArgumentError otherwise.
  class ImagePart < T::Struct
    extend T::Sig

    const :type, String, default: 'image'
    const :image_data, T.nilable(String), default: nil
    const :image_url, T.nilable(String), default: nil
    const :media_type, String

    sig do
      params(
        media_type: String,
        type: String,
        image_data: T.nilable(String),
        image_url: T.nilable(String)
      ).void
    end
    # @raise [ArgumentError] when neither or both of image_data / image_url are given
    def initialize(media_type:, type: 'image', image_data: nil, image_url: nil)
      super
      validate!
    end

    # Serializes the part to a hash with +:type+, +:image+ and +:mediaType+ keys.
    # +:image+ is the URL when one was given, otherwise a base64 data URL built
    # from +image_data+.
    #
    # The optional argument is accepted and ignored so the method matches the
    # conventional +as_json(options = nil)+ signature; JSON encoders that pass
    # an options hash (ActiveSupport's, for example) would otherwise raise
    # ArgumentError.
    sig { params(_options: T.untyped).returns(T::Hash[Symbol, String]) }
    def as_json(_options = nil)
      { type: type, image: image_url || inline_data_url, mediaType: media_type }
    end

    private

    # Encodes +image_data+ as a "data:<media_type>;base64,<payload>" URL.
    sig { returns(String) }
    def inline_data_url
      # `.b` yields a binary-encoded copy so the bytes are encoded as-is.
      encoded = Base64.strict_encode64(T.must(image_data).b)
      "data:#{media_type};base64,#{encoded}"
    end

    # Enforces that exactly one image source was provided.
    sig { void }
    def validate!
      if image_data.nil? && image_url.nil?
        raise ArgumentError, "Either image_data or image_url must be provided"
      end

      return if image_data.nil? || image_url.nil?

      raise ArgumentError, "Cannot provide both image_data and image_url"
    end
  end
end

lib/ai/types/message.rb

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,31 @@
11
# typed: strict
22

33
module Ai
  # Message content is either a plain string or an ordered list of text/image parts.
  MessageContent = T.type_alias { T.any(String, T::Array[T.any(Ai::TextPart, Ai::ImagePart)]) }

  # A single chat message: a role plus its content.
  class Message < T::Struct
    extend T::Sig

    const :role, Ai::MessageRole
    const :content, MessageContent

    # Serializes the message to a hash with +:role+ and +:content+ keys.
    # String content is passed through unchanged; an array of parts is
    # replaced by each part's own +as_json+ hash.
    #
    # The optional argument is accepted and ignored so the method matches the
    # conventional +as_json(options = nil)+ signature; JSON encoders that pass
    # an options hash (ActiveSupport's, for example) would otherwise raise
    # ArgumentError.
    sig do
      params(_options: T.untyped).returns(
        T::Hash[
          Symbol,
          T.any(String, T::Array[T::Hash[Symbol, String]])
        ]
      )
    end
    def as_json(_options = nil)
      # Bind to a local so the `is_a?` check narrows the type without a cast.
      body = content
      serialized_content = body.is_a?(String) ? body : body.map(&:as_json)

      { role: role.serialize, content: serialized_content }
    end
  end
end

lib/ai/types/text_part.rb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# typed: strict
2+
3+
module Ai
  # One text segment of a multi-part message.
  class TextPart < T::Struct
    extend T::Sig

    const :type, String, default: 'text'
    const :text, String

    # Serializes the part to a hash with +:type+ and +:text+ keys.
    #
    # The optional argument is accepted and ignored so the method matches the
    # conventional +as_json(options = nil)+ signature; JSON encoders that pass
    # an options hash (ActiveSupport's, for example) would otherwise raise
    # ArgumentError.
    sig { params(_options: T.untyped).returns(T::Hash[Symbol, String]) }
    def as_json(_options = nil)
      { type: type, text: text }
    end
  end
end

spec/lib/ai/ai_spec.rb

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,159 @@
7070
expect(message.role).to eq(Ai::MessageRole::System)
7171
end
7272
end
73+
74+
describe '.user_message_with_image' do
  # The builder returns a user message holding exactly two content parts.
  it 'creates a user message with text and image parts' do
    message = Ai.user_message_with_image('What is in this image?', 'binary image data', 'image/png')

    expect(message).to be_a(Ai::Message)
    expect(message.role).to eq(Ai::MessageRole::User)
    expect(message.content).to be_an(Array)
    expect(message.content.size).to eq(2)
  end

  # The first part carries the text, the second the image bytes and media type.
  it 'creates proper content parts' do
    prompt = 'Describe this'
    bytes = 'image bytes'
    mime = 'image/jpeg'

    text_part, image_part = Ai.user_message_with_image(prompt, bytes, mime).content

    expect(text_part).to be_a(Ai::TextPart)
    expect(text_part.text).to eq(prompt)

    expect(image_part).to be_a(Ai::ImagePart)
    expect(image_part.image_data).to eq(bytes)
    expect(image_part.media_type).to eq(mime)
  end

  # The serialized form uses plain hashes, with the image as a data URL.
  it 'serializes correctly for API calls' do
    prompt = 'Analyze this'

    payload = Ai.user_message_with_image(prompt, 'test image', 'image/png').as_json

    expect(payload[:role]).to eq('user')
    expect(payload[:content]).to be_an(Array)

    text_json, image_json = payload[:content]
    expect(text_json[:type]).to eq('text')
    expect(text_json[:text]).to eq(prompt)
    expect(image_json[:type]).to eq('image')
    expect(image_json[:image]).to start_with('data:image/png;base64,')
  end

  # The media type appears both as its own key and in the data URL prefix.
  it 'handles different image formats' do
    %w[image/png image/jpeg image/gif image/webp].each do |mime|
      image_json = Ai.user_message_with_image('Test', 'data', mime).as_json[:content][1]

      expect(image_json[:mediaType]).to eq(mime)
      expect(image_json[:image]).to start_with("data:#{mime};base64,")
    end
  end

  # Round-trips non-text bytes through the data URL to confirm lossless encoding.
  it 'properly encodes binary data to base64' do
    # Simulate actual binary data (PNG header bytes)
    png_header = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')

    payload = Ai.user_message_with_image('What is this?', png_header, 'image/png').as_json

    # Strip the data URL prefix, leaving only the base64 payload
    encoded = payload[:content][1][:image].delete_prefix('data:image/png;base64,')

    expect(Base64.strict_decode64(encoded)).to eq(png_header)
  end
end
146+
147+
describe '.user_message_with_image_url' do
  # The builder returns a user message holding exactly two content parts.
  it 'creates a user message with text and image URL parts' do
    message = Ai.user_message_with_image_url(
      'What is in this image?',
      'https://example.com/photo.jpg',
      'image/jpeg'
    )

    expect(message).to be_a(Ai::Message)
    expect(message.role).to eq(Ai::MessageRole::User)
    expect(message.content).to be_an(Array)
    expect(message.content.size).to eq(2)
  end

  # The image part stores the URL and leaves image_data unset.
  it 'creates proper content parts with URL' do
    prompt = 'Describe this'
    url = 'https://cdn.example.com/image.png'
    mime = 'image/png'

    text_part, image_part = Ai.user_message_with_image_url(prompt, url, mime).content

    expect(text_part).to be_a(Ai::TextPart)
    expect(text_part.text).to eq(prompt)

    expect(image_part).to be_a(Ai::ImagePart)
    expect(image_part.image_url).to eq(url)
    expect(image_part.image_data).to be_nil
    expect(image_part.media_type).to eq(mime)
  end

  # The serialized image value is the URL itself, never a base64 data URL.
  it 'serializes correctly for API calls with URL' do
    prompt = 'Analyze this'
    url = 'https://example.com/test.jpg'
    mime = 'image/jpeg'

    payload = Ai.user_message_with_image_url(prompt, url, mime).as_json

    expect(payload[:role]).to eq('user')
    expect(payload[:content]).to be_an(Array)
    expect(payload[:content].size).to eq(2)

    text_json, image_json = payload[:content]

    # Check text part
    expect(text_json[:type]).to eq('text')
    expect(text_json[:text]).to eq(prompt)

    # Check image part - should be URL, not base64
    expect(image_json[:type]).to eq('image')
    expect(image_json[:image]).to eq(url)
    expect(image_json[:image]).not_to include('base64')
    expect(image_json[:mediaType]).to eq(mime)
  end

  # Both http and https URLs pass through unchanged alongside their media type.
  it 'handles different image URL formats' do
    samples = {
      'https://example.com/image.png' => 'image/png',
      'http://test.org/photo.jpg' => 'image/jpeg',
      'https://cdn.example.com/images/12345.webp' => 'image/webp'
    }

    samples.each do |url, mime|
      image_json = Ai.user_message_with_image_url('Test', url, mime).as_json[:content][1]

      expect(image_json[:image]).to eq(url)
      expect(image_json[:mediaType]).to eq(mime)
    end
  end

  it 'works with HTTPS URLs' do
    payload = Ai.user_message_with_image_url(
      'Analyze',
      'https://secure.example.com/image.jpg',
      'image/jpeg'
    ).as_json

    expect(payload[:content][1][:image]).to start_with('https://')
  end
end
73228
end

0 commit comments

Comments
 (0)