Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/proxy/guardrails.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use aisix_guardrail::{
use async_trait::async_trait;
use thiserror::Error;

pub(crate) mod streaming;

use crate::{
config::entities::{Model, ResourceEntry, ResourceRegistry, guardrails::GuardrailConfig},
gateway::{
Expand Down
188 changes: 188 additions & 0 deletions src/proxy/guardrails/streaming.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
use std::collections::VecDeque;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct StreamCheckpoint(pub u64);

#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) enum StreamGuardrailDecision {
Pending,
Allow { approved_through: StreamCheckpoint },
Block { reason: String },
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum WholeResponseReplayAction<Chunk> {
Buffered(Chunk),
Emit(Chunk),
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum WholeResponseReplayFinalize {
NeedsGuardrailCheck,
Finished,
}

#[derive(Debug)]
pub(crate) struct WholeResponseReplay<Chunk> {
next_checkpoint: u64,
buffered_chunks: VecDeque<(StreamCheckpoint, Chunk)>,
}

impl<Chunk> Default for WholeResponseReplay<Chunk> {
fn default() -> Self {
Self {
next_checkpoint: 0,
buffered_chunks: VecDeque::new(),
}
}
}

impl<Chunk> WholeResponseReplay<Chunk> {
pub(crate) fn push(&mut self, chunk: Chunk) -> StreamGuardrailDecision {
let checkpoint = StreamCheckpoint(self.next_checkpoint);
self.next_checkpoint = self.next_checkpoint.saturating_add(1);
self.buffered_chunks.push_back((checkpoint, chunk));
StreamGuardrailDecision::Pending
}

pub(crate) fn allow_all(self) -> (StreamGuardrailDecision, VecDeque<Chunk>) {
let approved_through = self
.buffered_chunks
.back()
.map(|(checkpoint, _)| *checkpoint)
.unwrap_or(StreamCheckpoint(0));
let buffered_chunks = self
.buffered_chunks
.into_iter()
.map(|(_, chunk)| chunk)
.collect();

(
StreamGuardrailDecision::Allow { approved_through },
buffered_chunks,
)
}
}

#[derive(Debug, Default)]
pub(crate) struct WholeResponseReplayDriver<Chunk> {
replay: Option<WholeResponseReplay<Chunk>>,
replay_queue: VecDeque<Chunk>,
upstream_finished: bool,
}

impl<Chunk> WholeResponseReplayDriver<Chunk> {
pub(crate) fn new(enabled: bool) -> Self {
Self {
replay: enabled.then(WholeResponseReplay::default),
replay_queue: VecDeque::new(),
upstream_finished: false,
}
}

pub(crate) fn take_replay_chunk(&mut self) -> Option<Chunk> {
self.replay_queue.pop_front()
}

pub(crate) fn finish_upstream(&mut self) -> WholeResponseReplayFinalize {
if self.replay.is_some() {
WholeResponseReplayFinalize::NeedsGuardrailCheck
} else {
self.upstream_finished = true;
WholeResponseReplayFinalize::Finished
}
}

pub(crate) fn is_upstream_finished(&self) -> bool {
self.upstream_finished
}

pub(crate) fn is_buffering(&self) -> bool {
self.replay.is_some()
}
}

impl<Chunk: Clone> WholeResponseReplayDriver<Chunk> {
pub(crate) fn push_upstream_chunk(&mut self, chunk: Chunk) -> WholeResponseReplayAction<Chunk> {
if let Some(replay) = self.replay.as_mut() {
let decision = replay.push(chunk.clone());
debug_assert!(matches!(decision, StreamGuardrailDecision::Pending));
WholeResponseReplayAction::Buffered(chunk)
} else {
WholeResponseReplayAction::Emit(chunk)
}
}

pub(crate) fn approve_buffered(&mut self) -> StreamGuardrailDecision {
let Some(replay) = self.replay.take() else {
debug_assert!(
false,
"approve_buffered called without buffered replay state"
);
return StreamGuardrailDecision::Allow {
approved_through: StreamCheckpoint(0),
};
};

let (decision, drained_chunks) = replay.allow_all();
self.replay_queue = drained_chunks;
self.upstream_finished = true;
decision
}
}

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;

use super::{
StreamGuardrailDecision, WholeResponseReplayAction, WholeResponseReplayDriver,
WholeResponseReplayFinalize,
};

#[test]
fn whole_response_replay_driver_passes_through_when_disabled() {
let mut driver = WholeResponseReplayDriver::new(false);

assert_eq!(
driver.push_upstream_chunk(7_u8),
WholeResponseReplayAction::Emit(7)
);
assert!(!driver.is_buffering());
assert_eq!(
driver.finish_upstream(),
WholeResponseReplayFinalize::Finished
);
assert!(driver.is_upstream_finished());
assert_eq!(driver.take_replay_chunk(), None);
}

#[test]
fn whole_response_replay_driver_replays_buffered_chunks_after_approval() {
let mut driver = WholeResponseReplayDriver::new(true);

assert_eq!(
driver.push_upstream_chunk(String::from("safe ")),
WholeResponseReplayAction::Buffered(String::from("safe ")),
);
assert_eq!(
driver.push_upstream_chunk(String::from("response")),
WholeResponseReplayAction::Buffered(String::from("response")),
);
assert!(driver.is_buffering());
assert_eq!(
driver.finish_upstream(),
WholeResponseReplayFinalize::NeedsGuardrailCheck,
);
assert!(matches!(
driver.approve_buffered(),
StreamGuardrailDecision::Allow { .. }
));
assert!(!driver.is_buffering());
assert!(driver.is_upstream_finished());
assert_eq!(driver.take_replay_chunk(), Some(String::from("safe ")));
assert_eq!(driver.take_replay_chunk(), Some(String::from("response")));
assert_eq!(driver.take_replay_chunk(), None);
}
}
32 changes: 32 additions & 0 deletions src/proxy/handlers/chat_completions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use axum::response::sse::Event as SseEvent;
use fastrace::Span;
use opentelemetry_semantic_conventions::attribute::GEN_AI_RESPONSE_FINISH_REASONS;
use reqwest::Url;
use serde_json::json;
use span_attributes::{
StreamOutputCollector, chunk_span_properties, request_span_properties, response_span_properties,
};
Expand All @@ -30,6 +31,19 @@ use crate::{

pub(crate) struct ChatCompletionsAdapter;

fn openai_error_sse_event(message: String) -> SseEvent {
SseEvent::default().data(
json!({
"error": {
"message": message,
"type": "invalid_request_error",
"code": "gateway_error",
}
})
.to_string(),
)
}

impl FormatHandlerAdapter for ChatCompletionsAdapter {
type Format = OpenAIChatFormat;
type Request = ChatCompletionRequest;
Expand Down Expand Up @@ -155,6 +169,24 @@ impl FormatHandlerAdapter for ChatCompletionsAdapter {
Ok(())
}

fn guardrail_stream_output_payload(
_lifecycle_state: &Self::LifecycleState,
collector: &Self::Collector,
) -> Result<Option<crate::guardrail::traits::GuardrailCheckPayload>, Self::Error> {
let payload = output_guardrail_payload_from_chat_messages(&collector.output_messages())
.map(crate::guardrail::traits::GuardrailCheckPayload::Output)
.map_err(bridge_error)?;
Ok(Some(payload))
}

fn lifecycle_error_event(error: &Self::Error) -> Option<SseEvent> {
let message = match error {
ChatCompletionError::GatewayError(err) => err.to_string(),
_ => error.to_string(),
};
Some(openai_error_sse_event(message))
}

fn end_of_stream_event(saw_item: bool) -> Option<SseEvent> {
saw_item.then(|| SseEvent::default().data("[DONE]"))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::collections::BTreeMap;

use super::message_attributes::{MessageContentView, MessageView, OutputMessageView, ToolCallView};
use crate::{
gateway::types::openai::ChatCompletionChunk,
gateway::types::openai::{
ChatCompletionChunk, ChatMessage, FunctionCall, MessageContent, ToolCall,
},
proxy::utils::trace::span_message_attributes::output_message_span_properties,
};

Expand Down Expand Up @@ -70,6 +72,36 @@ impl StreamOutputCollector {
output_message_span_properties(&self.output_message_views())
}

pub(crate) fn output_messages(&self) -> Vec<ChatMessage> {
self.choices
.values()
.map(|choice| ChatMessage {
role: choice.role.clone().unwrap_or_else(|| "assistant".into()),
content: (!choice.content.is_empty())
.then(|| MessageContent::Text(choice.content.clone())),
name: None,
tool_calls: (!choice.tool_calls.is_empty()).then(|| {
choice
.tool_calls
.values()
.filter_map(|tool_call| {
let name = tool_call.name.clone()?;
Some(ToolCall {
id: tool_call.id.clone().unwrap_or_default(),
r#type: "function".into(),
function: FunctionCall {
name,
arguments: tool_call.arguments.clone(),
},
})
})
.collect()
}),
tool_call_id: None,
})
.collect()
}

fn output_message_views(&self) -> Vec<OutputMessageView> {
self.choices
.values()
Expand Down
Loading