Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,32 @@

"""Validators package init."""

from ._validation_constants import MessageRole, ContentType, EvaluationLevel
from ._validator_interface import ValidatorInterface
from ._conversation_validator import ConversationValidator
from ._tool_definitions_validator import ToolDefinitionsValidator
from ._tool_calls_validator import ToolCallsValidator
from ._task_navigation_efficiency_validator import TaskNavigationEfficiencyValidator
from ._messages_or_query_response_validator import MessagesOrQueryResponseInputValidator
from ._evaluation_level_utils import (
_resolve_evaluation_level,
_merge_query_response_messages,
_split_messages_at_latest_user,
_wrap_string_messages,
)

# Names exported by ``from <package> import *``; every entry is imported above.
__all__ = [
    # From ``_validation_constants``.
    "MessageRole",
    "ContentType",
    "EvaluationLevel",
    # Validator classes.
    "ValidatorInterface",
    "ConversationValidator",
    "ToolDefinitionsValidator",
    "ToolCallsValidator",
    "TaskNavigationEfficiencyValidator",
    "MessagesOrQueryResponseInputValidator",
    # Underscore-prefixed helpers from ``_evaluation_level_utils``; listing them
    # here is what makes a star-import pick them up despite the leading underscore.
    "_resolve_evaluation_level",
    "_merge_query_response_messages",
    "_split_messages_at_latest_user",
    "_wrap_string_messages",
]
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _validate_text_content_item(self, content_item: Dict[str, Any], role: str) -

if not isinstance(content_item["text"], str):
return EvaluationException(
message=f"The 'text' field must be a string in content items.",
message="The 'text' field must be a string in content items.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
Expand Down Expand Up @@ -196,16 +196,16 @@ def _validate_assistant_message(self, message: Dict[str, Any]) -> Optional[Evalu
"""Validate assistant message content."""
content = message["content"]

valid_assistant_content_types = [
ContentType.TEXT,
ContentType.OUTPUT_TEXT,
ContentType.TOOL_CALL,
ContentType.FUNCTION_CALL,
ContentType.MCP_APPROVAL_REQUEST,
ContentType.OPENAPI_CALL,
]
valid_assistant_content_types_as_strings = [t.value for t in valid_assistant_content_types]
if isinstance(content, list):
valid_assistant_content_types = [
ContentType.TEXT,
ContentType.OUTPUT_TEXT,
ContentType.TOOL_CALL,
ContentType.FUNCTION_CALL,
ContentType.MCP_APPROVAL_REQUEST,
ContentType.OPENAPI_CALL,
]
valid_assistant_content_types_as_strings = [t.value for t in valid_assistant_content_types]
for content_item in content:
content_type = content_item["type"]
if content_type not in valid_assistant_content_types:
Expand All @@ -225,19 +225,21 @@ def _validate_assistant_message(self, message: Dict[str, Any]) -> Optional[Evalu
if error:
return error

# Raise error in case of unsupported tools for evaluators that enabled check_for_unsupported_tools
if self.check_for_unsupported_tools:
if content_type == ContentType.TOOL_CALL or content_type == ContentType.OPENAPI_CALL:
name = (
"openapi_call" if content_type == ContentType.OPENAPI_CALL else content_item["name"].lower()
)
if name in self.UNSUPPORTED_TOOLS:
return EvaluationException(
message=f"{name} tool call is currently not supported for {self.error_target.value} evaluator.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.NOT_APPLICABLE,
target=self.error_target,
# Raise error in case of unsupported tools for evaluators that enabled check_for_unsupported_tools
if self.check_for_unsupported_tools:
if content_type == ContentType.TOOL_CALL or content_type == ContentType.OPENAPI_CALL:
name = (
"openapi_call"
if content_type == ContentType.OPENAPI_CALL
else content_item["name"].lower()
)
if name in self.UNSUPPORTED_TOOLS:
return EvaluationException(
message=f"{name} tool call is currently not supported for {self.error_target.value} evaluator.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.NOT_APPLICABLE,
target=self.error_target,
)
return None

def _validate_tool_message(self, message: Dict[str, Any]) -> Optional[EvaluationException]:
Expand Down Expand Up @@ -314,31 +316,30 @@ def _validate_message_dict(self, message: Dict[str, Any]) -> Optional[Evaluation
)
if not content_is_string_or_list_of_dicts:
return EvaluationException(
message=f"The 'content' field must be a string or a list of dictionaries messages.",
message="The 'content' field must be a string or a list of dictionaries messages.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)

if len(content) == 0:
return EvaluationException(
message=f"The 'content' field can't be empty.",
message="The 'content' field can't be empty.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)

if isinstance(content, list):
all_messages_have_type_field = all("type" in item for item in content)
if not all_messages_have_type_field:
if not all("type" in item for item in content):
return EvaluationException(
message=f"Each content item in the 'content' list must contain a 'type' field.",
message="Each content item in the 'content' list must contain a 'type' field.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)

if role in [MessageRole.USER, MessageRole.SYSTEM]:
if role in [MessageRole.USER, MessageRole.SYSTEM, MessageRole.DEVELOPER]:
error = self._validate_user_or_system_message(message, role)
if error:
return error
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
Utilities for resolving evaluation levels and reshaping query/response/messages inputs.
"""

from typing import List, Optional, Tuple, Union
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
from ._validation_constants import MessageRole, EvaluationLevel


def _resolve_evaluation_level(
    evaluation_level: Optional[Union[EvaluationLevel, str]],
    error_target: ErrorTarget,
) -> Optional[EvaluationLevel]:
    """Normalize a caller-supplied ``evaluation_level`` into an ``EvaluationLevel``.

    ``None`` and the empty string both resolve to ``None`` (auto-detect). An
    ``EvaluationLevel`` member is returned unchanged. Any other string is looked
    up by value on the ``EvaluationLevel`` enum.

    :param evaluation_level: The evaluation level to resolve.
    :type evaluation_level: Optional[Union[EvaluationLevel, str]]
    :param error_target: The error target attached to any raised exception.
    :type error_target: ErrorTarget
    :return: The resolved EvaluationLevel, or None for auto-detect.
    :rtype: Optional[EvaluationLevel]
    :raises EvaluationException: If the value is a string that matches no
        ``EvaluationLevel`` value, or is neither a string nor an ``EvaluationLevel``.
    """

    def _invalid_level_error() -> EvaluationException:
        # Single place that builds the user-facing error for both failure paths.
        valid = [level.value for level in EvaluationLevel]
        return EvaluationException(
            message=(f"Invalid evaluation_level '{evaluation_level}'. " f"Must be one of: {valid}."),
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=error_target,
        )

    # "Not provided" comes in two spellings: None and the empty string.
    if evaluation_level is None or evaluation_level == "":
        return None

    # Already an enum member: nothing to convert.
    if isinstance(evaluation_level, EvaluationLevel):
        return evaluation_level

    # Anything that is not a string at this point cannot name a level.
    if not isinstance(evaluation_level, str):
        raise _invalid_level_error()

    try:
        return EvaluationLevel(evaluation_level)
    except ValueError as lookup_error:
        # Chain the enum lookup failure so the original cause stays visible.
        raise _invalid_level_error() from lookup_error


def _merge_query_response_messages(query: List[dict], response: List[dict]) -> List[dict]:
    """Concatenate query messages and response messages into one new list.

    The inputs are not modified; the returned list holds the same message
    objects, query messages first, followed by the response messages.

    :param query: Messages that make up the query.
    :type query: List[dict]
    :param response: Messages that make up the response.
    :type response: List[dict]
    :return: A new list with all query messages followed by all response messages.
    :rtype: List[dict]
    """
    conversation = list(query)
    conversation.extend(response)
    return conversation


def _split_messages_at_latest_user(messages: List[dict]) -> Tuple[List[dict], List[dict]]:
    """Split a conversation into query and response slices at the last user message.

    The first slice runs from the start of ``messages`` through the last message
    whose ``role`` equals ``MessageRole.USER.value`` (inclusive); the second slice
    is everything after it.

    :param messages: The conversation messages to split.
    :type messages: List[dict]
    :return: A ``(query_messages, response_messages)`` tuple of list slices.
    :rtype: Tuple[List[dict], List[dict]]
    :raises ValueError: If no message has the user role.
    """
    latest_user_index = -1
    # Scan every message; a later match simply overwrites an earlier one.
    for position, entry in enumerate(messages):
        if entry.get("role") == MessageRole.USER.value:
            latest_user_index = position

    if latest_user_index == -1:
        raise ValueError("messages must contain at least one message with role 'user'.")

    boundary = latest_user_index + 1
    query_part = messages[:boundary]
    response_part = messages[boundary:]
    return query_part, response_part
Comment thread
Copilot marked this conversation as resolved.


def _wrap_string_messages(query: str, response: str) -> Tuple[List[dict], List[dict]]:
    """Wrap plain-string query and response into single-message lists.

    The query becomes one ``user`` message and the response one ``assistant``
    message, each carrying a single ``text`` content item.

    :param query: The query text.
    :type query: str
    :param response: The response text.
    :type response: str
    :return: A ``(query_messages, response_messages)`` tuple, each a one-element list.
    :rtype: Tuple[List[dict], List[dict]]
    """
    user_turn = {"role": "user", "content": [{"type": "text", "text": query}]}
    assistant_turn = {"role": "assistant", "content": [{"type": "text", "text": response}]}
    return [user_turn], [assistant_turn]
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
Validator that supports both single-turn (query/response) and multi-turn (messages) inputs.
"""

from typing import Any, Dict
from typing_extensions import override
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
from ._validation_constants import MessageRole, ContentType
from ._conversation_validator import ConversationValidator
from ._tool_definitions_validator import ToolDefinitionsValidator


class MessagesOrQueryResponseInputValidator(ToolDefinitionsValidator):
    """Validator that supports both single-turn (query/response) and multi-turn (messages) inputs.

    A single implementation serves all evaluators via two behavior flags:
    - ``enforce_tool_definitions`` (default True): validate ``tool_definitions`` in both the
      messages path and the query/response path. Set False for evaluators that do not accept
      tool definitions (parity with a plain ``ConversationValidator``).
    - ``deep_validate_messages`` (default False): additionally run full per-message
      ``_validate_message_dict`` checks in the messages path.
    """

    # Class-level defaults; ``__init__`` assigns the per-instance values.
    enforce_tool_definitions: bool = True
    deep_validate_messages: bool = False

    def __init__(
        self,
        error_target: ErrorTarget,
        requires_query: bool = True,
        optional_tool_definitions: bool = True,
        check_for_unsupported_tools: bool = False,
        *,
        enforce_tool_definitions: bool = True,
        deep_validate_messages: bool = False,
    ):
        """Initialize MessagesOrQueryResponseInputValidator.

        :param error_target: Forwarded to the base-class initializer.
        :type error_target: ErrorTarget
        :param requires_query: Forwarded to the base-class initializer.
        :type requires_query: bool
        :param optional_tool_definitions: Forwarded to the base-class initializer.
        :type optional_tool_definitions: bool
        :param check_for_unsupported_tools: Forwarded to the base-class initializer.
        :type check_for_unsupported_tools: bool
        :keyword enforce_tool_definitions: Whether ``tool_definitions`` is validated.
        :paramtype enforce_tool_definitions: bool
        :keyword deep_validate_messages: Whether each message is additionally passed
            through ``_validate_message_dict`` in the messages path.
        :paramtype deep_validate_messages: bool
        """
        super().__init__(error_target, requires_query, optional_tool_definitions, check_for_unsupported_tools)
        self.enforce_tool_definitions = enforce_tool_definitions
        self.deep_validate_messages = deep_validate_messages

    def _invalid_messages_error(self, message: str) -> EvaluationException:
        """Build the user-error ``EvaluationException`` used by the messages-path checks."""
        return EvaluationException(
            message=message,
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=self.error_target,
        )

    @override
    def validate_eval_input(self, eval_input: Dict[str, Any]) -> bool:
        """Validate evaluation input, supporting messages as an alternative to query/response.

        When ``eval_input`` has a non-``None`` ``messages`` entry, the multi-turn checks
        in this method run. Otherwise validation is delegated to the parent
        implementation (when ``enforce_tool_definitions`` is set) or directly to
        ``ConversationValidator.validate_eval_input``.

        :param eval_input: The evaluator input to validate.
        :type eval_input: Dict[str, Any]
        :return: ``True`` when the messages path passes; otherwise the result of the
            delegated validator.
        :rtype: bool
        :raises EvaluationException: If any messages-path check fails.
        """
        messages = eval_input.get("messages")
        if messages is None:
            # Single-turn path (query/response).
            if self.enforce_tool_definitions:
                return super().validate_eval_input(eval_input)
            # Call ConversationValidator's implementation directly, bypassing the
            # ToolDefinitionsValidator override.
            return ConversationValidator.validate_eval_input(self, eval_input)

        # Multi-turn path (messages list)
        if not isinstance(messages, list):
            raise self._invalid_messages_error("messages must be provided as a list of message dictionaries.")
        if len(messages) == 0:
            raise self._invalid_messages_error("messages list must not be empty.")

        # Per-message structural checks
        valid_roles = {role.value for role in MessageRole}
        roles_present = set()
        for index, message in enumerate(messages):
            if not isinstance(message, dict):
                raise self._invalid_messages_error(
                    "Each item in 'messages' must be a dictionary, "
                    f"but item at index {index} is {type(message).__name__}."
                )
            role = message.get("role")
            if role is None:
                raise self._invalid_messages_error(
                    f"Each message must contain a 'role' key, but message at index {index} is missing it."
                )
            try:
                role_is_valid = role in valid_roles
            except TypeError:
                # An unhashable role (e.g. a list or dict) cannot be looked up in a
                # set; report it as an invalid role instead of leaking a TypeError.
                role_is_valid = False
            if not role_is_valid:
                raise self._invalid_messages_error(
                    f"Invalid role '{role}' at message index {index}. " f"Must be one of: {sorted(valid_roles)}."
                )
            roles_present.add(role)

        # Conversation-level checks
        if MessageRole.USER.value not in roles_present:
            raise self._invalid_messages_error("messages must contain at least one message with role 'user'.")
        if MessageRole.ASSISTANT.value not in roles_present:
            raise self._invalid_messages_error("messages must contain at least one message with role 'assistant'.")

        # The last message in the list (whatever its role) must carry text when its
        # content is a list; string or missing content is not checked here.
        last_content = messages[-1].get("content", "")
        if isinstance(last_content, list):
            has_text = any(
                (
                    isinstance(content_item, dict)
                    and content_item.get("type")
                    in (
                        ContentType.TEXT,
                        ContentType.INPUT_TEXT,
                        ContentType.OUTPUT_TEXT,
                    )
                )
                or isinstance(content_item, str)
                for content_item in last_content
            )
            if not has_text:
                raise self._invalid_messages_error(
                    "The last message must contain text content, "
                    "not only tool calls. The conversation appears to be "
                    "mid-execution \u2014 provide the agent's final text response."
                )

        if self.deep_validate_messages:
            # _validate_message_dict returns an exception (or a falsy value) rather
            # than raising, so raise the first one reported.
            for message in messages:
                error = self._validate_message_dict(message)
                if error:
                    raise error

        if self.enforce_tool_definitions:
            tool_definitions = eval_input.get("tool_definitions")
            tool_definitions_validation_exception = self._validate_tool_definitions(tool_definitions)
            if tool_definitions_validation_exception:
                raise tool_definitions_validation_exception
        return True
Loading
Loading