forked from ggml-org/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchat-peg-parser.h
More file actions
195 lines (160 loc) · 10.4 KB
/
chat-peg-parser.h
File metadata and controls
195 lines (160 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#pragma once
#include "chat.h"
#include "peg-parser.h"
#include <map>
#include <optional>
#include <vector>
class common_chat_peg_mapper {
public:
common_chat_msg & result;
common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
virtual ~common_chat_peg_mapper() = default;
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
virtual void map(const common_peg_ast_node & node);
protected:
virtual std::string normalize_container_value(const std::string & input);
private:
// Tool call handling state
std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
common_chat_tool_call * current_tool = nullptr;
int arg_count = 0;
bool closing_quote_pending = false;
std::string args_buffer; // Buffer to delay arguments until tool name is known
// Returns a reference to the active argument destination string.
// Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
std::string & args_target();
};
class common_chat_peg_gemma4_mapper : public common_chat_peg_mapper {
public:
common_chat_peg_gemma4_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
private:
void visit(const common_peg_ast_arena & arena, common_peg_ast_id id);
};
struct content_structure;
struct tool_call_structure;
class common_chat_peg_builder : public common_peg_parser_builder {
public:
// Tag constants (from former common_chat_peg_base_builder)
static constexpr const char * REASONING_BLOCK = "reasoning-block";
static constexpr const char * REASONING = "reasoning";
static constexpr const char * CONTENT = "content";
// Tag constants
static constexpr const char * TOOL = "tool";
static constexpr const char * TOOL_OPEN = "tool-open";
static constexpr const char * TOOL_CLOSE = "tool-close";
static constexpr const char * TOOL_ID = "tool-id";
static constexpr const char * TOOL_NAME = "tool-name";
static constexpr const char * TOOL_ARGS = "tool-args";
static constexpr const char * TOOL_ARG = "tool-arg";
static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
static constexpr const char * TOOL_ARG_VALUE = "tool-arg-value";
static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; // For schema-declared string types
// Low-level tag methods (from former common_chat_peg_base_builder)
common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
common_peg_parser tag_with_safe_content(const std::string & tag_name,
const std::string & marker,
const common_peg_parser & p);
// Low-level tag methods
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
// Use for schema-declared string types - won't be treated as potential JSON container
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }
// Return a parser that parses the prefix of a string, up to a given delimiter.
common_peg_parser prefix(const std::string & s, const std::string & delimiter = {});
// Legacy-compatible helper for building standard JSON tool calls
// Used by tests and manual parsers
// name_key/args_key: JSON key names for function name and arguments
// Empty or "name"/"arguments" will accept both common variations
// Supports dot notation for nested objects (e.g., "function.name")
// array_wrapped: if true, tool calls are wrapped in JSON array [...]
// function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
// call_id_key: JSON key for string call ID (e.g., "id")
// gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
// parameters_order: order in which JSON fields should be parsed
common_peg_parser standard_json_tools(const std::string & section_start,
const std::string & section_end,
const nlohmann::ordered_json & tools,
bool parallel_tool_calls,
bool force_tool_calls,
const std::string & name_key = "",
const std::string & args_key = "",
bool array_wrapped = false,
bool function_is_key = false,
const std::string & call_id_key = "",
const std::string & gen_call_id_key = "",
const std::vector<std::string> & parameters_order = {});
// Legacy-compatible helper for building XML/tagged style tool calls
// Used by tests and manual parsers
common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
const nlohmann::ordered_json & tools,
bool parallel_tool_calls,
bool force_tool_calls);
// Helper for Python-style function call format: name(arg1="value1", arg2=123)
// Used by LFM2 and similar templates
common_peg_parser python_style_tool_calls(const nlohmann::ordered_json & tools,
bool parallel_tool_calls);
private:
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
common_peg_parser build_json_tools_function_is_key(const nlohmann::ordered_json & tools,
const std::string & args_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);
common_peg_parser build_json_tools_nested_keys(const nlohmann::ordered_json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);
common_peg_parser build_json_tools_flat_keys(const nlohmann::ordered_json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order);
};
inline common_peg_arena build_chat_peg_parser(
const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
common_chat_peg_builder builder;
builder.set_root(fn(builder));
return builder.build();
}
class tag_based_peg_mapper {
public:
std::map<std::string, std::string> tags;
void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
};
struct tagged_parse_result {
common_peg_parse_result result;
std::map<std::string, std::string> tags;
};
struct tagged_peg_parser {
common_peg_arena arena;
common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_NONE;
tagged_peg_parser & withDebug() {
flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
return *this;
}
tagged_peg_parser & withoutDebug() {
flags = flags & ~COMMON_PEG_PARSE_FLAG_DEBUG;
return *this;
}
tagged_parse_result parse_and_extract(const std::string & input, common_peg_parse_flags extra_flags = COMMON_PEG_PARSE_FLAG_NONE) const;
tagged_parse_result parse_anywhere_and_extract(const std::string & input) const;
};
tagged_peg_parser build_tagged_peg_parser(
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);