
Commit 1fa83dd

update examples
1 parent b6ef4c0 commit 1fa83dd

2 files changed: 298 additions, 0 deletions


examples/basic/1d-screen-click.py

Lines changed: 208 additions & 0 deletions
"""
A Bit-Shooter Game played on a 1-dimensional binary screen.

Give an LLM Agent access to a 1-dimensional "screen" represented
as a string of bits (0s and 1s), e.g. "101010",
and equip it with a "Click tool" (like a mouse click) that allows it to
click on a bit -- clicking the bit causes it to flip.

The Agent plays a "Bit Shooter" game where the goal is to get rid of all
1s in the "screen".

To use the Click tool, the Agent must specify the position (zero-based)
where it wants to click. This causes the bit to flip.
The LLM is then presented with the new state of the screen,
and the process repeats until all 1s are gone.

Clearly the Agent (LLM) needs to accurately count the bit positions
in order to correctly click on the 1s.

Run like this (--model is optional, defaults to GPT4o):

python3 examples/basic/1d-screen-click.py --model litellm/anthropic/claude-3-5-sonnet-20241022

At the beginning you get to specify the initial state of the screen:
- size of the screen (how many bits)
- the (0-based) locations of the 1s (SPACE-separated) in the screen.

E.g. try this:
- size = 50
- 1-indices: 0 20 30 40

The loop is set to run in interactive mode (to prevent runaway loops),
so you have to keep hitting enter to see the LLM's next move.

The main observation is that when you run it with claude-3.5-sonnet,
the accuracy of the Agent's clicks is far superior to other LLMs like GPT-4o
and even GPT-4.

To try other LLMs, set the --model param to, for example:
- gpt-4 (set OPENAI_API_KEY in your env or .env file)
- gpt-4o (ditto, set OPENAI_API_KEY)
- groq/llama-3.1-70b-versatile (set GROQ_API_KEY in your env or .env file)
- cerebras/llama3.1-70b (set CEREBRAS_API_KEY in your env or .env file)
- ollama/qwen2.5-coder:latest

See here for a full guide on local/open LLM setup with Langroid:
https://langroid.github.io/langroid/tutorials/local-llm-setup/
And here for how to use Langroid with other non-OpenAI LLMs:
https://langroid.github.io/langroid/tutorials/non-openai-llms/
"""

from typing import List, Tuple

import langroid as lr
import langroid.language_models as lm
from langroid.agent.tools.orchestration import AgentDoneTool
from langroid.pydantic_v1 import BaseModel
from langroid.utils.globals import GlobalState
from rich.prompt import Prompt
import fire


class ScreenState(BaseModel):
    """
    Represents the state of the 1-dimensional binary screen.
    """

    screen: str | None = None  # binary string, e.g. "101010"

    def __init__(
        self,
        one_indices: List[int] = [1],
        size: int = 1,
    ):
        super().__init__()
        # Initialize with all zeros
        screen_list = ["0"] * size

        # Set 1s at specified indices
        for idx in one_indices:
            if 0 <= idx < size:
                screen_list[idx] = "1"

        # Join into string
        self.screen = "".join(screen_list)

    @classmethod
    def set_state(
        cls,
        one_indices: List[int],
        size: int,
    ) -> "ScreenState":
        """
        Factory method to create the initial state and store it globally.
        """
        initial_state = cls(
            one_indices=one_indices,
            size=size,
        )
        GlobalScreenState.set_values(state=initial_state)
        return initial_state

    def flip(self, i: int):
        """
        Flip the i-th bit.
        """
        if self.screen is None or i < 0 or i >= len(self.screen):
            return

        screen_list = list(self.screen)
        screen_list[i] = "1" if screen_list[i] == "0" else "0"
        self.screen = "".join(screen_list)


class GlobalScreenState(GlobalState):
    state: ScreenState = ScreenState()


def get_state() -> ScreenState:
    return GlobalScreenState.get_value("state")


class ClickTool(lr.ToolMessage):
    request: str = "click_tool"
    purpose: str = """
        To click at <position> on the 1-dimensional binary screen,
        which causes the bit at that position to FLIP.
        IMPORTANT: the position numbering starts from 0!!!
        """

    position: int

    @classmethod
    def examples(cls) -> List[lr.ToolMessage | Tuple[str, lr.ToolMessage]]:
        return [
            cls(position=3),
            (
                "I want to click at position 5",
                cls(position=5),
            ),
        ]

    def handle(self) -> str | AgentDoneTool:
        state = get_state()
        state.flip(self.position)
        print("SCREEN STATE = ", state.screen)
        if "1" not in state.screen:
            return AgentDoneTool()
        return state.screen


def main(model: str = ""):
    llm_config = lm.OpenAIGPTConfig(
        chat_model=model or lm.OpenAIChatModel.GPT4o,
    )
    click_tool_name = ClickTool.default_value("request")
    agent = lr.ChatAgent(
        lr.ChatAgentConfig(
            name="Clicker",
            llm=llm_config,
            use_functions_api=False,
            use_tools=True,
            show_stats=False,
            system_message=f"""
            You are an expert at COMPUTER USE.
            In this task you only have to be able to understand a 1-dimensional
            screen presented to you as a string of bits (0s and 1s).
            You will play a 1-dimensional BIT-shooter game!

            Your task is to CLICK ON THE LEFTMOST 1 in the bit-string,
            to flip it to a 0.

            Always try to click on the LEFTMOST 1 in the bit-sequence.

            To CLICK on the screen you
            must use the TOOL `{click_tool_name}` where the
            `position` field specifies the position (zero-based) to click.
            If you CORRECTLY click on a 1, the bit at that position will be
            turned to 0.
            But if you click on a 0, it will turn into a 1,
            taking you further from your goal.

            So you MUST ACCURATELY specify the position of the LEFTMOST 1 to click,
            making SURE there is a 1 at that position.
            In other words, it is critical that you are able to ACCURATELY COUNT
            the bit positions so that you are able to correctly identify the position
            of the LEFTMOST 1 bit in the "screen" given to you as a string of bits.
            """,
        )
    )

    agent.enable_message(ClickTool)

    task = lr.Task(agent, interactive=True, only_user_quits_root=False)

    # kick it off with initial screen state (set below by user)
    task.run(get_state())


if __name__ == "__main__":
    size = int(Prompt.ask("Size of screen (how many bits)"))
    ones = Prompt.ask("Indices of 1s (SPACE-separated)").split(" ")
    ones = [int(x) for x in ones]
    ScreenState.set_state(ones, size)
    print("SCREEN STATE = ", get_state().screen)
    fire.Fire(main)
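
As a quick sanity check of the screen mechanics above, the following sketch (not part of the commit) exercises ScreenState directly; it assumes the class definitions above are in scope, since the hyphenated file name prevents a plain import:

# Sketch only: verify flip() behavior on a 6-bit screen with 1s at positions 0 and 3.
# This mirrors what ClickTool.handle() does on each click.
state = ScreenState(one_indices=[0, 3], size=6)
assert state.screen == "100100"

state.flip(0)   # clicking a 1 flips it to 0 (progress toward the goal)
assert state.screen == "000100"

state.flip(1)   # clicking a 0 flips it to 1 (moves away from the goal)
assert state.screen == "010100"

state.flip(99)  # out-of-range clicks are ignored
assert state.screen == "010100"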

examples/basic/xml_tool.py

Lines changed: 90 additions & 0 deletions
"""
Example of defining a variant of an existing tool, but inheriting from XMLToolMessage,
to have the LLM use XML rather than JSON to generate the tool.

This will not work with built-in functions/tools of OpenAI,
so in the `ChatAgentConfig` you have to set the following to ensure
that Langroid's built-in XML Tool calls are activated:
- `use_functions_api = False`
- `use_tools = True`

Run like this (--model is optional, defaults to GPT4o):

python3 examples/basic/xml_tool.py --model groq/llama-3.1-8b-instant
"""

import langroid as lr
import langroid.language_models as lm
from langroid.pydantic_v1 import Field
from langroid.agent.tools.orchestration import SendTool
from langroid.agent.xml_tool_message import XMLToolMessage
import fire


class XMLSendTool(SendTool, XMLToolMessage):
    """
    Variant of SendTool, using XML rather than JSON.
    """

    request: str = "xml_send_tool"
    purpose: str = """
        To send <content> to an entity/agent identified in the <to> field.
        """

    content: str = Field(
        ...,
        description="The content to send",
        verbatim=True,  # enforces content enclosed within CDATA block in xml
    )
    to: str


xml_send_tool_name = XMLSendTool.default_value("request")


def main(model: str = ""):
    llm_config = lm.OpenAIGPTConfig(
        chat_model=model or lm.OpenAIChatModel.GPT4o,
    )
    alice = lr.ChatAgent(
        lr.ChatAgentConfig(
            name="Alice",
            llm=llm_config,
            use_functions_api=False,
            use_tools=True,
            system_message=f"""
            Whatever number you receive, send it to Bob using the
            `{xml_send_tool_name}` tool.
            """,
        )
    )

    bob = lr.ChatAgent(
        lr.ChatAgentConfig(
            name="Bob",
            llm=llm_config,
            use_functions_api=False,
            use_tools=True,
            system_message=f"""
            Whatever number you receive, add 1 to it and send
            the result back to Alice
            using the `{xml_send_tool_name}` tool.
            """,
        )
    )

    alice.enable_message(XMLSendTool)
    bob.enable_message(XMLSendTool)

    # specialize alice_task to return an int
    alice_task = lr.Task(alice, interactive=False)[int]
    bob_task = lr.Task(bob, interactive=False)

    alice_task.add_sub_task(bob_task)

    result = alice_task.run("5", turns=6)
    assert result == 7


if __name__ == "__main__":
    fire.Fire(main)
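
If both LLMs follow their system messages exactly, the run above is expected to unfold roughly as follows (a sketch of the intended flow, not a guaranteed transcript):

# Alice receives "5"  -> sends 5 to Bob via the `xml_send_tool` tool
# Bob receives 5      -> adds 1, sends 6 back to Alice
# Alice receives 6    -> sends 6 to Bob
# Bob receives 6      -> adds 1, sends 7 back to Alice
# The turns=6 limit then stops the loop, and alice_task -- specialized to int
# via lr.Task(alice, interactive=False)[int] -- returns 7, satisfying the assert.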
