-
Notifications
You must be signed in to change notification settings - Fork 0
/
lilianweng_post_agents.html
884 lines (881 loc) · 269 KB
/
lilianweng_post_agents.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
<!DOCTYPE html>
<html>
<body id="viewsource" class="highlight" style="tab-size: 4">
<pre id="line1">
<span id="line10"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">description</a>" <span class="attribute-name">content</span>="<a class="attribute-value">Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
<span id="line11"></span>Agent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent<span class="entity"><span>&</span>rsquo;</span>s brain, complemented by several key components:</a>"></span><span>
<span id="line12"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">author</a>" <span class="attribute-name">content</span>="<a class="attribute-value">Lilian Weng</a>"></span><span>
<span id="line13"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">canonical</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/">https://lilianweng.github.io/posts/2023-06-23-agent/</a>" <span>/</span>></span><span>
<span id="line14"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">crossorigin</span>="<a class="attribute-value">anonymous</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/assets/css/stylesheet.min.67a6fb6e33089cb29e856bcc95d7aa39f70049a42b123105531265a0d9f1258b.css">/assets/css/stylesheet.min.67a6fb6e33089cb29e856bcc95d7aa39f70049a42b123105531265a0d9f1258b.css</a>" <span class="attribute-name">integrity</span>="<a class="attribute-value">sha256-Z6b7bjMInLKehWvMldeqOfcASaQrEjEFUxJloNnxJYs=</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">preload stylesheet</a>" <span class="attribute-name">as</span>="<a class="attribute-value">style</a>"></span><span>
<span id="line15"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">defer</span> <span class="attribute-name">crossorigin</span>="<a class="attribute-value">anonymous</a>" <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/assets/js/highlight.min.5b9ae0304f93db6cc493f51846f012428af399c614b4f2fbdb7fa59dd4d5ef5b.js">/assets/js/highlight.min.5b9ae0304f93db6cc493f51846f012428af399c614b4f2fbdb7fa59dd4d5ef5b.js</a>" <span class="attribute-name">integrity</span>="<a class="attribute-value">sha256-W5rgME<span class="entity"><span>&</span>#43;</span>T22zEk/UYRvASQorzmcYUtPL723<span class="entity"><span>&</span>#43;</span>lndTV71s=</a>"
<span id="line16"></span> <span class="attribute-name">onload</span>="<a class="attribute-value">hljs.initHighlightingOnLoad();</a>"></span><span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line17"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">icon</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/favicon_peach.ico">https://lilianweng.github.io/favicon_peach.ico</a>"></span><span>
<span id="line18"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">icon</a>" <span class="attribute-name">type</span>="<a class="attribute-value">image/png</a>" <span class="attribute-name">sizes</span>="<a class="attribute-value">16x16</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/favicon-16x16.png">https://lilianweng.github.io/favicon-16x16.png</a>"></span><span>
<span id="line19"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">icon</a>" <span class="attribute-name">type</span>="<a class="attribute-value">image/png</a>" <span class="attribute-name">sizes</span>="<a class="attribute-value">32x32</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/favicon-32x32.png">https://lilianweng.github.io/favicon-32x32.png</a>"></span><span>
<span id="line20"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">apple-touch-icon</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/apple-touch-icon.png">https://lilianweng.github.io/apple-touch-icon.png</a>"></span><span>
<span id="line21"></span></span><span><<span class="start-tag">link</span> <span class="attribute-name">rel</span>="<a class="attribute-value">mask-icon</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/safari-pinned-tab.svg">https://lilianweng.github.io/safari-pinned-tab.svg</a>"></span><span>
<span id="line22"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">theme-color</a>" <span class="attribute-name">content</span>="<a class="attribute-value">#2e2e33</a>"></span><span>
<span id="line23"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">msapplication-TileColor</a>" <span class="attribute-name">content</span>="<a class="attribute-value">#2e2e33</a>"></span><span>
<span id="line24"></span></span><span><<span class="start-tag">noscript</span>></span><span>
<span id="line25"></span> </span><span><</span><span>style>
<span id="line26"></span> #theme-toggle,
<span id="line27"></span> .top-link {
<span id="line28"></span> display: none;
<span id="line29"></span> }
<span id="line30"></span>
<span id="line31"></span> </span><span></<span></span>s</span><span>tyle>
<span id="line32"></span> </span><span><</span><span>style>
<span id="line33"></span> @media (prefers-color-scheme: dark) {
<span id="line34"></span> :root {
<span id="line35"></span> --theme: rgb(29, 30, 32);
<span id="line36"></span> --entry: rgb(46, 46, 51);
<span id="line37"></span> --primary: rgb(218, 218, 219);
<span id="line38"></span> --secondary: rgb(155, 156, 157);
<span id="line39"></span> --tertiary: rgb(65, 66, 68);
<span id="line40"></span> --content: rgb(196, 196, 197);
<span id="line41"></span> --hljs-bg: rgb(46, 46, 51);
<span id="line42"></span> --code-bg: rgb(55, 56, 62);
<span id="line43"></span> --border: rgb(51, 51, 51);
<span id="line44"></span> }
<span id="line45"></span>
<span id="line46"></span> .list {
<span id="line47"></span> background: var(--theme);
<span id="line48"></span> }
<span id="line49"></span>
<span id="line50"></span> .list:not(.dark)::-webkit-scrollbar-track {
<span id="line51"></span> background: 0 0;
<span id="line52"></span> }
<span id="line53"></span>
<span id="line54"></span> .list:not(.dark)::-webkit-scrollbar-thumb {
<span id="line55"></span> border-color: var(--theme);
<span id="line56"></span> }
<span id="line57"></span> }
<span id="line58"></span>
<span id="line59"></span> </span><span></<span></span>s</span><span>tyle>
<span id="line60"></span></span><span></<span class="end-tag">noscript</span>></span><span>
<span id="line61"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">async</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://www.googletagmanager.com/gtag/js?id=G-HFT45VFBX6">https://www.googletagmanager.com/gtag/js?id=G-HFT45VFBX6</a>"></span><span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line62"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line63"></span>var doNotTrack = false;
<span id="line64"></span>if (!doNotTrack) {
<span id="line65"></span> window.dataLayer = window.dataLayer || [];
<span id="line66"></span> function gtag(){dataLayer.push(arguments);}
<span id="line67"></span> gtag('js', new Date());
<span id="line68"></span> gtag('config', 'G-HFT45VFBX6', { 'anonymize_ip': false });
<span id="line69"></span>}
<span id="line70"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line71"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">og:title</a>" <span class="attribute-name">content</span>="<a class="attribute-value">LLM Powered Autonomous Agents</a>" <span>/</span>></span><span>
<span id="line72"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">og:description</a>" <span class="attribute-name">content</span>="<a class="attribute-value">Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
<span id="line73"></span>Agent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent<span class="entity"><span>&</span>rsquo;</span>s brain, complemented by several key components:</a>" <span>/</span>></span><span>
<span id="line74"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">og:type</a>" <span class="attribute-name">content</span>="<a class="attribute-value">article</a>" <span>/</span>></span><span>
<span id="line75"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">og:url</a>" <span class="attribute-name">content</span>="<a class="attribute-value">https://lilianweng.github.io/posts/2023-06-23-agent/</a>" <span>/</span>></span><span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">article:section</a>" <span class="attribute-name">content</span>="<a class="attribute-value">posts</a>" <span>/</span>></span><span>
<span id="line76"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">article:published_time</a>" <span class="attribute-name">content</span>="<a class="attribute-value">2023-06-23T00:00:00<span class="entity"><span>&</span>#43;</span>00:00</a>" <span>/</span>></span><span>
<span id="line77"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">property</span>="<a class="attribute-value">article:modified_time</a>" <span class="attribute-name">content</span>="<a class="attribute-value">2023-06-23T00:00:00<span class="entity"><span>&</span>#43;</span>00:00</a>" <span>/</span>></span><span>
<span id="line78"></span>
<span id="line79"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">twitter:card</a>" <span class="attribute-name">content</span>="<a class="attribute-value">summary</a>"<span>/</span>></span><span>
<span id="line80"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">twitter:title</a>" <span class="attribute-name">content</span>="<a class="attribute-value">LLM Powered Autonomous Agents</a>"<span>/</span>></span><span>
<span id="line81"></span></span><span><<span class="start-tag">meta</span> <span class="attribute-name">name</span>="<a class="attribute-value">twitter:description</a>" <span class="attribute-name">content</span>="<a class="attribute-value">Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
<span id="line82"></span>Agent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent<span class="entity"><span>&</span>rsquo;</span>s brain, complemented by several key components:</a>"<span>/</span>></span><span>
<span id="line83"></span>
<span id="line84"></span>
<span id="line85"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">type</span>="<a class="attribute-value">application/ld+json</a>"></span><span>
<span id="line86"></span>{
<span id="line87"></span> "@context": "https://schema.org",
<span id="line88"></span> "@type": "BreadcrumbList",
<span id="line89"></span> "itemListElement": [
<span id="line90"></span> {
<span id="line91"></span> "@type": "ListItem",
<span id="line92"></span> "position": 1 ,
<span id="line93"></span> "name": "Posts",
<span id="line94"></span> "item": "https://lilianweng.github.io/posts/"
<span id="line95"></span> },
<span id="line96"></span> {
<span id="line97"></span> "@type": "ListItem",
<span id="line98"></span> "position": 2 ,
<span id="line99"></span> "name": "LLM Powered Autonomous Agents",
<span id="line100"></span> "item": "https://lilianweng.github.io/posts/2023-06-23-agent/"
<span id="line101"></span> }
<span id="line102"></span> ]
<span id="line103"></span>}
<span id="line104"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line105"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">type</span>="<a class="attribute-value">application/ld+json</a>"></span><span>
<span id="line106"></span>{
<span id="line107"></span> "@context": "https://schema.org",
<span id="line108"></span> "@type": "BlogPosting",
<span id="line109"></span> "headline": "LLM Powered Autonomous Agents",
<span id="line110"></span> "name": "LLM Powered Autonomous Agents",
<span id="line111"></span> "description": "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent\u0026rsquo;s brain, complemented by several key components:",
<span id="line112"></span> "keywords": [
<span id="line113"></span> "nlp", "language-model", "agent", "steerability", "prompting"
<span id="line114"></span> ],
<span id="line115"></span> "articleBody": "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\nPlanning Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results. Memory Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn. Long-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval. Tool use The agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more. Fig. 1. Overview of a LLM-powered autonomous agent system. Component One: Planning A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. 
CoT transforms big tasks into multiple manageable tasks and sheds light on an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\nSelf-Reflection Self-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable.\nReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. 
use Wikipedia search API), while the latter prompts LLM to generate reasoning traces in natural language.\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\nThought: ... Action: ... Observation: ... ... (Repeated many times) Fig. 2. Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: Yao et al. 2023). In both experiments on knowledge-intensive tasks and decision-making tasks, ReAct works better than the Act-only baseline where Thought: … step is removed.\nReflexion (Shinn \u0026 Labash 2023) is a framework to equip agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may decide to reset the environment to start a new trial depending on the self-reflection results.\nFig. 3. Illustration of the Reflexion framework. (Image source: Shinn \u0026 Labash, 2023) The heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.\nFig. 4. Experiments on AlfWorld Env and HotpotQA. 
Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: Shinn \u0026 Labash, 2023) Chain of Hindsight (CoH; Liu et al. 2023) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of $D_h = \\{(x, y_i , r_i , z_i)\\}_{i=1}^n$, where $x$ is the prompt, each $y_i$ is a model completion, $r_i$ is the human rating of $y_i$, and $z_i$ is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, $r_n \\geq r_{n-1} \\geq \\dots \\geq r_1$. The process is supervised fine-tuning where the data is a sequence in the form of $\\tau_h = (x, z_i, y_i, z_j, y_j, \\dots, z_n, y_n)$, where $1 \\leq i \\leq j \\leq n$. The model is finetuned to only predict $y_n$ when conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time.\nTo avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training.\nThe training dataset in their experiments is a combination of WebGPT comparisons, summarization from human feedback and human preference dataset.\nFig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: Liu et al. 2023) The idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. Algorithm Distillation (AD; Laskin et al. 
2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. The goal is to learn the process of RL instead of training a task-specific policy itself.\nFig. 6. Illustration of how Algorithm Distillation (AD) works. (Image source: Laskin et al. 2023). The paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic.\nIn reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context.\nIn comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by UCB), RL^2 (Duan et al. 2017; used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline.\nFig. 7. 
Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with A3C for \"dark\" environments and DQN for watermaze.(Image source: Laskin et al. 2023) Component Two: Memory (Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my conversations with ChatGPT.)\nTypes of Memory Memory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.\nSensory Memory: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).\nShort-Term Memory (STM) or Working Memory: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (Miller 1956) and lasts for 20-30 seconds.\nLong-Term Memory (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM:\nExplicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts). Implicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard. Fig. 8. Categorization of human memory. 
We can roughly consider the following mappings:\nSensory memory as learning embedding representations for raw inputs, including text, image or other modalities; Short-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer. Long-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval. Maximum Inner Product Search (MIPS) The external memory can alleviate the restriction of finite attention span. A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search (MIPS). To optimize the retrieval speed, the common choice is the approximate nearest neighbors (ANN) algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.\nA couple common choices of ANN algorithms for fast MIPS:\nLSH (Locality-Sensitive Hashing): It introduces a hashing function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs. ANNOY (Approximate Nearest Neighbors Oh Yeah): The core data structure are random projection trees, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable. HNSW (Hierarchical Navigable Small World): It is inspired by the idea of small world networks where most nodes can be reached by any other nodes within a small number of steps; e.g. “six degrees of separation” feature of social networks. 
HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can’t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality. FAISS (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist clustering of data points. FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization. ScaNN (Scalable Nearest Neighbors): The main innovation in ScaNN is anisotropic vector quantization. It quantizes a data point $x_i$ to $\\tilde{x}_i$ such that the quantized inner product $\\langle q, \\tilde{x}_i \\rangle$ is as similar to the original inner product $\\langle q, x_i \\rangle$ as possible, instead of picking the closest quantization centroid points. Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: Google Blog, 2020) Check more MIPS algorithms and performance comparison in ann-benchmarks.com.\nComponent Three: Tool Use Tool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities.\nFig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. 
While some other animals can use tools, the complexity is not comparable with humans. (Image source: Animals using tools) MRKL (Karpas et al. 2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can work reliably, knowing when to and how to use the tools are crucial, determined by the LLM capability.\nBoth TALM (Tool Augmented Language Models; Parisi et al. 2022) and Toolformer (Schick et al. 2023) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the “External APIs” section of Prompt Engineering.\nChatGPT Plugins and OpenAI API function calling are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls).\nHuggingGPT (Shen et al. 2023) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results.\nFig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 
2023) The system comprises of 4 stages:\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\nInstruction:\nThe AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can't be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning. (2) Model selection: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed.\nInstruction:\nGiven the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: \"id\": \"id\", \"reason\": \"your detail reason for the choice\". We have a list of models for you to choose from {{ Candidate Models }}. Please select one model from the list. 
(3) Task execution: Expert models execute on the specific tasks and log results.\nInstruction:\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path. (4) Response generation: LLM receives the execution results and provides summarized results to users.\nTo put HuggingGPT into real world usage, a couple of challenges need to be solved: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.\nAPI-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.\nFig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: Li et al. 2023) In the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. Decisions include:\nWhether an API call is needed. 
Identify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API). Response based on the API results: the model can choose to refine and call again if results are not satisfied. This benchmark evaluates the agent’s tool use capabilities at three levels:\nLevel-1 evaluates the ability to call the API. Given an API’s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns. Level-2 examines the ability to retrieve the API. The model needs to search for possible APIs that may solve the user’s requirement and learn how to use them by reading documentation. Level-3 assesses the ability to plan API beyond retrieve and call. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it. Case Studies Scientific Discovery Agent ChemCrow (Bran et al. 2023) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output. It is then instructed to answer a user-given prompt using the tools provided when necessary. The instruction suggests the model to follow the ReAct format - Thought, Action, Action Input, Observation. One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. 
This indicates a potential problem with using LLM to evaluate its own performance on domains that require deep expertise. The lack of expertise may cause LLMs to not know their own flaws and thus fail to judge the correctness of task results well.\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\nFor example, when requested to \"develop a novel anticancer drug\", the model came up with the following reasoning steps:\ninquired about current trends in anticancer drug discovery; selected a target; requested a scaffold targeting these compounds; Once the compound was identified, the model attempted its synthesis. They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.\nGenerative Agents Simulation Generative Agents (Park, et al. 2023) is a super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. 
Generative agents create believable simulacra of human behavior for interactive applications.\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.\nMemory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language. Each element is an observation, an event directly provided by the agent. - Inter-agent communication can trigger new natural language statements. Retrieval model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance. Recency: recent events have higher scores Importance: distinguish mundane from core memories. Ask LM directly. Relevance: based on how related it is to the current situation / query. Reflection mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. They are higher-level summaries of past events (\u003c- note that this is a bit different from self-reflection above) Prompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions. Planning \u0026 Reacting: translate the reflections and the environment information into actions Planning is essentially in order to optimize believability at the moment vs in time. Prompt template: {Intro of an agent X}. Here is X's plan today in broad strokes: 1) Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting. Environment information is present in a tree structure. Fig. 13. The generative agent architecture. (Image source: Park et al. 2023) This fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. 
two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).\nProof-of-Concept Examples AutoGPT has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.\nHere is the system message used by AutoGPT, where {{...}} are user inputs:\nYou are {{ai-name}}, {{user-provided AI bot description}}. Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications. GOALS: 1. {{user-provided goal 1}} 2. {{user-provided goal 2}} 3. ... 4. ... 5. ... Constraints: 1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files. 2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember. 3. No user assistance 4. Exclusively use the commands listed in double quotes e.g. \"command name\" 5. Use subprocesses for commands that will not terminate within a few minutes Commands: 1. Google Search: \"google\", args: \"input\": \"\" 2. Browse Website: \"browse_website\", args: \"url\": \"\", \"question\": \"\" 3. Start GPT Agent: \"start_agent\", args: \"name\": \"\", \"task\": \"\", \"prompt\": \"\" 4. Message GPT Agent: \"message_agent\", args: \"key\": \"\", \"message\": \"\" 5. List GPT Agents: \"list_agents\", args: 6. Delete GPT Agent: \"delete_agent\", args: \"key\": \"\" 7. Clone Repository: \"clone_repository\", args: \"repository_url\": \"\", \"clone_path\": \"\" 8. Write to file: \"write_to_file\", args: \"file\": \"\", \"text\": \"\" 9. Read file: \"read_file\", args: \"file\": \"\" 10. 
Append to file: \"append_to_file\", args: \"file\": \"\", \"text\": \"\" 11. Delete file: \"delete_file\", args: \"file\": \"\" 12. Search Files: \"search_files\", args: \"directory\": \"\" 13. Analyze Code: \"analyze_code\", args: \"code\": \"\" 14. Get Improved Code: \"improve_code\", args: \"suggestions\": \"\", \"code\": \"\" 15. Write Tests: \"write_tests\", args: \"code\": \"\", \"focus\": \"\" 16. Execute Python File: \"execute_python_file\", args: \"file\": \"\" 17. Generate Image: \"generate_image\", args: \"prompt\": \"\" 18. Send Tweet: \"send_tweet\", args: \"text\": \"\" 19. Do Nothing: \"do_nothing\", args: 20. Task Complete (Shutdown): \"task_complete\", args: \"reason\": \"\" Resources: 1. Internet access for searches and information gathering. 2. Long Term memory management. 3. GPT-3.5 powered Agents for delegation of simple tasks. 4. File output. Performance Evaluation: 1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities. 2. Constructively self-criticize your big-picture behavior constantly. 3. Reflect on past decisions and strategies to refine your approach. 4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps. You should only respond in JSON format as described below Response Format: { \"thoughts\": { \"text\": \"thought\", \"reasoning\": \"reasoning\", \"plan\": \"- short bulleted\\n- list that conveys\\n- long-term plan\", \"criticism\": \"constructive self-criticism\", \"speak\": \"thoughts summary to say to user\" }, \"command\": { \"name\": \"command name\", \"args\": { \"arg name\": \"value\" } } } Ensure the response can be parsed by Python json.loads GPT-Engineer is another project to create a whole repository of code given a task specified in natural language. 
The GPT-Engineer is instructed to think over a list of smaller components to build and ask for user input to clarify questions as needed.\nHere are a sample conversation for task clarification sent to OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in {{user input text}}.\n[ { \"role\": \"system\", \"content\": \"You will read instructions and not carry them out, only seek to clarify them.\\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\\nThen you will pick one clarifying question, and wait for an answer from the user.\\n\" }, { \"role\": \"user\", \"content\": \"We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\\n\" }, { \"role\": \"assistant\", \"content\": \"Summary of areas that need clarification:\\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\\n2. Details about the MVC components (e.g. which components are in each file)\\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\\n\\nClarifying question:\\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?\" }, { \"role\": \"user\", \"content\": \"{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\\n\\nIs anything else unclear? 
If yes, only answer in the form:\\n{remaining unclear areas} remaining questions.\\n{Next question}\\nIf everything is sufficiently clear, only answer \\\"Nothing more to clarify.\\\".\" }, { \"role\": \"assistant\", \"content\": \"Remaining unclear areas: 2 remaining questions.\\nCan you provide more information about how the MVC components are split into separate files?\" }, { \"role\": \"user\", \"content\": \"{{Make your own assumptions and state them explicitly before starting}}\" } ] Then after these clarification, the agent moved into the code writing mode with a different system message. System message:\nYou will get instructions for code to write. You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code. Make sure that every detail of the architecture is, in the end, implemented as code. Think step by step and reason yourself to the right decisions to make sure we get it right. You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\nThen you will output the content of each file including ALL code. Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that FILENAME is the lowercase file name including the file extension, LANG is the markup code block language for the code’s language, and CODE is the code:\nFILENAME\nCODE You will start with the “entrypoint” file, then go to the ones that are imported by that file, and so on. Please note that the code should be fully functional. No placeholders.\nFollow a language and framework appropriate best practice file naming convention. Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other. Ensure to implement all code, if you are unsure, write a plausible implementation. Include module dependency or package manager dependency definition file. 
Before you finish, double check that all parts of the architecture is present in the files.\nUseful to know: You almost always put different classes in different files. For Python, you always create an appropriate requirements.txt file. For NodeJS, you always create an appropriate package.json file. You always add a comment briefly describing the purpose of the function definition. You try to add comments explaining very complex bits of logic. You always follow the best practices for the requested languages in terms of describing the code written as a defined package/project.\nPython toolbelt preferences:\npytest dataclasses Conversation samples:\n[ { \"role\": \"system\", \"content\": \"You will get instructions for code to write.\\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\n\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\\n\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code's language, and CODE is the code:\\n\\nFILENAME\\n```LANG\\nCODE\\n```\\n\\nYou will start with the \\\"entrypoint\\\" file, then go to the ones that are imported by that file, and so on.\\nPlease note that the code should be fully functional. No placeholders.\\n\\nFollow a language and framework appropriate best practice file naming convention.\\nMake sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\\nEnsure to implement all code, if you are unsure, write a plausible implementation.\\nInclude module dependency or package manager dependency definition file.\\nBefore you finish, double check that all parts of the architecture is present in the files.\\n\\nUseful to know:\\nYou almost always put different classes in different files.\\nFor Python, you always create an appropriate requirements.txt file.\\nFor NodeJS, you always create an appropriate package.json file.\\nYou always add a comment briefly describing the purpose of the function definition.\\nYou try to add comments explaining very complex bits of logic.\\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\\npackage/project.\\n\\n\\nPython toolbelt preferences:\\n- pytest\\n- dataclasses\\n\" }, # … same conversation as earlier, ended with \"Make your own assumptions and state them explicitly before starting\". { \"role\": \"assistant\", \"content\": \"Assumptions:\\n1. Model: The model will contain the game's data, such as level information, character states, and enemy positions.\\n2. View: The view will handle the game's visuals, including rendering the game objects, backgrounds, and updating the display.\\n3. 
Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\\n\\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?\" }, { \"role\": \"user\", \"content\": \"Please now remember the steps:\\n\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\\n\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code's language, and CODE is the code:\\n\\nFILENAME\\n```LANG\\nCODE\\n```\\n\\nPlease note that the code should be fully functional. No placeholders.\\n\\nYou will start with the \\\"entrypoint\\\" file, then go to the ones that are imported by that file, and so on.\\nFollow a language and framework appropriate best practice file naming convention.\\nMake sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.\\nBefore you finish, double check that all parts of the architecture is present in the files.\\n\" } ] Challenges After going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:\nFinite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. 
Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.\nReliability of natural language interface: Current agent systems rely on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.\nCitation Cited as:\nWeng, Lilian. (Jun 2023). \"LLM-powered Autonomous Agents\". Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.\nOr\n@article{weng2023prompt, title = \"LLM-powered Autonomous Agents\", author = \"Weng, Lilian\", journal = \"lilianweng.github.io\", year = \"2023\", month = \"Jun\", url = \"https://lilianweng.github.io/posts/2023-06-23-agent/\" } References [1] Wei et al. “Chain of thought prompting elicits reasoning in large language models.” NeurIPS 2022\n[2] Yao et al. “Tree of Thoughts: Deliberate Problem Solving with Large Language Models.” arXiv preprint arXiv:2305.10601 (2023).\n[3] Liu et al. “Chain of Hindsight Aligns Language Models with Feedback” arXiv preprint arXiv:2302.02676 (2023).\n[4] Liu et al. “LLM+P: Empowering Large Language Models with Optimal Planning Proficiency” arXiv preprint arXiv:2304.11477 (2023).\n[5] Yao et al. “ReAct: Synergizing reasoning and acting in language models.” ICLR 2023.\n[6] Google Blog. 
“Announcing ScaNN: Efficient Vector Similarity Search” July 28, 2020.\n[7] https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389\n[8] Shinn \u0026 Labash. “Reflexion: an autonomous agent with dynamic memory and self-reflection” arXiv preprint arXiv:2303.11366 (2023).\n[9] Laskin et al. “In-context Reinforcement Learning with Algorithm Distillation” ICLR 2023.\n[10] Karpas et al. “MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.” arXiv preprint arXiv:2205.00445 (2022).\n[11] Weaviate Blog. Why is Vector Search so fast? Sep 13, 2022.\n[12] Li et al. “API-Bank: A Benchmark for Tool-Augmented LLMs” arXiv preprint arXiv:2304.08244 (2023).\n[13] Shen et al. “HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace” arXiv preprint arXiv:2303.17580 (2023).\n[14] Bran et al. “ChemCrow: Augmenting large-language models with chemistry tools.” arXiv preprint arXiv:2304.05376 (2023).\n[15] Boiko et al. “Emergent autonomous scientific research capabilities of large language models.” arXiv preprint arXiv:2304.05332 (2023).\n[16] Joon Sung Park, et al. “Generative Agents: Interactive Simulacra of Human Behavior.” arXiv preprint arXiv:2304.03442 (2023).\n[17] AutoGPT. https://github.com/Significant-Gravitas/Auto-GPT\n[18] GPT-Engineer. https://github.com/AntonOsika/gpt-engineer\n",
<span id="line116"></span> "wordCount" : "6445",
<span id="line117"></span> "inLanguage": "en",
<span id="line118"></span> "datePublished": "2023-06-23T00:00:00Z",
<span id="line119"></span> "dateModified": "2023-06-23T00:00:00Z",
<span id="line120"></span> "author":{
<span id="line121"></span> "@type": "Person",
<span id="line122"></span> "name": "Lilian Weng"
<span id="line123"></span> },
<span id="line124"></span> "mainEntityOfPage": {
<span id="line125"></span> "@type": "WebPage",
<span id="line126"></span> "@id": "https://lilianweng.github.io/posts/2023-06-23-agent/"
<span id="line127"></span> },
<span id="line128"></span> "publisher": {
<span id="line129"></span> "@type": "Organization",
<span id="line130"></span> "name": "Lil'Log",
<span id="line131"></span> "logo": {
<span id="line132"></span> "@type": "ImageObject",
<span id="line133"></span> "url": "https://lilianweng.github.io/favicon_peach.ico"
<span id="line134"></span> }
<span id="line135"></span> }
<span id="line136"></span>}
<span id="line137"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line138"></span></span><span></<span class="end-tag">head</span>></span><span>
<span id="line139"></span>
<span id="line140"></span></span><span><<span class="start-tag">body</span> <span class="attribute-name">class</span>="<a class="attribute-value"></a>" <span class="attribute-name">id</span>="<a class="attribute-value">top</a>"></span><span>
<span id="line141"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line142"></span>    // Initial theme: a stored "pref-theme" of dark/light wins; otherwise follow the OS prefers-color-scheme setting.
<span id="line143"></span>    switch (localStorage.getItem("pref-theme")) {
<span id="line144"></span>        case "dark": document.body.classList.add('dark'); break;
<span id="line145"></span>        case "light": document.body.classList.remove('dark'); break;
<span id="line146"></span>        default:
<span id="line147"></span>            if (window.matchMedia('(prefers-color-scheme: dark)').matches) document.body.classList.add('dark');
<span id="line148"></span>    }
<span id="line149"></span>
<span id="line150"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line151"></span>
<span id="line152"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line153"></span>    MathJax = {  // global configuration object; defined ahead of the MathJax-script tag that loads the library
<span id="line154"></span>        tex: {
<span id="line155"></span>            inlineMath: [['$', '$'], ['\\(', '\\)']],
<span id="line156"></span>            displayMath: [['$$','$$'], ['\\[', '\\]']],
<span id="line157"></span>            processEscapes: true,
<span id="line158"></span>            processEnvironments: true
<span id="line159"></span>        },
<span id="line160"></span>        options: {
<span id="line161"></span>            skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre']
<span id="line162"></span>        }
<span id="line163"></span>    };
<span id="line164"></span>    // Once the page has loaded, tag the parent of every mjx-container element with the "has-jax" class.
<span id="line165"></span>    window.addEventListener('load', (event) => {
<span id="line166"></span>        document.querySelectorAll("mjx-container").forEach(function(x){
<span id="line167"></span>            x.parentElement.classList.add('has-jax')})  // add(), not `+=`: string-appending glues the token onto the last existing class name
<span id="line168"></span>    });
<span id="line169"></span>
<span id="line170"></span></span><span></<span class="end-tag">script</span>></span><span>
<!-- The es6 polyfill bundle is loaded from the cdnjs mirror: the polyfill.io domain was taken over in 2024 and served malicious code. The request path and query string are unchanged. -->
<span id="line171"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6">https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6</a>"></span><span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line172"></span></span><span><<span class="start-tag">script</span> <span class="attribute-name">type</span>="<a class="attribute-value">text/javascript</a>" <span class="attribute-name">id</span>="<a class="attribute-value">MathJax-script</a>" <span class="attribute-name">async</span>
<span id="line173"></span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js">https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js</a>"></span><span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line174"></span>
<span id="line175"></span>
<!-- Site header: logo link, theme toggle button (moon/sun icons), and the navigation menu. The toggle carries type="button" and an aria-label because it has no text content; its icons are decorative, so they are aria-hidden. -->
<span id="line176"></span></span><span><<span class="start-tag">header</span> <span class="attribute-name">class</span>="<a class="attribute-value">header</a>"></span><span>
<span id="line177"></span> </span><span><<span class="start-tag">nav</span> <span class="attribute-name">class</span>="<a class="attribute-value">nav</a>"></span><span>
<span id="line178"></span> </span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">logo</a>"></span><span>
<span id="line179"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/">https://lilianweng.github.io/</a>" <span class="attribute-name">accesskey</span>="<a class="attribute-value">h</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Lil<span class="entity"><span>&</span>#39;</span>Log (Alt + H)</a>"></span><span>Lil<span class="entity"><span>&</span>#39;</span>Log</span><span></<span class="end-tag">a</span>></span><span>
<span id="line180"></span> </span><span><<span class="start-tag">span</span> <span class="attribute-name">class</span>="<a class="attribute-value">logo-switches</a>"></span><span>
<span id="line181"></span> </span><span><<span class="start-tag">button</span> <span class="attribute-name">id</span>="<a class="attribute-value">theme-toggle</a>" <span class="attribute-name">type</span>="<a class="attribute-value">button</a>" <span class="attribute-name">accesskey</span>="<a class="attribute-value">t</a>" <span class="attribute-name">title</span>="<a class="attribute-value">(Alt + T)</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Toggle theme</a>"></span><span>
<span id="line182"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">id</span>="<a class="attribute-value">moon</a>" <span class="attribute-name">xmlns</span>="<a class="attribute-value">http://www.w3.org/2000/svg</a>" <span class="attribute-name">width</span>="<a class="attribute-value">24</a>" <span class="attribute-name">height</span>="<a class="attribute-value">24</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 24 24</a>"
<span id="line183"></span> <span class="attribute-name">fill</span>="<a class="attribute-value">none</a>" <span class="attribute-name">stroke</span>="<a class="attribute-value">currentColor</a>" <span class="attribute-name">stroke-width</span>="<a class="attribute-value">2</a>" <span class="attribute-name">stroke-linecap</span>="<a class="attribute-value">round</a>"
<span id="line184"></span> <span class="attribute-name">stroke-linejoin</span>="<a class="attribute-value">round</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>"></span><span>
<span id="line185"></span> </span><span><<span class="start-tag">path</span> <span class="attribute-name">d</span>="<a class="attribute-value">M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z</a>"></span><span></span><span></<span class="end-tag">path</span>></span><span>
<span id="line186"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line187"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">id</span>="<a class="attribute-value">sun</a>" <span class="attribute-name">xmlns</span>="<a class="attribute-value">http://www.w3.org/2000/svg</a>" <span class="attribute-name">width</span>="<a class="attribute-value">24</a>" <span class="attribute-name">height</span>="<a class="attribute-value">24</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 24 24</a>"
<span id="line188"></span> <span class="attribute-name">fill</span>="<a class="attribute-value">none</a>" <span class="attribute-name">stroke</span>="<a class="attribute-value">currentColor</a>" <span class="attribute-name">stroke-width</span>="<a class="attribute-value">2</a>" <span class="attribute-name">stroke-linecap</span>="<a class="attribute-value">round</a>"
<span id="line189"></span> <span class="attribute-name">stroke-linejoin</span>="<a class="attribute-value">round</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>"></span><span>
<span id="line190"></span> </span><span><<span class="start-tag">circle</span> <span class="attribute-name">cx</span>="<a class="attribute-value">12</a>" <span class="attribute-name">cy</span>="<a class="attribute-value">12</a>" <span class="attribute-name">r</span>="<a class="attribute-value">5</a>"></span><span></span><span></<span class="end-tag">circle</span>></span><span>
<span id="line191"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">12</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">1</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">12</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">3</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line192"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">12</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">21</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">12</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">23</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line193"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">4.22</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">4.22</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">5.64</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">5.64</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line194"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">18.36</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">18.36</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">19.78</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">19.78</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line195"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">1</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">12</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">3</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">12</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line196"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">21</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">12</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">23</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">12</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line197"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">4.22</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">19.78</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">5.64</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">18.36</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line198"></span> </span><span><<span class="start-tag">line</span> <span class="attribute-name">x1</span>="<a class="attribute-value">18.36</a>" <span class="attribute-name">y1</span>="<a class="attribute-value">5.64</a>" <span class="attribute-name">x2</span>="<a class="attribute-value">19.78</a>" <span class="attribute-name">y2</span>="<a class="attribute-value">4.22</a>"></span><span></span><span></<span class="end-tag">line</span>></span><span>
<span id="line199"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line200"></span> </span><span></<span class="end-tag">button</span>></span><span>
<span id="line201"></span> </span><span></<span class="end-tag">span</span>></span><span>
<span id="line202"></span> </span><span></<span class="end-tag">div</span>></span><span>
<span id="line203"></span> </span><span><<span class="start-tag">ul</span> <span class="attribute-name">id</span>="<a class="attribute-value">menu</a>"></span><span>
<span id="line204"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line205"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/">https://lilianweng.github.io/</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Posts</a>"></span><span>
<span id="line206"></span> </span><span><<span class="start-tag">span</span>></span><span>Posts</span><span></<span class="end-tag">span</span>></span><span>
<span id="line207"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line208"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line209"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line210"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/archives">https://lilianweng.github.io/archives</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Archive</a>"></span><span>
<span id="line211"></span> </span><span><<span class="start-tag">span</span>></span><span>Archive</span><span></<span class="end-tag">span</span>></span><span>
<span id="line212"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line213"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line214"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line215"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/search/">https://lilianweng.github.io/search/</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Search (Alt <span class="entity"><span>&</span>#43;</span> /)</a>" <span class="attribute-name">accesskey</span>="<a class="attribute-value">/</a>"></span><span>
<span id="line216"></span> </span><span><<span class="start-tag">span</span>></span><span>Search</span><span></<span class="end-tag">span</span>></span><span>
<span id="line217"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line218"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line219"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line220"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/">https://lilianweng.github.io/tags/</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Tags</a>"></span><span>
<span id="line221"></span> </span><span><<span class="start-tag">span</span>></span><span>Tags</span><span></<span class="end-tag">span</span>></span><span>
<span id="line222"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line223"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line224"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line225"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/faq">https://lilianweng.github.io/faq</a>" <span class="attribute-name">title</span>="<a class="attribute-value">FAQ</a>"></span><span>
<span id="line226"></span> </span><span><<span class="start-tag">span</span>></span><span>FAQ</span><span></<span class="end-tag">span</span>></span><span>
<span id="line227"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line228"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line229"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line230"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://www.emojisearch.app/">https://www.emojisearch.app/</a>" <span class="attribute-name">title</span>="<a class="attribute-value">emojisearch.app</a>"></span><span>
<span id="line231"></span> </span><span><<span class="start-tag">span</span>></span><span>emojisearch.app</span><span></<span class="end-tag">span</span>></span><span>
<span id="line232"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line233"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line234"></span> </span><span></<span class="end-tag">ul</span>></span><span>
<span id="line235"></span> </span><span></<span class="end-tag">nav</span>></span><span>
<span id="line236"></span></span><span></<span class="end-tag">header</span>></span><span>
<span id="line237"></span></span><span><<span class="start-tag">main</span> <span class="attribute-name">class</span>="<a class="attribute-value">main</a>"></span><span>
<span id="line238"></span>
<span id="line239"></span></span><span><<span class="start-tag">article</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-single</a>"></span><span>
<span id="line240"></span> </span><span><<span class="start-tag">header</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-header</a>"></span><span>
<span id="line241"></span>
<span id="line242"></span> </span><span><<span class="start-tag">h1</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-title</a>"></span><span>
<span id="line243"></span> LLM Powered Autonomous Agents
<span id="line244"></span> </span><span></<span class="end-tag">h1</span>></span><span>
<span id="line245"></span> </span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-meta</a>"></span><span>Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng
<span id="line246"></span>
<span id="line247"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line248"></span> </span><span></<span class="end-tag">header</span>></span><span> </span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">toc</a>"></span><span>
<span id="line249"></span> </span><span><<span class="start-tag">details</span> ></span><span>
<span id="line250"></span> </span><span><<span class="start-tag">summary</span> <span class="attribute-name">accesskey</span>="<a class="attribute-value">c</a>" <span class="attribute-name">title</span>="<a class="attribute-value">(Alt + C)</a>"></span><span>
<span id="line251"></span> </span><span><<span class="start-tag">span</span> <span class="attribute-name">class</span>="<a class="attribute-value">details</a>"></span><span>Table of Contents</span><span></<span class="end-tag">span</span>></span><span>
<span id="line252"></span> </span><span></<span class="end-tag">summary</span>></span><span>
<span id="line253"></span>
<span id="line254"></span> </span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">inner</a>"></span><span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line255"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line256"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#agent-system-overview">#agent-system-overview</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Agent System Overview</a>"></span><span>Agent System Overview</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line257"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line258"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-one-planning">#component-one-planning</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Component One: Planning</a>"></span><span>Component One: Planning</span><span></<span class="end-tag">a</span>></span><span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line259"></span>
<span id="line260"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line261"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#task-decomposition">#task-decomposition</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Task Decomposition</a>"></span><span>Task Decomposition</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line262"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line263"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#self-reflection">#self-reflection</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Self-Reflection</a>"></span><span>Self-Reflection</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line264"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line265"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line266"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-two-memory">#component-two-memory</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Component Two: Memory</a>"></span><span>Component Two: Memory</span><span></<span class="end-tag">a</span>></span><span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line267"></span>
<span id="line268"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line269"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#types-of-memory">#types-of-memory</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Types of Memory</a>"></span><span>Types of Memory</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line270"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line271"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#maximum-inner-product-search-mips">#maximum-inner-product-search-mips</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Maximum Inner Product Search (MIPS)</a>"></span><span>Maximum Inner Product Search (MIPS)</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line272"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line273"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line274"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-three-tool-use">#component-three-tool-use</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Component Three: Tool Use</a>"></span><span>Component Three: Tool Use</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line275"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line276"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#case-studies">#case-studies</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Case Studies</a>"></span><span>Case Studies</span><span></<span class="end-tag">a</span>></span><span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line277"></span>
<span id="line278"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line279"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#scientific-discovery-agent">#scientific-discovery-agent</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Scientific Discovery Agent</a>"></span><span>Scientific Discovery Agent</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line280"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line281"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#generative-agents-simulation">#generative-agents-simulation</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Generative Agents Simulation</a>"></span><span>Generative Agents Simulation</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line282"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line283"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#proof-of-concept-examples">#proof-of-concept-examples</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Proof-of-Concept Examples</a>"></span><span>Proof-of-Concept Examples</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line284"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line285"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line286"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#challenges">#challenges</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Challenges</a>"></span><span>Challenges</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line287"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line288"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#citation">#citation</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">Citation</a>"></span><span>Citation</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line289"></span> </span><span><<span class="start-tag">li</span>></span><span>
<span id="line290"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#references">#references</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">References</a>"></span><span>References</span><span></<span class="end-tag">a</span>></span><span>
<span id="line291"></span> </span><span></<span class="end-tag">li</span>></span><span>
<span id="line292"></span> </span><span></<span class="end-tag">ul</span>></span><span>
<span id="line293"></span> </span><span></<span class="end-tag">div</span>></span><span>
<span id="line294"></span> </span><span></<span class="end-tag">details</span>></span><span>
<span id="line295"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line296"></span>
<span id="line297"></span> </span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-content</a>"></span><span></span><span><<span class="start-tag">p</span>></span><span>Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concept demos, such as </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/Significant-Gravitas/Auto-GPT">https://github.com/Significant-Gravitas/Auto-GPT</a>"></span><span>AutoGPT</span><span></<span class="end-tag">a</span>></span><span>, </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/AntonOsika/gpt-engineer">https://github.com/AntonOsika/gpt-engineer</a>"></span><span>GPT-Engineer</span><span></<span class="end-tag">a</span>></span><span> and </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/yoheinakajima/babyagi">https://github.com/yoheinakajima/babyagi</a>"></span><span>BabyAGI</span><span></<span class="end-tag">a</span>></span><span>, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line298"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">agent-system-overview</a>"></span><span>Agent System Overview</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#agent-system-overview">#agent-system-overview</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line299"></span></span><span><<span class="start-tag">p</span>></span><span>In an LLM-powered autonomous agent system, LLM functions as the agent<span class="entity"><span>&</span>rsquo;</span>s brain, complemented by several key components:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line300"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line301"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Planning</span><span></<span class="end-tag">strong</span>></span><span>
<span id="line302"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line303"></span></span><span><<span class="start-tag">li</span>></span><span>Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line304"></span></span><span><<span class="start-tag">li</span>></span><span>Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line305"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line306"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line307"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Memory</span><span></<span class="end-tag">strong</span>></span><span>
<span id="line308"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line309"></span></span><span><<span class="start-tag">li</span>></span><span>Short-term memory: I would consider all the in-context learning (See </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/">https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/</a>"></span><span>Prompt Engineering</span><span></<span class="end-tag">a</span>></span><span>) as utilizing short-term memory of the model to learn.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line310"></span></span><span><<span class="start-tag">li</span>></span><span>Long-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line311"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line312"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line313"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Tool use</span><span></<span class="end-tag">strong</span>></span><span>
<span id="line314"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line315"></span></span><span><<span class="start-tag">li</span>></span><span>The agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line316"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line317"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line318"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line319"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/agent-overview.png">agent-overview.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line320"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 1. Overview of an LLM-powered autonomous agent system.</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line321"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">component-one-planning</a>"></span><span>Component One: Planning</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-one-planning">#component-one-planning</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line322"></span></span><span><<span class="start-tag">p</span>></span><span>A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line323"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">task-decomposition</a>"></span><span>Task Decomposition</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#task-decomposition">#task-decomposition</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line324"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot">https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>Chain of thought</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (CoT; </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2201.11903">https://arxiv.org/abs/2201.11903</a>"></span><span>Wei et al. 2022</span><span></<span class="end-tag">a</span>></span><span>) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to <span class="entity"><span>&</span>ldquo;</span>think step by step<span class="entity"><span>&</span>rdquo;</span> to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and sheds light on an interpretation of the model<span class="entity"><span>&</span>rsquo;</span>s thinking process.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line325"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Tree of Thoughts</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2305.10601">https://arxiv.org/abs/2305.10601</a>"></span><span>Yao et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line326"></span></span><span><<span class="start-tag">p</span>></span><span>Task decomposition can be done (1) by LLM with simple prompting like </span><span><<span class="start-tag">code</span>></span><span><span class="entity"><span>&</span>quot;</span>Steps for XYZ.\n1.<span class="entity"><span>&</span>quot;</span></span><span></<span class="end-tag">code</span>></span><span>, </span><span><<span class="start-tag">code</span>></span><span><span class="entity"><span>&</span>quot;</span>What are the subgoals for achieving XYZ?<span class="entity"><span>&</span>quot;</span></span><span></<span class="end-tag">code</span>></span><span>, (2) by using task-specific instructions; e.g. </span><span><<span class="start-tag">code</span>></span><span><span class="entity"><span>&</span>quot;</span>Write a story outline.<span class="entity"><span>&</span>quot;</span></span><span></<span class="end-tag">code</span>></span><span> for writing a novel, or (3) with human inputs.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line327"></span></span><span><<span class="start-tag">p</span>></span><span>Another quite distinct approach, </span><span><<span class="start-tag">strong</span>></span><span>LLM+P</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.11477">https://arxiv.org/abs/2304.11477</a>"></span><span>Liu et al. 2023</span><span></<span class="end-tag">a</span>></span><span>), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into <span class="entity"><span>&</span>ldquo;</span>Problem PDDL<span class="entity"><span>&</span>rdquo;</span>, then (2) requests a classical planner to generate a PDDL plan based on an existing <span class="entity"><span>&</span>ldquo;</span>Domain PDDL<span class="entity"><span>&</span>rdquo;</span>, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line328"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">self-reflection</a>"></span><span>Self-Reflection</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#self-reflection">#self-reflection</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line329"></span></span><span><<span class="start-tag">p</span>></span><span>Self-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line330"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">name</span>="<a class="attribute-value">react</a>"></span><span></span><span></<span class="end-tag">a</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>ReAct</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.03629">https://arxiv.org/abs/2210.03629</a>"></span><span>Yao et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompts LLM to generate reasoning traces in natural language.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line331"></span></span><span><<span class="start-tag">p</span>></span><span>The ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line332"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span>></span><span>Thought: ...
<span id="line333"></span>Action: ...
<span id="line334"></span>Observation: ...
<span id="line335"></span>... (Repeated many times)
<span id="line336"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/react.png">react.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line337"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 2. Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.03629">https://arxiv.org/abs/2210.03629</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Yao et al. 2023</span><span></<span class="end-tag">a</span>></span><span>).</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line338"></span></span><span><<span class="start-tag">p</span>></span><span>In both experiments on knowledge-intensive tasks and decision-making tasks, </span><span><<span class="start-tag">code</span>></span><span>ReAct</span><span></<span class="end-tag">code</span>></span><span> works better than the </span><span><<span class="start-tag">code</span>></span><span>Act</span><span></<span class="end-tag">code</span>></span><span>-only baseline where </span><span><<span class="start-tag">code</span>></span><span>Thought: …</span><span></<span class="end-tag">code</span>></span><span> step is removed.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line339"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Reflexion</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.11366">https://arxiv.org/abs/2303.11366</a>"></span><span>Shinn <span class="entity"><span>&</span>amp;</span> Labash 2023</span><span></<span class="end-tag">a</span>></span><span>) is a framework that equips agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may </span><span><<span class="start-tag">em</span>></span><span>decide to reset</span><span></<span class="end-tag">em</span>></span><span> the environment to start a new trial depending on the self-reflection results.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line340"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/reflexion.png">reflexion.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 80%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line341"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 3. Illustration of the Reflexion framework. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.11366">https://arxiv.org/abs/2303.11366</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Shinn <span><span>&</span></span> Labash, 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line342"></span></span><span><<span class="start-tag">p</span>></span><span>The heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line343"></span></span><span><<span class="start-tag">p</span>></span><span>Self-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent<span class="entity"><span>&</span>rsquo;</span>s working memory, up to three, to be used as context for querying LLM.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line344"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/reflexion-exp.png">reflexion-exp.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line345"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 4. Experiments on AlfWorld Env and HotpotQA. Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.11366">https://arxiv.org/abs/2303.11366</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Shinn <span><span>&</span></span> Labash, 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line346"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Chain of Hindsight</span><span></<span class="end-tag">strong</span>></span><span> (CoH; </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2302.02676">https://arxiv.org/abs/2302.02676</a>"></span><span>Liu et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of $D_h = \{(x, y_i , r_i , z_i)\}_{i=1}^n$, where $x$ is the prompt, each $y_i$ is a model completion, $r_i$ is the human rating of $y_i$, and $z_i$ is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, $r_n \geq r_{n-1} \geq \dots \geq r_1$. The process is supervised fine-tuning where the data is a sequence in the form of $\tau_h = (x, z_i, y_i, z_j, y_j, \dots, z_n, y_n)$, where $1 \leq i \leq j \leq n$. The model is finetuned to only predict $y_n$ conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line347"></span></span><span><<span class="start-tag">p</span>></span><span>To avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line348"></span></span><span><<span class="start-tag">p</span>></span><span>The training dataset in their experiments is a combination of </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://huggingface.co/datasets/openai/webgpt_comparisons">https://huggingface.co/datasets/openai/webgpt_comparisons</a>"></span><span>WebGPT comparisons</span><span></<span class="end-tag">a</span>></span><span>, </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/openai/summarize-from-feedback">https://github.com/openai/summarize-from-feedback</a>"></span><span>summarization from human feedback</span><span></<span class="end-tag">a</span>></span><span> and </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/anthropics/hh-rlhf">https://github.com/anthropics/hh-rlhf</a>"></span><span>human preference dataset</span><span></<span class="end-tag">a</span>></span><span>.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line349"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/CoH.png">CoH.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line350"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2302.02676">https://arxiv.org/abs/2302.02676</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Liu et al. 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line351"></span></span><span><<span class="start-tag">p</span>></span><span>The idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. </span><span><<span class="start-tag">strong</span>></span><span>Algorithm Distillation</span><span></<span class="end-tag">strong</span>></span><span> (AD; </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.14215">https://arxiv.org/abs/2210.14215</a>"></span><span>Laskin et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an </span><span><<span class="start-tag">em</span>></span><span>algorithm</span><span></<span class="end-tag">em</span>></span><span> is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. The goal is to learn the process of RL instead of training a task-specific policy itself.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line352"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/algorithm-distillation.png">algorithm-distillation.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 85%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line353"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 6. Illustration of how Algorithm Distillation (AD) works. </span><span><<span class="start-tag">br</span><span>/</span>></span><span>(Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.14215">https://arxiv.org/abs/2210.14215</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Laskin et al. 2023</span><span></<span class="end-tag">a</span>></span><span>).</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line354"></span></span><span><<span class="start-tag">p</span>></span><span>The paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line355"></span></span><span><<span class="start-tag">p</span>></span><span>In reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line356"></span></span><span><<span class="start-tag">p</span>></span><span>In comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2018-01-23-multi-armed-bandit/#upper-confidence-bounds">https://lilianweng.github.io/posts/2018-01-23-multi-armed-bandit/#upper-confidence-bounds</a>"></span><span>UCB</span><span></<span class="end-tag">a</span>></span><span>), RL^2 (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/1611.02779">https://arxiv.org/abs/1611.02779</a>"></span><span>Duan et al. 2017</span><span></<span class="end-tag">a</span>></span><span>; used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line357"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/algorithm-distillation-results.png">algorithm-distillation-results.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line358"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 7. Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2018-04-08-policy-gradient/#a3c">https://lilianweng.github.io/posts/2018-04-08-policy-gradient/#a3c</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>A3C</span><span></<span class="end-tag">a</span>></span><span> for "dark" environments and </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:http://lilianweng.github.io/posts/2018-02-19-rl-overview/#deep-q-network">http://lilianweng.github.io/posts/2018-02-19-rl-overview/#deep-q-network</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>DQN</span><span></<span class="end-tag">a</span>></span><span> for watermaze.</span><span><<span class="start-tag">br</span><span>/</span>></span><span>(Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.14215">https://arxiv.org/abs/2210.14215</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Laskin et al. 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line359"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">component-two-memory</a>"></span><span>Component Two: Memory</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-two-memory">#component-two-memory</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line360"></span></span><span><<span class="start-tag">p</span>></span><span>(Big thank you to ChatGPT for helping me draft this section. I<span class="entity"><span>&</span>rsquo;</span>ve learned a lot about the human brain and data structure for fast MIPS in my </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389">https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389</a>"></span><span>conversations</span><span></<span class="end-tag">a</span>></span><span> with ChatGPT.)</span><span></<span class="end-tag">p</span>></span><span>
<span id="line361"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">types-of-memory</a>"></span><span>Types of Memory</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#types-of-memory">#types-of-memory</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line362"></span></span><span><<span class="start-tag">p</span>></span><span>Memory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line363"></span></span><span><<span class="start-tag">ol</span>></span><span>
<span id="line364"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line365"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Sensory Memory</span><span></<span class="end-tag">strong</span>></span><span>: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line366"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line367"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line368"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Short-Term Memory</span><span></<span class="end-tag">strong</span>></span><span> (STM) or </span><span><<span class="start-tag">strong</span>></span><span>Working Memory</span><span></<span class="end-tag">strong</span>></span><span>: It stores information that we are currently aware of and that is needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:http://psychclassics.yorku.ca/Miller/">http://psychclassics.yorku.ca/Miller/</a>"></span><span>Miller 1956</span><span></<span class="end-tag">a</span>></span><span>) and lasts for 20-30 seconds.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line369"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line370"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line371"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Long-Term Memory</span><span></<span class="end-tag">strong</span>></span><span> (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line372"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line373"></span></span><span><<span class="start-tag">li</span>></span><span>Explicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts).</span><span></<span class="end-tag">li</span>></span><span>
<span id="line374"></span></span><span><<span class="start-tag">li</span>></span><span>Implicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line375"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line376"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line377"></span></span><span></<span class="end-tag">ol</span>></span><span>
<span id="line378"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/memory.png">memory.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line379"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 8. Categorization of human memory.</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line380"></span></span><span><<span class="start-tag">p</span>></span><span>We can roughly consider the following mappings:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line381"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line382"></span></span><span><<span class="start-tag">li</span>></span><span>Sensory memory as learning embedding representations for raw inputs, including text, image or other modalities;</span><span></<span class="end-tag">li</span>></span><span>
<span id="line383"></span></span><span><<span class="start-tag">li</span>></span><span>Short-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line384"></span></span><span><<span class="start-tag">li</span>></span><span>Long-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line385"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line386"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">maximum-inner-product-search-mips</a>"></span><span>Maximum Inner Product Search (MIPS)</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#maximum-inner-product-search-mips">#maximum-inner-product-search-mips</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line387"></span></span><span><<span class="start-tag">p</span>></span><span>The external memory can alleviate the restriction of finite attention span. A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://en.wikipedia.org/wiki/Maximum_inner-product_search">https://en.wikipedia.org/wiki/Maximum_inner-product_search</a>"></span><span>MIPS</span><span></<span class="end-tag">a</span>></span><span>). To optimize the retrieval speed, the common choice is the </span><span><<span class="start-tag">em</span>></span><span>approximate nearest neighbors (ANN)</span><span></<span class="end-tag">em</span>></span><span> algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line388"></span></span><span><<span class="start-tag">p</span>></span><span>A few common choices of ANN algorithms for fast MIPS:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line389"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line390"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://en.wikipedia.org/wiki/Locality-sensitive_hashing">https://en.wikipedia.org/wiki/Locality-sensitive_hashing</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>LSH</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (Locality-Sensitive Hashing): It introduces a </span><span><<span class="start-tag">em</span>></span><span>hashing</span><span></<span class="end-tag">em</span>></span><span> function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line391"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/spotify/annoy">https://github.com/spotify/annoy</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>ANNOY</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (Approximate Nearest Neighbors Oh Yeah): The core data structures are </span><span><<span class="start-tag">em</span>></span><span>random projection trees</span><span></<span class="end-tag">em</span>></span><span>, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line392"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/1603.09320">https://arxiv.org/abs/1603.09320</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>HNSW</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (Hierarchical Navigable Small World): It is inspired by the idea of </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://en.wikipedia.org/wiki/Small-world_network">https://en.wikipedia.org/wiki/Small-world_network</a>"></span><span>small world networks</span><span></<span class="end-tag">a</span>></span><span> where most nodes can be reached by any other nodes within a small number of steps; e.g. <span class="entity"><span>&</span>ldquo;</span>six degrees of separation<span class="entity"><span>&</span>rdquo;</span> feature of social networks. HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can<span class="entity"><span>&</span>rsquo;</span>t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line393"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/facebookresearch/faiss">https://github.com/facebookresearch/faiss</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>FAISS</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist </span><span><<span class="start-tag">em</span>></span><span>clustering</span><span></<span class="end-tag">em</span>></span><span> of data points. FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line394"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/google-research/google-research/tree/master/scann">https://github.com/google-research/google-research/tree/master/scann</a>"></span><span></span><span><<span class="start-tag">strong</span>></span><span>ScaNN</span><span></<span class="end-tag">strong</span>></span><span></span><span></<span class="end-tag">a</span>></span><span> (Scalable Nearest Neighbors): The main innovation in ScaNN is </span><span><<span class="start-tag">em</span>></span><span>anisotropic vector quantization</span><span></<span class="end-tag">em</span>></span><span>. It quantizes a data point $x_i$ to $\tilde{x}_i$ such that the inner product $\langle q, x_i \rangle$ is as similar to the original distance of $\langle q, \tilde{x}_i \rangle$ as possible, instead of picking the closest quantization centroid points.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line395"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line396"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/mips.png">mips.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 80%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line397"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html">https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Google Blog, 2020</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line398"></span></span><span><<span class="start-tag">p</span>></span><span>Check more MIPS algorithms and performance comparison in </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://ann-benchmarks.com/">https://ann-benchmarks.com/</a>"></span><span>ann-benchmarks.com</span><span></<span class="end-tag">a</span>></span><span>.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line399"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">component-three-tool-use</a>"></span><span>Component Three: Tool Use</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#component-three-tool-use">#component-three-tool-use</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line400"></span></span><span><<span class="start-tag">p</span>></span><span>Tool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line401"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/sea-otter.png">sea-otter.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 500px;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line402"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://www.popularmechanics.com/science/animals/g39714258/animals-using-tools/">https://www.popularmechanics.com/science/animals/g39714258/animals-using-tools/</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Animals using tools</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line403"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">name</span>="<a class="attribute-value">mrkl</a>"></span><span></span><span></<span class="end-tag">a</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>MRKL</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2205.00445">https://arxiv.org/abs/2205.00445</a>"></span><span>Karpas et al. 2022</span><span></<span class="end-tag">a</span>></span><span>), short for <span class="entity"><span>&</span>ldquo;</span>Modular Reasoning, Knowledge and Language<span class="entity"><span>&</span>rdquo;</span>, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of <span class="entity"><span>&</span>ldquo;</span>expert<span class="entity"><span>&</span>rdquo;</span> modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line404"></span></span><span><<span class="start-tag">p</span>></span><span>They did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight that when the external symbolic tools can work reliably, </span><span><<span class="start-tag">em</span>></span><span>knowing when to and how to use the tools is crucial</span><span></<span class="end-tag">em</span>></span><span>, determined by the LLM capability.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line405"></span></span><span><<span class="start-tag">p</span>></span><span>Both </span><span><<span class="start-tag">strong</span>></span><span>TALM</span><span></<span class="end-tag">strong</span>></span><span> (Tool Augmented Language Models; </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2205.12255">https://arxiv.org/abs/2205.12255</a>"></span><span>Parisi et al. 2022</span><span></<span class="end-tag">a</span>></span><span>) and </span><span><<span class="start-tag">strong</span>></span><span>Toolformer</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2302.04761">https://arxiv.org/abs/2302.04761</a>"></span><span>Schick et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#external-apis">https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#external-apis</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>External APIs<span class="entity"><span>&</span>rdquo;</span> section</span><span></<span class="end-tag">a</span>></span><span> of Prompt Engineering.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line406"></span></span><span><<span class="start-tag">p</span>></span><span>ChatGPT </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://openai.com/blog/chatgpt-plugins">https://openai.com/blog/chatgpt-plugins</a>"></span><span>Plugins</span><span></<span class="end-tag">a</span>></span><span> and OpenAI API </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://platform.openai.com/docs/guides/gpt/function-calling">https://platform.openai.com/docs/guides/gpt/function-calling</a>"></span><span>function calling</span><span></<span class="end-tag">a</span>></span><span> are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line407"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>HuggingGPT</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.17580">https://arxiv.org/abs/2303.17580</a>"></span><span>Shen et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line408"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/hugging-gpt.png">hugging-gpt.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line409"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 11. Illustration of how HuggingGPT works. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.17580">https://arxiv.org/abs/2303.17580</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Shen et al. 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line410"></span></span><span><<span class="start-tag">p</span>></span><span>The system comprises 4 stages:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line411"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>(1) Task planning</span><span></<span class="end-tag">strong</span>></span><span>: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line412"></span></span><span><<span class="start-tag">p</span>></span><span>Instruction:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line413"></span></span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">prompt</a>"></span><span>
<span id="line414"></span>The AI assistant can parse user input to several tasks: [{"task": task, "id", task_id, "dep": dependency_task_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}]. The "dep" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag "<span class="entity"><span>&</span>lt;</span>resource<span class="entity"><span>&</span>gt;</span>-task_id" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can't be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.
<span id="line415"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line416"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>(2) Model selection</span><span></<span class="end-tag">strong</span>></span><span>: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line417"></span></span><span><<span class="start-tag">p</span>></span><span>Instruction:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line418"></span></span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">prompt</a>"></span><span>
<span id="line419"></span>Given the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: "id": "id", "reason": "your detail reason for the choice". We have a list of models for you to choose from {{ Candidate Models }}. Please select one model from the list.
<span id="line420"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line421"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>(3) Task execution</span><span></<span class="end-tag">strong</span>></span><span>: Expert models execute on the specific tasks and log results.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line422"></span></span><span><<span class="start-tag">p</span>></span><span>Instruction:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line423"></span></span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">prompt</a>"></span><span>
<span id="line424"></span>With the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.
<span id="line425"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line426"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>(4) Response generation</span><span></<span class="end-tag">strong</span>></span><span>: LLM receives the execution results and provides summarized results to users.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line427"></span></span><span><<span class="start-tag">p</span>></span><span>To put HuggingGPT into real-world usage, a couple of challenges need to be solved: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement is needed for both LLM outputs and external model services.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line428"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>API-Bank</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.08244">https://arxiv.org/abs/2304.08244</a>"></span><span>Li et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line429"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/api-bank-process.png">api-bank-process.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 60%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line430"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.08244">https://arxiv.org/abs/2304.08244</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Li et al. 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line431"></span></span><span><<span class="start-tag">p</span>></span><span>In the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. Decisions include:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line432"></span></span><span><<span class="start-tag">ol</span>></span><span>
<span id="line433"></span></span><span><<span class="start-tag">li</span>></span><span>Whether an API call is needed.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line434"></span></span><span><<span class="start-tag">li</span>></span><span>Identify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API).</span><span></<span class="end-tag">li</span>></span><span>
<span id="line435"></span></span><span><<span class="start-tag">li</span>></span><span>Respond based on the API results: the model can choose to refine and call again if the results are not satisfactory.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line436"></span></span><span></<span class="end-tag">ol</span>></span><span>
<span id="line437"></span></span><span><<span class="start-tag">p</span>></span><span>This benchmark evaluates the agent<span class="entity"><span>&</span>rsquo;</span>s tool use capabilities at three levels:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line438"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line439"></span></span><span><<span class="start-tag">li</span>></span><span>Level-1 evaluates the ability to </span><span><<span class="start-tag">em</span>></span><span>call the API</span><span></<span class="end-tag">em</span>></span><span>. Given an API<span class="entity"><span>&</span>rsquo;</span>s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line440"></span></span><span><<span class="start-tag">li</span>></span><span>Level-2 examines the ability to </span><span><<span class="start-tag">em</span>></span><span>retrieve the API</span><span></<span class="end-tag">em</span>></span><span>. The model needs to search for possible APIs that may solve the user<span class="entity"><span>&</span>rsquo;</span>s requirement and learn how to use them by reading documentation.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line441"></span></span><span><<span class="start-tag">li</span>></span><span>Level-3 assesses the ability to </span><span><<span class="start-tag">em</span>></span><span>plan API beyond retrieve and call</span><span></<span class="end-tag">em</span>></span><span>. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line442"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line443"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">case-studies</a>"></span><span>Case Studies</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#case-studies">#case-studies</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line444"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">scientific-discovery-agent</a>"></span><span>Scientific Discovery Agent</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#scientific-discovery-agent">#scientific-discovery-agent</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line445"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>ChemCrow</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.05376">https://arxiv.org/abs/2304.05376</a>"></span><span>Bran et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/hwchase17/langchain">https://github.com/hwchase17/langchain</a>"></span><span>LangChain</span><span></<span class="end-tag">a</span>></span><span>, reflects what was previously described in the </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#react">#react</a>"></span><span>ReAct</span><span></<span class="end-tag">a</span>></span><span> and </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#mrkl">#mrkl</a>"></span><span>MRKLs</span><span></<span class="end-tag">a</span>></span><span> and combines CoT reasoning with tools relevant to the tasks:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line446"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line447"></span></span><span><<span class="start-tag">li</span>></span><span>The LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line448"></span></span><span><<span class="start-tag">li</span>></span><span>It is then instructed to answer a user-given prompt using the tools provided when necessary. The instruction asks the model to follow the ReAct format - </span><span><<span class="start-tag">code</span>></span><span>Thought, Action, Action Input, Observation</span><span></<span class="end-tag">code</span>></span><span>.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line449"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line450"></span></span><span><<span class="start-tag">p</span>></span><span>One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using an LLM to evaluate its own performance in domains that require deep expertise. The lack of expertise may leave LLMs unaware of their own flaws and thus unable to judge the correctness of task results well.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line451"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.05332">https://arxiv.org/abs/2304.05332</a>"></span><span>Boiko et al. (2023)</span><span></<span class="end-tag">a</span>></span><span> also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line452"></span></span><span><<span class="start-tag">p</span>></span><span>For example, when requested to </span><span><<span class="start-tag">code</span>></span><span><span class="entity"><span>&</span>quot;</span>develop a novel anticancer drug<span class="entity"><span>&</span>quot;</span></span><span></<span class="end-tag">code</span>></span><span>, the model came up with the following reasoning steps:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line453"></span></span><span><<span class="start-tag">ol</span>></span><span>
<span id="line454"></span></span><span><<span class="start-tag">li</span>></span><span>inquired about current trends in anticancer drug discovery;</span><span></<span class="end-tag">li</span>></span><span>
<span id="line455"></span></span><span><<span class="start-tag">li</span>></span><span>selected a target;</span><span></<span class="end-tag">li</span>></span><span>
<span id="line456"></span></span><span><<span class="start-tag">li</span>></span><span>requested a scaffold targeting these compounds;</span><span></<span class="end-tag">li</span>></span><span>
<span id="line457"></span></span><span><<span class="start-tag">li</span>></span><span>Once the compound was identified, the model attempted its synthesis.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line458"></span></span><span></<span class="end-tag">ol</span>></span><span>
<span id="line459"></span></span><span><<span class="start-tag">p</span>></span><span>They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line460"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">generative-agents-simulation</a>"></span><span>Generative Agents Simulation</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#generative-agents-simulation">#generative-agents-simulation</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line461"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Generative Agents</span><span></<span class="end-tag">strong</span>></span><span> (</span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.03442">https://arxiv.org/abs/2304.03442</a>"></span><span>Park, et al. 2023</span><span></<span class="end-tag">a</span>></span><span>) is a super fun experiment where 25 virtual characters, each controlled by an LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line462"></span></span><span><<span class="start-tag">p</span>></span><span>The design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line463"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line464"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Memory</span><span></<span class="end-tag">strong</span>></span><span> stream: is a long-term memory module (external database) that records a comprehensive list of agents<span class="entity"><span>&</span>rsquo;</span> experience in natural language.
<span id="line465"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line466"></span></span><span><<span class="start-tag">li</span>></span><span>Each element is an </span><span><<span class="start-tag">em</span>></span><span>observation</span><span></<span class="end-tag">em</span>></span><span>, an event directly provided by the agent.
<span id="line467"></span>- Inter-agent communication can trigger new natural language statements.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line468"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line469"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line470"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Retrieval</span><span></<span class="end-tag">strong</span>></span><span> model: surfaces the context to inform the agent<span class="entity"><span>&</span>rsquo;</span>s behavior, according to relevance, recency and importance.
<span id="line471"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line472"></span></span><span><<span class="start-tag">li</span>></span><span>Recency: recent events have higher scores</span><span></<span class="end-tag">li</span>></span><span>
<span id="line473"></span></span><span><<span class="start-tag">li</span>></span><span>Importance: distinguish mundane from core memories. Ask LM directly.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line474"></span></span><span><<span class="start-tag">li</span>></span><span>Relevance: based on how related it is to the current situation / query.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line475"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line476"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line477"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Reflection</span><span></<span class="end-tag">strong</span>></span><span> mechanism: synthesizes memories into higher level inferences over time and guides the agent<span class="entity"><span>&</span>rsquo;</span>s future behavior. They are </span><span><<span class="start-tag">em</span>></span><span>higher-level summaries of past events</span><span></<span class="end-tag">em</span>></span><span> (<span class="entity"><span>&</span>lt;</span>- note that this is a bit different from </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#self-reflection">#self-reflection</a>"></span><span>self-reflection</span><span></<span class="end-tag">a</span>></span><span> above)
<span id="line478"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line479"></span></span><span><<span class="start-tag">li</span>></span><span>Prompt LM with the 100 most recent observations to generate the 3 most salient high-level questions given that set of observations/statements. Then ask LM to answer those questions.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line480"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line481"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line482"></span></span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Planning <span class="entity"><span>&</span>amp;</span> Reacting</span><span></<span class="end-tag">strong</span>></span><span>: translate the reflections and the environment information into actions
<span id="line483"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line484"></span></span><span><<span class="start-tag">li</span>></span><span>Planning is essentially a way to optimize believability at the moment vs. over time.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line485"></span></span><span><<span class="start-tag">li</span>></span><span>Prompt template: </span><span><<span class="start-tag">code</span>></span><span>{Intro of an agent X}. Here is X's plan today in broad strokes: 1)</span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line486"></span></span><span><<span class="start-tag">li</span>></span><span>Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line487"></span></span><span><<span class="start-tag">li</span>></span><span>Environment information is represented in a tree structure.</span><span></<span class="end-tag">li</span>></span><span>
<span id="line488"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line489"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line490"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line491"></span></span><span><<span class="start-tag">img</span> <span class="attribute-name">src</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/generative-agents.png">generative-agents.png</a>" <span class="attribute-name">style</span>="<a class="attribute-value">width: 100%;</a>" <span class="attribute-name">class</span>="<a class="attribute-value">center</a>" <span>/</span>></span><span>
<span id="line492"></span></span><span><<span class="start-tag">figcaption</span>></span><span>Fig. 13. The generative agent architecture. (Image source: </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.03442">https://arxiv.org/abs/2304.03442</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Park et al. 2023</span><span></<span class="end-tag">a</span>></span><span>)</span><span></<span class="end-tag">figcaption</span>></span><span>
<span id="line493"></span></span><span><<span class="start-tag">p</span>></span><span>This fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line494"></span></span><span><<span class="start-tag">h2</span> <span class="attribute-name">id</span>="<a class="attribute-value">proof-of-concept-examples</a>"></span><span>Proof-of-Concept Examples</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#proof-of-concept-examples">#proof-of-concept-examples</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h2</span>></span><span>
<span id="line495"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/Significant-Gravitas/Auto-GPT">https://github.com/Significant-Gravitas/Auto-GPT</a>"></span><span>AutoGPT</span><span></<span class="end-tag">a</span>></span><span> has drawn a lot of attention to the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but it is nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line496"></span></span><span><<span class="start-tag">p</span>></span><span>Here is the system message used by AutoGPT, where </span><span><<span class="start-tag">code</span>></span><span>{{...}}</span><span></<span class="end-tag">code</span>></span><span> are user inputs:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line497"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span>></span><span>You are {{ai-name}}, {{user-provided AI bot description}}.
<span id="line498"></span>Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.
<span id="line499"></span>
<span id="line500"></span>GOALS:
<span id="line501"></span>
<span id="line502"></span>1. {{user-provided goal 1}}
<span id="line503"></span>2. {{user-provided goal 2}}
<span id="line504"></span>3. ...
<span id="line505"></span>4. ...
<span id="line506"></span>5. ...
<span id="line507"></span>
<span id="line508"></span>Constraints:
<span id="line509"></span>1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
<span id="line510"></span>2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
<span id="line511"></span>3. No user assistance
<span id="line512"></span>4. Exclusively use the commands listed in double quotes e.g. <span class="entity"><span>&</span>#34;</span>command name<span class="entity"><span>&</span>#34;</span>
<span id="line513"></span>5. Use subprocesses for commands that will not terminate within a few minutes
<span id="line514"></span>
<span id="line515"></span>Commands:
<span id="line516"></span>1. Google Search: <span class="entity"><span>&</span>#34;</span>google<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>input<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>search<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line517"></span>2. Browse Website: <span class="entity"><span>&</span>#34;</span>browse_website<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>url<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>url<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>question<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>what_you_want_to_find_on_website<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line518"></span>3. Start GPT Agent: <span class="entity"><span>&</span>#34;</span>start_agent<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>name<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>name<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>task<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>short_task_desc<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>prompt<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>prompt<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line519"></span>4. Message GPT Agent: <span class="entity"><span>&</span>#34;</span>message_agent<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>key<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>key<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>message<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>message<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line520"></span>5. List GPT Agents: <span class="entity"><span>&</span>#34;</span>list_agents<span class="entity"><span>&</span>#34;</span>, args:
<span id="line521"></span>6. Delete GPT Agent: <span class="entity"><span>&</span>#34;</span>delete_agent<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>key<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>key<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line522"></span>7. Clone Repository: <span class="entity"><span>&</span>#34;</span>clone_repository<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>repository_url<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>url<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>clone_path<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>directory<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line523"></span>8. Write to file: <span class="entity"><span>&</span>#34;</span>write_to_file<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>file<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>file<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>text<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>text<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line524"></span>9. Read file: <span class="entity"><span>&</span>#34;</span>read_file<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>file<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>file<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line525"></span>10. Append to file: <span class="entity"><span>&</span>#34;</span>append_to_file<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>file<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>file<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>text<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>text<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line526"></span>11. Delete file: <span class="entity"><span>&</span>#34;</span>delete_file<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>file<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>file<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line527"></span>12. Search Files: <span class="entity"><span>&</span>#34;</span>search_files<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>directory<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>directory<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line528"></span>13. Analyze Code: <span class="entity"><span>&</span>#34;</span>analyze_code<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>code<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>full_code_string<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line529"></span>14. Get Improved Code: <span class="entity"><span>&</span>#34;</span>improve_code<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>suggestions<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>list_of_suggestions<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>code<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>full_code_string<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line530"></span>15. Write Tests: <span class="entity"><span>&</span>#34;</span>write_tests<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>code<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>full_code_string<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>, <span class="entity"><span>&</span>#34;</span>focus<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>list_of_focus_areas<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line531"></span>16. Execute Python File: <span class="entity"><span>&</span>#34;</span>execute_python_file<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>file<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>file<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line532"></span>17. Generate Image: <span class="entity"><span>&</span>#34;</span>generate_image<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>prompt<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>prompt<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line533"></span>18. Send Tweet: <span class="entity"><span>&</span>#34;</span>send_tweet<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>text<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>text<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line534"></span>19. Do Nothing: <span class="entity"><span>&</span>#34;</span>do_nothing<span class="entity"><span>&</span>#34;</span>, args:
<span id="line535"></span>20. Task Complete (Shutdown): <span class="entity"><span>&</span>#34;</span>task_complete<span class="entity"><span>&</span>#34;</span>, args: <span class="entity"><span>&</span>#34;</span>reason<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span><span class="entity"><span>&</span>lt;</span>reason<span class="entity"><span>&</span>gt;</span><span class="entity"><span>&</span>#34;</span>
<span id="line536"></span>
<span id="line537"></span>Resources:
<span id="line538"></span>1. Internet access for searches and information gathering.
<span id="line539"></span>2. Long Term memory management.
<span id="line540"></span>3. GPT-3.5 powered Agents for delegation of simple tasks.
<span id="line541"></span>4. File output.
<span id="line542"></span>
<span id="line543"></span>Performance Evaluation:
<span id="line544"></span>1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
<span id="line545"></span>2. Constructively self-criticize your big-picture behavior constantly.
<span id="line546"></span>3. Reflect on past decisions and strategies to refine your approach.
<span id="line547"></span>4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
<span id="line548"></span>
<span id="line549"></span>You should only respond in JSON format as described below
<span id="line550"></span>Response Format:
<span id="line551"></span>{
<span id="line552"></span> <span class="entity"><span>&</span>#34;</span>thoughts<span class="entity"><span>&</span>#34;</span>: {
<span id="line553"></span> <span class="entity"><span>&</span>#34;</span>text<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>thought<span class="entity"><span>&</span>#34;</span>,
<span id="line554"></span> <span class="entity"><span>&</span>#34;</span>reasoning<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>reasoning<span class="entity"><span>&</span>#34;</span>,
<span id="line555"></span> <span class="entity"><span>&</span>#34;</span>plan<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>- short bulleted\n- list that conveys\n- long-term plan<span class="entity"><span>&</span>#34;</span>,
<span id="line556"></span> <span class="entity"><span>&</span>#34;</span>criticism<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>constructive self-criticism<span class="entity"><span>&</span>#34;</span>,
<span id="line557"></span> <span class="entity"><span>&</span>#34;</span>speak<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>thoughts summary to say to user<span class="entity"><span>&</span>#34;</span>
<span id="line558"></span> },
<span id="line559"></span> <span class="entity"><span>&</span>#34;</span>command<span class="entity"><span>&</span>#34;</span>: {
<span id="line560"></span> <span class="entity"><span>&</span>#34;</span>name<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>command name<span class="entity"><span>&</span>#34;</span>,
<span id="line561"></span> <span class="entity"><span>&</span>#34;</span>args<span class="entity"><span>&</span>#34;</span>: {
<span id="line562"></span> <span class="entity"><span>&</span>#34;</span>arg name<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>value<span class="entity"><span>&</span>#34;</span>
<span id="line563"></span> }
<span id="line564"></span> }
<span id="line565"></span>}
<span id="line566"></span>Ensure the response can be parsed by Python json.loads
<span id="line567"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/AntonOsika/gpt-engineer">https://github.com/AntonOsika/gpt-engineer</a>"></span><span>GPT-Engineer</span><span></<span class="end-tag">a</span>></span><span> is another project to create a whole repository of code given a task specified in natural language. The GPT-Engineer is instructed to think over a list of smaller components to build and ask for user input to clarify questions as needed.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line568"></span></span><span><<span class="start-tag">p</span>></span><span>Here is a sample conversation for task clarification sent to the OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in </span><span><<span class="start-tag">code</span>></span><span>{{user input text}}</span><span></<span class="end-tag">code</span>></span><span>.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line569"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span>></span><span>[
<span id="line570"></span> {
<span id="line571"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>system<span class="entity"><span>&</span>#34;</span>,
<span id="line572"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>You will read instructions and not carry them out, only seek to clarify them.\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\nThen you will pick one clarifying question, and wait for an answer from the user.\n<span class="entity"><span>&</span>#34;</span>
<span id="line573"></span> },
<span id="line574"></span> {
<span id="line575"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>user<span class="entity"><span>&</span>#34;</span>,
<span id="line576"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\n<span class="entity"><span>&</span>#34;</span>
<span id="line577"></span> },
<span id="line578"></span> {
<span id="line579"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>assistant<span class="entity"><span>&</span>#34;</span>,
<span id="line580"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>Summary of areas that need clarification:\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\n2. Details about the MVC components (e.g. which components are in each file)\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\n\nClarifying question:\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?<span class="entity"><span>&</span>#34;</span>
<span id="line581"></span> },
<span id="line582"></span> {
<span id="line583"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>user<span class="entity"><span>&</span>#34;</span>,
<span id="line584"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\n\nIs anything else unclear? If yes, only answer in the form:\n{remaining unclear areas} remaining questions.\n{Next question}\nIf everything is sufficiently clear, only answer \<span class="entity"><span>&</span>#34;</span>Nothing more to clarify.\<span class="entity"><span>&</span>#34;</span>.<span class="entity"><span>&</span>#34;</span>
<span id="line585"></span> },
<span id="line586"></span> {
<span id="line587"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>assistant<span class="entity"><span>&</span>#34;</span>,
<span id="line588"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>Remaining unclear areas: 2 remaining questions.\nCan you provide more information about how the MVC components are split into separate files?<span class="entity"><span>&</span>#34;</span>
<span id="line589"></span> },
<span id="line590"></span> {
<span id="line591"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>user<span class="entity"><span>&</span>#34;</span>,
<span id="line592"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>{{Make your own assumptions and state them explicitly before starting}}<span class="entity"><span>&</span>#34;</span>
<span id="line593"></span> }
<span id="line594"></span>]
<span id="line595"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">p</span>></span><span>Then after these clarifications, the agent moved into the code writing mode with a different system message.
<span id="line596"></span>System message:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line597"></span></span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">prompt</a>"></span><span>
<span id="line598"></span>You will get instructions for code to write.
<span id="line599"></span>You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.
<span id="line600"></span>Make sure that every detail of the architecture is, in the end, implemented as code.
<span id="line601"></span></span><span><<span class="start-tag">p</span>></span><span>Think step by step and reason yourself to the right decisions to make sure we get it right.
<span id="line602"></span>You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line603"></span></span><span><<span class="start-tag">p</span>></span><span>Then you will output the content of each file including ALL code.
<span id="line604"></span>Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
<span id="line605"></span>FILENAME is the lowercase file name including the file extension,
<span id="line606"></span>LANG is the markup code block language for the code<span class="entity"><span>&</span>rsquo;</span>s language, and CODE is the code:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line607"></span></span><span><<span class="start-tag">p</span>></span><span>FILENAME</span><span></<span class="end-tag">p</span>></span><span>
<span id="line608"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span> <span class="attribute-name">class</span>="<a class="attribute-value">language-LANG</a>" <span class="attribute-name">data-lang</span>="<a class="attribute-value">LANG</a>"></span><span>CODE
<span id="line609"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">p</span>></span><span>You will start with the <span class="entity"><span>&</span>ldquo;</span>entrypoint<span class="entity"><span>&</span>rdquo;</span> file, then go to the ones that are imported by that file, and so on.
<span id="line610"></span>Please note that the code should be fully functional. No placeholders.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line611"></span></span><span><<span class="start-tag">p</span>></span><span>Follow a language and framework appropriate best practice file naming convention.
<span id="line612"></span>Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.
<span id="line613"></span>Ensure to implement all code, if you are unsure, write a plausible implementation.
<span id="line614"></span>Include module dependency or package manager dependency definition file.
<span id="line615"></span>Before you finish, double check that all parts of the architecture is present in the files.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line616"></span></span><span><<span class="start-tag">p</span>></span><span>Useful to know:
<span id="line617"></span>You almost always put different classes in different files.
<span id="line618"></span>For Python, you always create an appropriate requirements.txt file.
<span id="line619"></span>For NodeJS, you always create an appropriate package.json file.
<span id="line620"></span>You always add a comment briefly describing the purpose of the function definition.
<span id="line621"></span>You try to add comments explaining very complex bits of logic.
<span id="line622"></span>You always follow the best practices for the requested languages in terms of describing the code written as a defined
<span id="line623"></span>package/project.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line624"></span></span><span><<span class="start-tag">p</span>></span><span>Python toolbelt preferences:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line625"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line626"></span></span><span><<span class="start-tag">li</span>></span><span>pytest</span><span></<span class="end-tag">li</span>></span><span>
<span id="line627"></span></span><span><<span class="start-tag">li</span>></span><span>dataclasses</span><span></<span class="end-tag">li</span>></span><span>
<span id="line628"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line629"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line630"></span></span><span><<span class="start-tag">p</span>></span><span>Conversation samples:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line631"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span>></span><span>[
<span id="line632"></span> {
<span id="line633"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>system<span class="entity"><span>&</span>#34;</span>,
<span id="line634"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>You will get instructions for code to write.\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\nMake sure that every detail of the architecture is, in the end, implemented as code.\n\nThink step by step and reason yourself to the right decisions to make sure we get it right.\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\n\nThen you will output the content of each file including ALL code.\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\nFILENAME is the lowercase file name including the file extension,\nLANG is the markup code block language for the code<span class="entity"><span>&</span>#39;</span>s language, and CODE is the code:\n\nFILENAME\n```LANG\nCODE\n```\n\nYou will start with the \<span class="entity"><span>&</span>#34;</span>entrypoint\<span class="entity"><span>&</span>#34;</span> file, then go to the ones that are imported by that file, and so on.\nPlease note that the code should be fully functional. No placeholders.\n\nFollow a language and framework appropriate best practice file naming convention.\nMake sure that files contain all imports, types etc. 
Make sure that code in different files are compatible with each other.\nEnsure to implement all code, if you are unsure, write a plausible implementation.\nInclude module dependency or package manager dependency definition file.\nBefore you finish, double check that all parts of the architecture is present in the files.\n\nUseful to know:\nYou almost always put different classes in different files.\nFor Python, you always create an appropriate requirements.txt file.\nFor NodeJS, you always create an appropriate package.json file.\nYou always add a comment briefly describing the purpose of the function definition.\nYou try to add comments explaining very complex bits of logic.\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\npackage/project.\n\n\nPython toolbelt preferences:\n- pytest\n- dataclasses\n<span class="entity"><span>&</span>#34;</span>
<span id="line635"></span> },
<span id="line636"></span> # … same conversation as earlier, ended with <span class="entity"><span>&</span>#34;</span>Make your own assumptions and state them explicitly before starting<span class="entity"><span>&</span>#34;</span>.
<span id="line637"></span> {
<span id="line638"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>assistant<span class="entity"><span>&</span>#34;</span>,
<span id="line639"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>Assumptions:\n1. Model: The model will contain the game<span class="entity"><span>&</span>#39;</span>s data, such as level information, character states, and enemy positions.\n2. View: The view will handle the game<span class="entity"><span>&</span>#39;</span>s visuals, including rendering the game objects, backgrounds, and updating the display.\n3. Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\n\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?<span class="entity"><span>&</span>#34;</span>
<span id="line640"></span> },
<span id="line641"></span> {
<span id="line642"></span> <span class="entity"><span>&</span>#34;</span>role<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>user<span class="entity"><span>&</span>#34;</span>,
<span id="line643"></span> <span class="entity"><span>&</span>#34;</span>content<span class="entity"><span>&</span>#34;</span>: <span class="entity"><span>&</span>#34;</span>Please now remember the steps:\n\nThink step by step and reason yourself to the right decisions to make sure we get it right.\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\n\nThen you will output the content of each file including ALL code.\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\nFILENAME is the lowercase file name including the file extension,\nLANG is the markup code block language for the code<span class="entity"><span>&</span>#39;</span>s language, and CODE is the code:\n\nFILENAME\n```LANG\nCODE\n```\n\nPlease note that the code should be fully functional. No placeholders.\n\nYou will start with the \<span class="entity"><span>&</span>#34;</span>entrypoint\<span class="entity"><span>&</span>#34;</span> file, then go to the ones that are imported by that file, and so on.\nFollow a language and framework appropriate best practice file naming convention.\nMake sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.\nBefore you finish, double check that all parts of the architecture is present in the files.\n<span class="entity"><span>&</span>#34;</span>
<span id="line644"></span> }
<span id="line645"></span>]
<span id="line646"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">challenges</a>"></span><span>Challenges</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#challenges">#challenges</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line647"></span></span><span><<span class="start-tag">p</span>></span><span>After going through key ideas and demos of building LLM-centered agents, I start to see a couple of common limitations:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line648"></span></span><span><<span class="start-tag">ul</span>></span><span>
<span id="line649"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line650"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Finite context length</span><span></<span class="end-tag">strong</span>></span><span>: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line651"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line652"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line653"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Challenges in long-term planning and task decomposition</span><span></<span class="end-tag">strong</span>></span><span>: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line654"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line655"></span></span><span><<span class="start-tag">li</span>></span><span>
<span id="line656"></span></span><span><<span class="start-tag">p</span>></span><span></span><span><<span class="start-tag">strong</span>></span><span>Reliability of natural language interface</span><span></<span class="end-tag">strong</span>></span><span>: Current agent systems rely on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line657"></span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line658"></span></span><span></<span class="end-tag">ul</span>></span><span>
<span id="line659"></span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">citation</a>"></span><span>Citation</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#citation">#citation</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line660"></span></span><span><<span class="start-tag">p</span>></span><span>Cited as:</span><span></<span class="end-tag">p</span>></span><span>
<span id="line661"></span></span><span><<span class="start-tag">blockquote</span>></span><span>
<span id="line662"></span></span><span><<span class="start-tag">p</span>></span><span>Weng, Lilian. (Jun 2023). <span class="entity"><span>&</span>quot;</span>LLM-powered Autonomous Agents<span class="entity"><span>&</span>quot;</span>. Lil<span class="entity"><span>&</span>rsquo;</span>Log. https://lilianweng.github.io/posts/2023-06-23-agent/.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line663"></span></span><span></<span class="end-tag">blockquote</span>></span><span>
<span id="line664"></span></span><span><<span class="start-tag">p</span>></span><span>Or</span><span></<span class="end-tag">p</span>></span><span>
<span id="line665"></span></span><span><<span class="start-tag">pre</span> <span class="attribute-name">tabindex</span>="<a class="attribute-value">0</a>"></span><span></span><span><<span class="start-tag">code</span>></span><span>@article{weng2023prompt,
<span id="line666"></span> title = <span class="entity"><span>&</span>#34;</span>LLM-powered Autonomous Agents<span class="entity"><span>&</span>#34;</span>,
<span id="line667"></span> author = <span class="entity"><span>&</span>#34;</span>Weng, Lilian<span class="entity"><span>&</span>#34;</span>,
<span id="line668"></span> journal = <span class="entity"><span>&</span>#34;</span>lilianweng.github.io<span class="entity"><span>&</span>#34;</span>,
<span id="line669"></span> year = <span class="entity"><span>&</span>#34;</span>2023<span class="entity"><span>&</span>#34;</span>,
<span id="line670"></span> month = <span class="entity"><span>&</span>#34;</span>Jun<span class="entity"><span>&</span>#34;</span>,
<span id="line671"></span> url = <span class="entity"><span>&</span>#34;</span>https://lilianweng.github.io/posts/2023-06-23-agent/<span class="entity"><span>&</span>#34;</span>
<span id="line672"></span>}
<span id="line673"></span></span><span></<span class="end-tag">code</span>></span><span></span><span></<span class="end-tag">pre</span>></span><span></span><span><<span class="start-tag">h1</span> <span class="attribute-name">id</span>="<a class="attribute-value">references</a>"></span><span>References</span><span><<span class="start-tag">a</span> <span class="attribute-name">hidden</span> <span class="attribute-name">class</span>="<a class="attribute-value">anchor</a>" <span class="attribute-name">aria-hidden</span>="<a class="attribute-value">true</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#references">#references</a>"></span><span>#</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">h1</span>></span><span>
<span id="line674"></span></span><span><<span class="start-tag">p</span>></span><span>[1] Wei et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2201.11903">https://arxiv.org/abs/2201.11903</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Chain of thought prompting elicits reasoning in large language models.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> NeurIPS 2022</span><span></<span class="end-tag">p</span>></span><span>
<span id="line675"></span></span><span><<span class="start-tag">p</span>></span><span>[2] Yao et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2305.10601">https://arxiv.org/abs/2305.10601</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Tree of Thoughts: Deliberate Problem Solving with Large Language Models.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2305.10601 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line676"></span></span><span><<span class="start-tag">p</span>></span><span>[3] Liu et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2302.02676">https://arxiv.org/abs/2302.02676</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Chain of Hindsight Aligns Language Models with Feedback
<span id="line677"></span><span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2302.02676 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line678"></span></span><span><<span class="start-tag">p</span>></span><span>[4] Liu et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.11477">https://arxiv.org/abs/2304.11477</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>LLM+P: Empowering Large Language Models with Optimal Planning Proficiency<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2304.11477 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line679"></span></span><span><<span class="start-tag">p</span>></span><span>[5] Yao et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.03629">https://arxiv.org/abs/2210.03629</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>ReAct: Synergizing reasoning and acting in language models.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> ICLR 2023.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line680"></span></span><span><<span class="start-tag">p</span>></span><span>[6] Google Blog. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html">https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Announcing ScaNN: Efficient Vector Similarity Search<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> July 28, 2020.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line681"></span></span><span><<span class="start-tag">p</span>></span><span>[7] </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389">https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389</a>"></span><span>https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">p</span>></span><span>
<span id="line682"></span></span><span><<span class="start-tag">p</span>></span><span>[8] Shinn <span class="entity"><span>&</span>amp;</span> Labash. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.11366">https://arxiv.org/abs/2303.11366</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Reflexion: an autonomous agent with dynamic memory and self-reflection<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2303.11366 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line683"></span></span><span><<span class="start-tag">p</span>></span><span>[9] Laskin et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2210.14215">https://arxiv.org/abs/2210.14215</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>In-context Reinforcement Learning with Algorithm Distillation<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> ICLR 2023.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line684"></span></span><span><<span class="start-tag">p</span>></span><span>[10] Karpas et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2205.00445">https://arxiv.org/abs/2205.00445</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>MRKL Systems: A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2205.00445 (2022).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line685"></span></span><span><<span class="start-tag">p</span>></span><span>[11] Weaviate Blog. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://weaviate.io/blog/why-is-vector-search-so-fast">https://weaviate.io/blog/why-is-vector-search-so-fast</a>"></span><span>Why is Vector Search so fast?</span><span></<span class="end-tag">a</span>></span><span> Sep 13, 2022.</span><span></<span class="end-tag">p</span>></span><span>
<span id="line686"></span></span><span><<span class="start-tag">p</span>></span><span>[12] Li et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.08244">https://arxiv.org/abs/2304.08244</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>API-Bank: A Benchmark for Tool-Augmented LLMs<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2304.08244 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line687"></span></span><span><<span class="start-tag">p</span>></span><span>[13] Shen et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2303.17580">https://arxiv.org/abs/2303.17580</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2303.17580 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line688"></span></span><span><<span class="start-tag">p</span>></span><span>[14] Bran et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.05376">https://arxiv.org/abs/2304.05376</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>ChemCrow: Augmenting large-language models with chemistry tools.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2304.05376 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line689"></span></span><span><<span class="start-tag">p</span>></span><span>[15] Boiko et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.05332">https://arxiv.org/abs/2304.05332</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Emergent autonomous scientific research capabilities of large language models.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2304.05332 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line690"></span></span><span><<span class="start-tag">p</span>></span><span>[16] Joon Sung Park, et al. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://arxiv.org/abs/2304.03442">https://arxiv.org/abs/2304.03442</a>"></span><span><span class="entity"><span>&</span>ldquo;</span>Generative Agents: Interactive Simulacra of Human Behavior.<span class="entity"><span>&</span>rdquo;</span></span><span></<span class="end-tag">a</span>></span><span> arXiv preprint arXiv:2304.03442 (2023).</span><span></<span class="end-tag">p</span>></span><span>
<span id="line691"></span></span><span><<span class="start-tag">p</span>></span><span>[17] AutoGPT. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/Significant-Gravitas/Auto-GPT">https://github.com/Significant-Gravitas/Auto-GPT</a>"></span><span>https://github.com/Significant-Gravitas/Auto-GPT</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">p</span>></span><span>
<span id="line692"></span></span><span><<span class="start-tag">p</span>></span><span>[18] GPT-Engineer. </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://github.com/AntonOsika/gpt-engineer">https://github.com/AntonOsika/gpt-engineer</a>"></span><span>https://github.com/AntonOsika/gpt-engineer</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">p</span>></span><span>
<span id="line693"></span>
<span id="line694"></span>
<span id="line695"></span> </span><span></<span class="end-tag">div</span>></span><span>
<span id="line696"></span>
<span id="line697"></span> </span><span><<span class="start-tag">footer</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-footer</a>"></span><span>
<span id="line698"></span> </span><span><<span class="start-tag">ul</span> <span class="attribute-name">class</span>="<a class="attribute-value">post-tags</a>"></span><span>
<span id="line699"></span> </span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/nlp/">https://lilianweng.github.io/tags/nlp/</a>"></span><span>nlp</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line700"></span> </span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/language-model/">https://lilianweng.github.io/tags/language-model/</a>"></span><span>language-model</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line701"></span> </span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/agent/">https://lilianweng.github.io/tags/agent/</a>"></span><span>agent</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line702"></span> </span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/steerability/">https://lilianweng.github.io/tags/steerability/</a>"></span><span>steerability</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line703"></span> </span><span><<span class="start-tag">li</span>></span><span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/tags/prompting/">https://lilianweng.github.io/tags/prompting/</a>"></span><span>prompting</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">li</span>></span><span>
<span id="line704"></span> </span><span></<span class="end-tag">ul</span>></span><span>
<span id="line705"></span></span><span><<span class="start-tag">nav</span> <span class="attribute-name">class</span>="<a class="attribute-value">paginav</a>"></span><span>
<span id="line706"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">class</span>="<a class="attribute-value">prev</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/">https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/</a>"></span><span>
<span id="line707"></span> </span><span><<span class="start-tag">span</span> <span class="attribute-name">class</span>="<a class="attribute-value">title</a>"></span><span>« </span><span></<span class="end-tag">span</span>></span><span>
<span id="line708"></span> </span><span><<span class="start-tag">br</span>></span><span>
<span id="line709"></span> </span><span><<span class="start-tag">span</span>></span><span>Adversarial Attacks on LLMs</span><span></<span class="end-tag">span</span>></span><span>
<span id="line710"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line711"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">class</span>="<a class="attribute-value">next</a>" <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/">https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/</a>"></span><span>
<span id="line712"></span> </span><span><<span class="start-tag">span</span> <span class="attribute-name">class</span>="<a class="attribute-value">title</a>"></span><span> »</span><span></<span class="end-tag">span</span>></span><span>
<span id="line713"></span> </span><span><<span class="start-tag">br</span>></span><span>
<span id="line714"></span> </span><span><<span class="start-tag">span</span>></span><span>Prompt Engineering</span><span></<span class="end-tag">span</span>></span><span>
<span id="line715"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line716"></span></span><span></<span class="end-tag">nav</span>></span><span>
<span id="line717"></span>
<span id="line718"></span>
<span id="line719"></span></span><span><<span class="start-tag">div</span> <span class="attribute-name">class</span>="<a class="attribute-value">share-buttons</a>"></span><span>
<span id="line720"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on twitter</a>"
<span id="line721"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://twitter.com/intent/tweet/?text=LLM%20Powered%20Autonomous%20Agents&url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f&hashtags=nlp%2clanguage-model%2cagent%2csteerability%2cprompting">https://twitter.com/intent/tweet/?text=LLM%20Powered%20Autonomous%20Agents<span class="entity"><span>&</span>amp;</span>url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f<span class="entity"><span>&</span>amp;</span>hashtags=nlp%2clanguage-model%2cagent%2csteerability%2cprompting</a>"></span><span>
<span id="line722"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 512 512</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>"></span><span>
<span id="line723"></span> </span><span><<span class="start-tag">path</span>
<span id="line724"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M449.446,0c34.525,0 62.554,28.03 62.554,62.554l0,386.892c0,34.524 -28.03,62.554 -62.554,62.554l-386.892,0c-34.524,0 -62.554,-28.03 -62.554,-62.554l0,-386.892c0,-34.524 28.029,-62.554 62.554,-62.554l386.892,0Zm-253.927,424.544c135.939,0 210.268,-112.643 210.268,-210.268c0,-3.218 0,-6.437 -0.153,-9.502c14.406,-10.421 26.973,-23.448 36.935,-38.314c-13.18,5.824 -27.433,9.809 -42.452,11.648c15.326,-9.196 26.973,-23.602 32.49,-40.92c-14.252,8.429 -30.038,14.56 -46.896,17.931c-13.487,-14.406 -32.644,-23.295 -53.946,-23.295c-40.767,0 -73.87,33.104 -73.87,73.87c0,5.824 0.613,11.494 1.992,16.858c-61.456,-3.065 -115.862,-32.49 -152.337,-77.241c-6.284,10.881 -9.962,23.601 -9.962,37.088c0,25.594 13.027,48.276 32.95,61.456c-12.107,-0.307 -23.448,-3.678 -33.41,-9.196l0,0.92c0,35.862 25.441,65.594 59.311,72.49c-6.13,1.686 -12.72,2.606 -19.464,2.606c-4.751,0 -9.348,-0.46 -13.946,-1.38c9.349,29.426 36.628,50.728 68.965,51.341c-25.287,19.771 -57.164,31.571 -91.8,31.571c-5.977,0 -11.801,-0.306 -17.625,-1.073c32.337,21.15 71.264,33.41 112.95,33.41Z</a>" <span>/</span>></span><span>
<span id="line725"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line726"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line727"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on linkedin</a>"
<span id="line728"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f&title=LLM%20Powered%20Autonomous%20Agents&summary=LLM%20Powered%20Autonomous%20Agents&source=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f">https://www.linkedin.com/shareArticle?mini=true<span class="entity"><span>&</span>amp;</span>url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f<span class="entity"><span>&</span>amp;</span>title=LLM%20Powered%20Autonomous%20Agents<span class="entity"><span>&</span>amp;</span>summary=LLM%20Powered%20Autonomous%20Agents<span class="entity"><span>&</span>amp;</span>source=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f</a>"></span><span>
<span id="line729"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 512 512</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>"></span><span>
<span id="line730"></span> </span><span><<span class="start-tag">path</span>
<span id="line731"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M449.446,0c34.525,0 62.554,28.03 62.554,62.554l0,386.892c0,34.524 -28.03,62.554 -62.554,62.554l-386.892,0c-34.524,0 -62.554,-28.03 -62.554,-62.554l0,-386.892c0,-34.524 28.029,-62.554 62.554,-62.554l386.892,0Zm-288.985,423.278l0,-225.717l-75.04,0l0,225.717l75.04,0Zm270.539,0l0,-129.439c0,-69.333 -37.018,-101.586 -86.381,-101.586c-39.804,0 -57.634,21.891 -67.617,37.266l0,-31.958l-75.021,0c0.995,21.181 0,225.717 0,225.717l75.02,0l0,-126.056c0,-6.748 0.486,-13.492 2.474,-18.315c5.414,-13.475 17.767,-27.434 38.494,-27.434c27.135,0 38.007,20.707 38.007,51.037l0,120.768l75.024,0Zm-307.552,-334.556c-25.674,0 -42.448,16.879 -42.448,39.002c0,21.658 16.264,39.002 41.455,39.002l0.484,0c26.165,0 42.452,-17.344 42.452,-39.002c-0.485,-22.092 -16.241,-38.954 -41.943,-39.002Z</a>" <span>/</span>></span><span>
<span id="line732"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line733"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line734"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on reddit</a>"
<span id="line735"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://reddit.com/submit?url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f&title=LLM%20Powered%20Autonomous%20Agents">https://reddit.com/submit?url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f<span class="entity"><span>&</span>amp;</span>title=LLM%20Powered%20Autonomous%20Agents</a>"></span><span>
<span id="line736"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 512 512</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>"></span><span>
<span id="line737"></span> </span><span><<span class="start-tag">path</span>
<span id="line738"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M449.446,0c34.525,0 62.554,28.03 62.554,62.554l0,386.892c0,34.524 -28.03,62.554 -62.554,62.554l-386.892,0c-34.524,0 -62.554,-28.03 -62.554,-62.554l0,-386.892c0,-34.524 28.029,-62.554 62.554,-62.554l386.892,0Zm-3.446,265.638c0,-22.964 -18.616,-41.58 -41.58,-41.58c-11.211,0 -21.361,4.457 -28.841,11.666c-28.424,-20.508 -67.586,-33.757 -111.204,-35.278l18.941,-89.121l61.884,13.157c0.756,15.734 13.642,28.29 29.56,28.29c16.407,0 29.706,-13.299 29.706,-29.701c0,-16.403 -13.299,-29.702 -29.706,-29.702c-11.666,0 -21.657,6.792 -26.515,16.578l-69.105,-14.69c-1.922,-0.418 -3.939,-0.042 -5.585,1.036c-1.658,1.073 -2.811,2.761 -3.224,4.686l-21.152,99.438c-44.258,1.228 -84.046,14.494 -112.837,35.232c-7.468,-7.164 -17.589,-11.591 -28.757,-11.591c-22.965,0 -41.585,18.616 -41.585,41.58c0,16.896 10.095,31.41 24.568,37.918c-0.639,4.135 -0.99,8.328 -0.99,12.576c0,63.977 74.469,115.836 166.33,115.836c91.861,0 166.334,-51.859 166.334,-115.836c0,-4.218 -0.347,-8.387 -0.977,-12.493c14.564,-6.47 24.735,-21.034 24.735,-38.001Zm-119.474,108.193c-20.27,20.241 -59.115,21.816 -70.534,21.816c-11.428,0 -50.277,-1.575 -70.522,-21.82c-3.007,-3.008 -3.007,-7.882 0,-10.889c3.003,-2.999 7.882,-3.003 10.885,0c12.777,12.781 40.11,17.317 59.637,17.317c19.522,0 46.86,-4.536 59.657,-17.321c3.016,-2.999 7.886,-2.995 10.885,0.008c3.008,3.011 3.003,7.882 -0.008,10.889Zm-5.23,-48.781c-16.373,0 -29.701,-13.324 -29.701,-29.698c0,-16.381 13.328,-29.714 29.701,-29.714c16.378,0 29.706,13.333 29.706,29.714c0,16.374 -13.328,29.698 -29.706,29.698Zm-160.386,-29.702c0,-16.381 13.328,-29.71 29.714,-29.71c16.369,0 29.689,13.329 29.689,29.71c0,16.373 -13.32,29.693 -29.689,29.693c-16.386,0 -29.714,-13.32 -29.714,-29.693Z</a>" <span>/</span>></span><span>
<span id="line739"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line740"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line741"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on facebook</a>"
<span id="line742"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://facebook.com/sharer/sharer.php?u=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f">https://facebook.com/sharer/sharer.php?u=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f</a>"></span><span>
<span id="line743"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 512 512</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>"></span><span>
<span id="line744"></span> </span><span><<span class="start-tag">path</span>
<span id="line745"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M449.446,0c34.525,0 62.554,28.03 62.554,62.554l0,386.892c0,34.524 -28.03,62.554 -62.554,62.554l-106.468,0l0,-192.915l66.6,0l12.672,-82.621l-79.272,0l0,-53.617c0,-22.603 11.073,-44.636 46.58,-44.636l36.042,0l0,-70.34c0,0 -32.71,-5.582 -63.982,-5.582c-65.288,0 -107.96,39.569 -107.96,111.204l0,62.971l-72.573,0l0,82.621l72.573,0l0,192.915l-191.104,0c-34.524,0 -62.554,-28.03 -62.554,-62.554l0,-386.892c0,-34.524 28.029,-62.554 62.554,-62.554l386.892,0Z</a>" <span>/</span>></span><span>
<span id="line746"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line747"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line748"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on whatsapp</a>"
<span id="line749"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://api.whatsapp.com/send?text=LLM%20Powered%20Autonomous%20Agents%20-%20https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f">https://api.whatsapp.com/send?text=LLM%20Powered%20Autonomous%20Agents%20-%20https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f</a>"></span><span>
<span id="line750"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 512 512</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>"></span><span>
<span id="line751"></span> </span><span><<span class="start-tag">path</span>
<span id="line752"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M449.446,0c34.525,0 62.554,28.03 62.554,62.554l0,386.892c0,34.524 -28.03,62.554 -62.554,62.554l-386.892,0c-34.524,0 -62.554,-28.03 -62.554,-62.554l0,-386.892c0,-34.524 28.029,-62.554 62.554,-62.554l386.892,0Zm-58.673,127.703c-33.842,-33.881 -78.847,-52.548 -126.798,-52.568c-98.799,0 -179.21,80.405 -179.249,179.234c-0.013,31.593 8.241,62.428 23.927,89.612l-25.429,92.884l95.021,-24.925c26.181,14.28 55.659,21.807 85.658,21.816l0.074,0c98.789,0 179.206,-80.413 179.247,-179.243c0.018,-47.895 -18.61,-92.93 -52.451,-126.81Zm-126.797,275.782l-0.06,0c-26.734,-0.01 -52.954,-7.193 -75.828,-20.767l-5.441,-3.229l-56.386,14.792l15.05,-54.977l-3.542,-5.637c-14.913,-23.72 -22.791,-51.136 -22.779,-79.287c0.033,-82.142 66.867,-148.971 149.046,-148.971c39.793,0.014 77.199,15.531 105.329,43.692c28.128,28.16 43.609,65.592 43.594,105.4c-0.034,82.149 -66.866,148.983 -148.983,148.984Zm81.721,-111.581c-4.479,-2.242 -26.499,-13.075 -30.604,-14.571c-4.105,-1.495 -7.091,-2.241 -10.077,2.241c-2.986,4.483 -11.569,14.572 -14.182,17.562c-2.612,2.988 -5.225,3.364 -9.703,1.12c-4.479,-2.241 -18.91,-6.97 -36.017,-22.23c-13.314,-11.876 -22.304,-26.542 -24.916,-31.026c-2.612,-4.484 -0.279,-6.908 1.963,-9.14c2.016,-2.007 4.48,-5.232 6.719,-7.847c2.24,-2.615 2.986,-4.484 4.479,-7.472c1.493,-2.99 0.747,-5.604 -0.374,-7.846c-1.119,-2.241 -10.077,-24.288 -13.809,-33.256c-3.635,-8.733 -7.327,-7.55 -10.077,-7.688c-2.609,-0.13 -5.598,-0.158 -8.583,-0.158c-2.986,0 -7.839,1.121 -11.944,5.604c-4.105,4.484 -15.675,15.32 -15.675,37.364c0,22.046 16.048,43.342 18.287,46.332c2.24,2.99 31.582,48.227 76.511,67.627c10.685,4.615 19.028,7.371 25.533,9.434c10.728,3.41 20.492,2.929 28.209,1.775c8.605,-1.285 26.499,-10.833 30.231,-21.295c3.732,-10.464 3.732,-19.431 2.612,-21.298c-1.119,-1.869 -4.105,-2.99 -8.583,-5.232Z</a>" <span>/</span>></span><span>
<span id="line753"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line754"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line755"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">share LLM Powered Autonomous Agents on telegram</a>"
<span id="line756"></span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://telegram.me/share/url?text=LLM%20Powered%20Autonomous%20Agents&url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f">https://telegram.me/share/url?text=LLM%20Powered%20Autonomous%20Agents<span class="entity"><span>&</span>amp;</span>url=https%3a%2f%2flilianweng.github.io%2fposts%2f2023-06-23-agent%2f</a>"></span><span>
<span id="line757"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">version</span>="<a class="attribute-value">1.1</a>" <span class="attribute-name">xml:space</span>="<a class="attribute-value">preserve</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">2 2 28 28</a>"></span><span>
<span id="line758"></span> </span><span><<span class="start-tag">path</span>
<span id="line759"></span> <span class="attribute-name">d</span>="<a class="attribute-value">M26.49,29.86H5.5a3.37,3.37,0,0,1-2.47-1,3.35,3.35,0,0,1-1-2.47V5.48A3.36,3.36,0,0,1,3,3,3.37,3.37,0,0,1,5.5,2h21A3.38,3.38,0,0,1,29,3a3.36,3.36,0,0,1,1,2.46V26.37a3.35,3.35,0,0,1-1,2.47A3.38,3.38,0,0,1,26.49,29.86Zm-5.38-6.71a.79.79,0,0,0,.85-.66L24.73,9.24a.55.55,0,0,0-.18-.46.62.62,0,0,0-.41-.17q-.08,0-16.53,6.11a.59.59,0,0,0-.41.59.57.57,0,0,0,.43.52l4,1.24,1.61,4.83a.62.62,0,0,0,.63.43.56.56,0,0,0,.4-.17L16.54,20l4.09,3A.9.9,0,0,0,21.11,23.15ZM13.8,20.71l-1.21-4q8.72-5.55,8.78-5.55c.15,0,.23,0,.23.16a.18.18,0,0,1,0,.06s-2.51,2.3-7.52,6.8Z</a>" <span>/</span>></span><span>
<span id="line760"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line761"></span> </span><span></<span class="end-tag">a</span>></span><span>
<span id="line762"></span></span><span></<span class="end-tag">div</span>></span><span>
<span id="line763"></span>
<span id="line764"></span> </span><span></<span class="end-tag">footer</span>></span><span>
<span id="line765"></span></span><span></<span class="end-tag">article</span>></span><span>
<span id="line766"></span> </span><span></<span class="end-tag">main</span>></span><span>
<span id="line767"></span>
<span id="line768"></span></span><span><<span class="start-tag">footer</span> <span class="attribute-name">class</span>="<a class="attribute-value">footer</a>"></span><span>
<span id="line769"></span> </span><span><<span class="start-tag">span</span>></span><span><span class="entity"><span>&</span>copy;</span> 2023 </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/">https://lilianweng.github.io/</a>"></span><span>Lil<span class="entity"><span>&</span>#39;</span>Log</span><span></<span class="end-tag">a</span>></span><span></span><span></<span class="end-tag">span</span>></span><span>
<span id="line770"></span> </span><span><<span class="start-tag">span</span>></span><span>
<span id="line771"></span> Powered by
<span id="line772"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://gohugo.io/">https://gohugo.io/</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener noreferrer</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>Hugo</span><span></<span class="end-tag">a</span>></span><span> <span><span>&</span></span>
<span id="line773"></span> </span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://git.io/hugopapermod">https://git.io/hugopapermod</a>" <span class="attribute-name">rel</span>="<a class="attribute-value">noopener</a>" <span class="attribute-name">target</span>="<a class="attribute-value">_blank</a>"></span><span>PaperMod</span><span></<span class="end-tag">a</span>></span><span>
<span id="line774"></span> </span><span></<span class="end-tag">span</span>></span><span>
<span id="line775"></span></span><span></<span class="end-tag">footer</span>></span><span>
<span id="line776"></span></span><span><<span class="start-tag">a</span> <span class="attribute-name">href</span>="<a class="attribute-value" href="view-source:https://lilianweng.github.io/posts/2023-06-23-agent/#top">#top</a>" <span class="attribute-name">aria-label</span>="<a class="attribute-value">go to top</a>" <span class="attribute-name">title</span>="<a class="attribute-value">Go to Top (Alt + G)</a>" <span class="attribute-name">class</span>="<a class="attribute-value">top-link</a>" <span class="attribute-name">id</span>="<a class="attribute-value">top-link</a>" <span class="attribute-name">accesskey</span>="<a class="attribute-value">g</a>"></span><span>
<span id="line777"></span> </span><span><<span class="start-tag">svg</span> <span class="attribute-name">xmlns</span>="<a class="attribute-value">http://www.w3.org/2000/svg</a>" <span class="attribute-name">viewBox</span>="<a class="attribute-value">0 0 12 6</a>" <span class="attribute-name">fill</span>="<a class="attribute-value">currentColor</a>"></span><span>
<span id="line778"></span> </span><span><<span class="start-tag">path</span> <span class="attribute-name">d</span>="<a class="attribute-value">M12 6H0l6-6z</a>" <span>/</span>></span><span>
<span id="line779"></span> </span><span></<span class="end-tag">svg</span>></span><span>
<span id="line780"></span></span><span></<span class="end-tag">a</span>></span><span>
<span id="line781"></span>
<span id="line782"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line783"></span>    // Restore the menu's horizontal scroll offset from localStorage and re-save it on every scroll.
<span id="line784"></span>    let menu = document.getElementById('menu');
<span id="line785"></span>    if (menu) {
<span id="line786"></span>        const scrollKey = "menu-scroll-position";
<span id="line787"></span>        menu.scrollLeft = localStorage.getItem(scrollKey);
<span id="line788"></span>        menu.onscroll = () => { localStorage.setItem(scrollKey, menu.scrollLeft); };
<span id="line789"></span>    }
<span id="line790"></span>
<span id="line791"></span>    // Scroll to in-page anchor targets from script (honouring prefers-reduced-motion) and update the URL hash.
<span id="line792"></span>    document.querySelectorAll('a[href^="#"]').forEach(anchor => {
<span id="line793"></span>        anchor.addEventListener("click", function (e) {
<span id="line794"></span>            var id = this.getAttribute("href").slice(1);
<span id="line795"></span>            var target = null;
<span id="line796"></span>            try {
<span id="line797"></span>                // getElementById needs no selector escaping, so ids containing quotes cannot break the lookup.
<span id="line798"></span>                target = document.getElementById(decodeURIComponent(id));
<span id="line799"></span>            } catch (err) { /* malformed percent-encoding: leave target null and use default navigation */ }
<span id="line800"></span>            // No matching element (e.g. href="#"): let the browser handle the click instead of throwing.
<span id="line801"></span>            if (!target) return;
<span id="line802"></span>            e.preventDefault();
<span id="line803"></span>            var reduceMotion = window.matchMedia('(prefers-reduced-motion: reduce)').matches;
<span id="line804"></span>            target.scrollIntoView({ behavior: reduceMotion ? "auto" : "smooth" });
<span id="line805"></span>            // "top" clears the hash without adding a history entry; any other target gets its own entry.
<span id="line806"></span>            if (id === "top") { history.replaceState(null, null, " "); } else { history.pushState(null, null, `#${id}`); }
<span id="line807"></span>        });
<span id="line808"></span>    });
<span id="line809"></span>
<span id="line810"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line811"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line812"></span>    var mybutton = document.getElementById("top-link");
<span id="line813"></span>    // Show the go-to-top link once either scroll offset exceeds 800; hide it again otherwise.
<span id="line814"></span>    window.onscroll = function () {
<span id="line815"></span>        var scrolledPast = document.body.scrollTop > 800 || document.documentElement.scrollTop > 800;
<span id="line816"></span>        if (scrolledPast) {
<span id="line817"></span>            Object.assign(mybutton.style, { visibility: "visible", opacity: "1" });
<span id="line818"></span>        } else {
<span id="line819"></span>            Object.assign(mybutton.style, { visibility: "hidden", opacity: "0" });
<span id="line820"></span>        }
<span id="line821"></span>    };
<span id="line822"></span>
<span id="line823"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line824"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line825"></span>    // Clicking the theme toggle flips the "dark" class on the body element and stores the
<span id="line826"></span>    // resulting choice in localStorage under "pref-theme".
<span id="line827"></span>    document.getElementById("theme-toggle").addEventListener("click", () => {
<span id="line828"></span>        // classList.contains tests the exact class token; a substring test on className
<span id="line829"></span>        // would also fire for unrelated classes such as "darker".
<span id="line830"></span>        const wasDark = document.body.classList.contains("dark");
<span id="line831"></span>        document.body.classList.toggle("dark", !wasDark);
<span id="line832"></span>        localStorage.setItem("pref-theme", wasDark ? "light" : "dark");
<span id="line833"></span>    })
<span id="line834"></span>
<span id="line835"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line836"></span></span><span><<span class="start-tag">script</span>></span><span>
<span id="line837"></span>    // Add a "copy" button to every code block matched by 'pre > code'.
<span id="line838"></span>    document.querySelectorAll('pre > code').forEach((codeblock) => {
<span id="line839"></span>        const container = codeblock.parentNode.parentNode;
<span id="line840"></span>        const copybutton = document.createElement('button');
<span id="line841"></span>        copybutton.classList.add('copy-code');
<span id="line842"></span>        copybutton.innerText = 'copy';
<span id="line843"></span>
<span id="line844"></span>        // Show "copied!" for two seconds, then restore the original label.
<span id="line845"></span>        function copyingDone() {
<span id="line846"></span>            copybutton.innerText = 'copied!';
<span id="line847"></span>            setTimeout(() => {
<span id="line848"></span>                copybutton.innerText = 'copy';
<span id="line849"></span>            }, 2000);
<span id="line850"></span>        }
<span id="line851"></span>
<span id="line852"></span>        // Fallback path: select the block's contents and use the legacy execCommand('copy').
<span id="line853"></span>        function copyViaSelection() {
<span id="line854"></span>            const range = document.createRange();
<span id="line855"></span>            range.selectNodeContents(codeblock);
<span id="line856"></span>            const selection = window.getSelection();
<span id="line857"></span>            selection.removeAllRanges();
<span id="line858"></span>            selection.addRange(range);
<span id="line859"></span>            try {
<span id="line860"></span>                // execCommand signals failure by returning false, so confirm only on a truthy result.
<span id="line861"></span>                if (document.execCommand('copy')) copyingDone();
<span id="line862"></span>            } catch (e) { console.warn('copy-code: execCommand failed', e); }
<span id="line863"></span>            selection.removeRange(range);
<span id="line864"></span>        }
<span id="line865"></span>
<span id="line866"></span>        copybutton.addEventListener('click', () => {
<span id="line867"></span>            if (!('clipboard' in navigator)) { copyViaSelection(); return; }
<span id="line868"></span>            // writeText returns a promise: confirm once it resolves, use the fallback if it rejects.
<span id="line869"></span>            navigator.clipboard.writeText(codeblock.textContent).then(copyingDone, copyViaSelection);
<span id="line870"></span>        });
<span id="line871"></span>        if (container.classList.contains("highlight")) {
<span id="line872"></span>            container.appendChild(copybutton);
<span id="line873"></span>        } else if (container.parentNode.firstChild == container) {
<span id="line874"></span>            // Intentionally no button here (presumably a line-number cell - TODO confirm).
<span id="line875"></span>        } else if ((container.parentNode.parentNode.parentNode || {}).nodeName == "TABLE") {
<span id="line876"></span>            // The code element's fifth ancestor is a table: attach the button to that table.
<span id="line877"></span>            container.parentNode.parentNode.parentNode.appendChild(copybutton);
<span id="line878"></span>        } else {
<span id="line879"></span>            codeblock.parentNode.appendChild(copybutton);
<span id="line880"></span>        }
<span id="line881"></span>    });
<span id="line882"></span></span><span></<span class="end-tag">script</span>></span><span>
<span id="line883"></span></span><span></<span class="end-tag">body</span>></span><span>
<span id="line884"></span>
<span id="line885"></span></span><span></<span class="end-tag">html</span>></span><span>
<span id="line886"></span></span></pre></body></html>