diff --git a/datasets/create_chitchat_dataset.py b/datasets/create_chitchat_dataset.py deleted file mode 100644 index d5ed426f..00000000 --- a/datasets/create_chitchat_dataset.py +++ /dev/null @@ -1,325 +0,0 @@ -import os -import json -import random -import re - -from tqdm import tqdm - - -_path = os.path.dirname(__file__) -_samsum_train_path = os.path.join(_path, "data/samsum-train.json") -_squad_train_path = os.path.join(_path, "data/squad2-train.json") -_squad_filter_path = os.path.join(_path, "data/squad_items_about_people.json") -_candidate_answers = [ - "unknown", - "I don't know", - "I do not know", - "I have no information about this", -] -_unknown_fraction = 0.1 -_context_fraction = 0.2 - - -def get_speakers(dialogue): - speakers = set() - for line in dialogue.split("\n"): - name = line[: line.find(":")] - speakers.add(name) - - return list(speakers) - - -def select_random_pair_of_speakers(candidates): - random.shuffle(candidates) - return candidates[:2] - - -def create_inset_from_unanswerable_question(squad_set, first_speaker, second_speaker): - data = squad_set["data"] - item = random.choice(data) - paragraph = random.choice(item["paragraphs"]) - qas = random.choice(paragraph["qas"]) - question = qas["question"] - answer = random.choice(_candidate_answers) - return f"{first_speaker}: {question}\n{second_speaker}: {answer}\n" - - -def from_name_to_2nd_person(text, name): - text = re.sub(f"{name} doesn't", "you don't", text, flags=re.IGNORECASE) - text = re.sub(f"{name} does not", "you do not", text, flags=re.IGNORECASE) - text = re.sub(f"{name} does", "you do", text, flags=re.IGNORECASE) - text = re.sub(f"{name}'s", "your", text, flags=re.IGNORECASE) - text = re.sub(f"does {name}", "do you", text, flags=re.IGNORECASE) - text = re.sub(f"is {name}", "are you", text, flags=re.IGNORECASE) - text = re.sub(f"was {name}", "were you", text, flags=re.IGNORECASE) - text = re.sub(f"{name} is", "you are", text, flags=re.IGNORECASE) - text = re.sub(f"{name}", "you", text, flags=re.IGNORECASE) - return text - - -def from_name_to_1st_person(text, name): - text = re.sub(f"{name} doesn't", "I don't", text, flags=re.IGNORECASE) - text = re.sub(f"{name} does not", "I do not", text, flags=re.IGNORECASE) - text = re.sub(f"{name} does", "I do", text, flags=re.IGNORECASE) - text = re.sub(f"{name}'s", "my", text, flags=re.IGNORECASE) - text = re.sub(f"does {name}", "do I", text, flags=re.IGNORECASE) - text = re.sub(f"is {name}", "am I", text, flags=re.IGNORECASE) - text = re.sub(f"was {name}", "was I", text, flags=re.IGNORECASE) - text = re.sub(f"{name} is", "I am", text, flags=re.IGNORECASE) - text = re.sub(f"to {name}", "to me", text, flags=re.IGNORECASE) - text = re.sub(f"{name}", "I", text, flags=re.IGNORECASE) - return text - - -def from_2nd_person_to_name(text, name): - text = re.sub("you don't", f"{name} doesn't", text, flags=re.IGNORECASE) - text = re.sub("you do not", f"{name} does not", text, flags=re.IGNORECASE) - text = re.sub("you do", f"{name} does", text, flags=re.IGNORECASE) - text = re.sub("your", f"{name}'s", text, flags=re.IGNORECASE) - text = re.sub("do you", f"does {name}", text, flags=re.IGNORECASE) - text = re.sub("are you", f"is {name}", text, flags=re.IGNORECASE) - text = re.sub("were you", f"was {name}", text, flags=re.IGNORECASE) - text = re.sub("you are", f"{name} is", text, flags=re.IGNORECASE) - text = re.sub("you will", f"{name} will", text, flags=re.IGNORECASE) - text = re.sub("you'll", f"{name} will", text, flags=re.IGNORECASE) - text = re.sub(" you ", f" {name} ", text, 
flags=re.IGNORECASE) - text = re.sub(" you\.", f" {name}\.", text, flags=re.IGNORECASE) - text = re.sub(" you!", f" {name}!", text, flags=re.IGNORECASE) - text = re.sub(" you\?", f" {name}\?", text, flags=re.IGNORECASE) - return text - - -def from_1st_person_to_name(text, name): - text = re.sub("I don't", f"{name} doesn't", text, flags=re.IGNORECASE) - text = re.sub("I do not", f"{name} does not", text, flags=re.IGNORECASE) - text = re.sub("I do", f"{name} does", text, flags=re.IGNORECASE) - text = re.sub("my ", f"{name}'s ", text, flags=re.IGNORECASE) - text = re.sub("do I", f"does {name}", text, flags=re.IGNORECASE) - text = re.sub("am I", f"is {name}", text, flags=re.IGNORECASE) - text = re.sub("was I", f"was {name}", text, flags=re.IGNORECASE) - text = re.sub("I am", f"{name} is", text, flags=re.IGNORECASE) - text = re.sub("to me", f"to {name}", text, flags=re.IGNORECASE) - text = re.sub("I will", f"{name} will", text, flags=re.IGNORECASE) - text = re.sub("I'll", f"{name} will", text, flags=re.IGNORECASE) - text = re.sub("I'm", f"{name} is", text) - text = re.sub("I ", f"{name} ", text) - text = re.sub(" I\?", f" {name}\?", text) - text = re.sub(" me ", f" {name} ", text, flags=re.IGNORECASE) - text = re.sub(" me\.", f" {name}\.", text, flags=re.IGNORECASE) - text = re.sub(" me!", f" {name}!", text, flags=re.IGNORECASE) - text = re.sub(" me\?", f" {name}\?", text, flags=re.IGNORECASE) - return text - - -def replace_names(text, names, replace_function): - names = sorted(names, key=lambda x: -len(x)) - for name in names: - if name in text: - return replace_function(text, name) - - return text - - -def create_inset_with_first_person_answer( - squad_set, squad_people_filter, first_speaker, second_speaker -): - squad_item_number, names = random.sample(squad_people_filter.items(), 1)[0] - squad_item_number = int(squad_item_number) - names = names["names"] - question, answer = "", "" - while ( - "you" not in question.lower() - and "your" not in question.lower() - and "I " not in answer - and "my" not in answer.lower() - ): - paragraph = random.choice(squad_set["data"][squad_item_number]["paragraphs"]) - qas = random.choice(paragraph["qas"]) - if not qas["answers"]: - continue - - question = replace_names(qas["question"], names, from_name_to_2nd_person) - answer = replace_names( - random.choice(qas["answers"])["text"], names, from_name_to_1st_person - ) - - context = replace_names( - paragraph["context"], names, lambda x, y: x.replace(y, second_speaker) - ) - return f"{first_speaker}: {question}\n{second_speaker}: {answer}\n", context - - -def create_inset_with_first_person_query( - squad_set, squad_people_filter, first_speaker, second_speaker -): - squad_item_number, names = random.sample(squad_people_filter.items(), 1)[0] - squad_item_number = int(squad_item_number) - names = names["names"] - question, answer = "", "" - while ( - "I" not in question.lower() - and "my" not in question.lower() - and "you " not in answer.lower() - and "your " not in answer.lower() - ): - paragraph = random.choice(squad_set["data"][squad_item_number]["paragraphs"]) - qas = random.choice(paragraph["qas"]) - if not qas["answers"]: - continue - - question = replace_names(qas["question"], names, from_name_to_1st_person) - answer = replace_names( - random.choice(qas["answers"])["text"], names, from_name_to_2nd_person - ) - - context = replace_names( - paragraph["context"], names, lambda x, y: x.replace(y, second_speaker) - ) - return f"{first_speaker}: {question}\n{second_speaker}: {answer}\n", context - - -def 
is_question(line): - return "?" in line - - -def get_sequence_of_speakers(dialogue_lines): - return [line.split(":")[0] for line in dialogue_lines if ":" in line] - - -def find_next_speaker(speaker_sequence, index, curr_speaker): - for speaker in speaker_sequence[index + 1 :]: - if curr_speaker != speaker: - return speaker - - raise RuntimeWarning("No next speaker in conversation.") - - -def find_prior_speaker(speaker_sequence, index, curr_speaker): - for speaker in speaker_sequence[:index][::-1]: - if curr_speaker != speaker: - return speaker - - raise RuntimeError("No prior speaker in conversation.") - - -def substitute_pronouns_with_speaker_names(dialogue_text): - dialogue_lines = [line for line in dialogue_text.split("\n") if line] - speaker_sequence = get_sequence_of_speakers(dialogue_lines) - new_lines = [] - for index in range(len(dialogue_lines) - 1): - line = dialogue_lines[index] - curr_speaker = speaker_sequence[index] - if "remembers" in curr_speaker: - new_lines.append(line) - continue - - new_line = from_1st_person_to_name(line, curr_speaker) - try: - next_speaker = find_next_speaker(speaker_sequence, index, curr_speaker) - - except RuntimeWarning: - new_lines.append(new_line) - break - - new_line = from_2nd_person_to_name(new_line, next_speaker) - new_lines.append(new_line) - - new_line = from_1st_person_to_name(dialogue_lines[-1], speaker_sequence[-1]) - try: - prior_speaker = find_prior_speaker(speaker_sequence, -1, speaker_sequence[-1]) - - except RuntimeWarning: - new_lines.append(new_line) - return "\n".join(new_lines) - - new_line = from_2nd_person_to_name(new_line, prior_speaker) - new_lines.append(new_line) - - return "\n".join(new_lines) - - -if __name__ == "__main__": - samsum_train = json.load(open(_samsum_train_path)) - squad_train = json.load(open(_squad_train_path)) - squad_people_filter = json.load(open(_squad_filter_path)) - - new_train_set = [] - for item in tqdm(samsum_train[:1000]): - new_item = {} - dialogue = item["dialogue"].replace("\r", "") - if not dialogue: - continue - - speakers = get_speakers(dialogue) - first, second = select_random_pair_of_speakers(speakers) - inset = create_inset_from_unanswerable_question(squad_train, first, second) - first_person_answer, sp_context = create_inset_with_first_person_answer( - squad_train, squad_people_filter, first, second - ) - first_person_query, fp_context = create_inset_with_first_person_query( - squad_train, squad_people_filter, first, second - ) - - new_dialogue = "" - num_lines = len(dialogue.split("\n")) - unknown_inserted_before = False - first_person_answer_inserted_before = False - first_person_query_inserted_before = False - - for line in dialogue.split("\n"): - new_dialogue += line + "\n" - if line and is_question(line): - continue - - threshold = _unknown_fraction / num_lines - context_threshold = _context_fraction / num_lines - if random.uniform(0, 1) < threshold and not unknown_inserted_before: - new_dialogue += inset - unknown_inserted_before = True - - elif ( - random.uniform(0, 1) < context_threshold - and not first_person_answer_inserted_before - ): - if random.choice([1, 0]): - new_dialogue += f"{second} remembers: " + sp_context + "\n" - first_person_answer = first_person_answer.replace( - f"{second}:", f"{second}: [factual]" - ) - - else: - new_dialogue += f"{second}: " + sp_context + "\n" - first_person_answer = first_person_answer.replace( - f"{second}:", f"{second}: [answer in conversation]" - ) - - new_dialogue += first_person_answer - first_person_answer_inserted_before = True - 
continue - - elif ( - random.uniform(0, 1) < context_threshold - and not first_person_query_inserted_before - ): - if random.choice([1, 0]): - new_dialogue += f"{second} remembers: " + fp_context + "\n" - first_person_query = first_person_query.replace( - f"{second}:", f"{second}: [factual]" - ) - - else: - new_dialogue += f"{first}: " + fp_context + "\n" - first_person_query = first_person_query.replace( - f"{second}:", f"{second}: [answer in conversation]" - ) - - new_dialogue += first_person_query - first_person_answer_inserted_before = True - - new_item["dialogue"] = ( - "In the dialogue below some people are talking:\n" - + substitute_pronouns_with_speaker_names(new_dialogue) - ) - new_train_set.append(new_item) - - json.dump(new_train_set, open(os.path.join(_path, "data/dialogues.json"), "w")) diff --git a/datasets/create_rules_dataset.py b/datasets/create_rules_dataset.py deleted file mode 100644 index c711745a..00000000 --- a/datasets/create_rules_dataset.py +++ /dev/null @@ -1,45 +0,0 @@ -import asyncio -import pandas as pd - -from wafl.config import Configuration -from wafl.connectors.remote.remote_llm_connector import RemoteLLMConnector - - -def get_prompt(df, theme): - prompt = "" - for _, row in df.sample(9).iterrows(): - prompt += ( - f""" - -Create a plausible dialogue about the theme \"{row["Theme"]}\" based on the following summary and rules. - -The rules are as follows: -{row["Rules"]} - -The conversation goes as follows: -{row["Conversation"]} - - """.strip() - + "\n\n" - ) - - return ( - prompt - + f'\nCreate plausible dialogue about the theme "{theme}" based on the following summary and rules.\n\nThe rules are as follows:\n' - ) - - -if __name__ == "__main__": - config = Configuration.load_local_config() - remote_llm_connector = RemoteLLMConnector( - config.get_value("llm_model"), last_strings=[""] - ) - - df = pd.read_csv("data/complex_instructions.csv") - theme = "playing a song that the user likes" - prompt = get_prompt(df, theme) - print( - asyncio.run( - remote_llm_connector.predict(prompt, temperature=0.5, num_tokens=1500) - ) - ) diff --git a/datasets/train_llm_on_rules_dataset.py b/datasets/train_llm_on_rules_dataset.py deleted file mode 100644 index 251bbe9d..00000000 --- a/datasets/train_llm_on_rules_dataset.py +++ /dev/null @@ -1,122 +0,0 @@ -import random - -import pandas as pd -from datasets import Dataset -from transformers import ( - AutoTokenizer, - AutoModelForCausalLM, - TrainingArguments, - Trainer, - DataCollatorForLanguageModeling, -) - -model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.1" -max_length = 1024 + 512 - - -def get_prompts(df): - prompts = [] - for _, row in df.sample(frac=1).iterrows(): - memory = "" - if memory == "": - memory = "The user has no memory." - - current_rule = row["Rules"] - rules = df.sample(random.choice([1, 2]))["Rules"].tolist() + [current_rule] - random.shuffle(rules) - rules = "\n".join(rules) - prompt = ( - f""" -The user is talking with a chatbot about the theme \"{row["Theme"]}\" based on the following summary. 
- -{memory} - - -The rules are as follows: - -{rules} - - -The conversation goes as follows: -{row["Conversation"]} - """.strip() - + "\n\n" - ) - prompts.append(prompt) - - return prompts - - -def preprocess_function(sample): - model_inputs = tokenizer( - sample["prompt"], - return_tensors="pt", - max_length=max_length, - padding="max_length", - ) - labels = tokenizer( - sample["prompt"], - return_tensors="pt", - max_length=max_length, - padding="max_length", - ) - - model_inputs["labels"] = labels["input_ids"] - return model_inputs - - -def model_init(): - model = AutoModelForCausalLM.from_pretrained(model_name_or_path) - parameters = model.parameters() - for parameter in parameters: - parameter.requires_grad = False - - model.model.enable_input_require_grads() - model.lm_head.training = True - for index in range(len(model.model.layers)): - model.model.layers[index].self_attn.k_proj.training = True - - return model - - -def create_dataset_from_file(filepath): - df = pd.read_csv(filepath) - prompts = get_prompts(df) - return Dataset.from_dict({"prompt": prompts}) - - -if __name__ == "__main__": - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - tokenizer.pad_token = tokenizer.eos_token - dataset = create_dataset_from_file("data/complex_instructions.csv") - train_dataset = dataset.map( - preprocess_function, batched=True, batch_size=1, num_proc=4 - ) - data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) - learning_rate = 1e-6 - output_dir_name = f"checkpoint_lr{learning_rate}" - training_args = TrainingArguments( - output_dir=output_dir_name, - per_device_train_batch_size=1, - per_device_eval_batch_size=1, - evaluation_strategy="steps", - use_cpu=True, - learning_rate=learning_rate, - num_train_epochs=2, - logging_steps=200, - eval_steps=200, - save_total_limit=1, - ) - model = model_init() - trainer = Trainer( - model=model, - args=training_args, - tokenizer=tokenizer, - data_collator=data_collator, - train_dataset=train_dataset, - ) - trainer.train() - trainer.save_model("wafl-mistral") - model = trainer.model - model.push_to_hub("fractalego/wafl-mistral") - tokenizer.push_to_hub("fractalego/wafl-mistral") diff --git a/tests/config.json b/tests/config.json index 4bb4fa90..c78c40b3 100644 --- a/tests/config.json +++ b/tests/config.json @@ -4,6 +4,7 @@ "deactivate_sound": true, "rules": "rules.yaml", "functions": "functions.py", + "max_recursion": 2, "llm_model": { "model_host": "localhost", "model_port": 8080 diff --git a/tests/test_rules.py b/tests/test_rules.py index 619e3cd9..990f5492 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -13,6 +13,16 @@ - the user says their name: - reply casually to the conversation" + + - the user wants to buy coffee: + - the bot says the coffee prices + - ask for which price range + - tell them the right coffee + + - the bot says the coffee prices: + - decaf is 1.50 + - regular is 1.00 + - espresso is 2.00 """ @@ -48,3 +58,20 @@ def test__rules_are_not_always_triggered(self): asyncio.run(conversation_events.process_next()) unexpected = "bot: the horse is tall" self.assertNotEqual(unexpected, interface.get_utterances_list()[-1]) + + def test__rules_can_nest(self): + interface = DummyInterface( + to_utter=[ + "I want to buy coffee", + ] + ) + config = Configuration.load_local_config() + config.set_value("rules", wafl_example) + conversation_events = ConversationEvents( + config=config, + interface=interface, + ) + asyncio.run(conversation_events.process_next()) + self.assertIn("decaf", 
interface.get_facts_and_timestamp()[0][1])
+        self.assertIn("regular", interface.get_facts_and_timestamp()[0][1])
+        self.assertIn("espresso", interface.get_facts_and_timestamp()[0][1])
diff --git a/todo.txt b/todo.txt
index d0d39bbb..0beacad2 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,8 +1,62 @@
+* on wafl_llm make it so only some LLMs are supported
+* change speaker model with a newer one
+
+
+1) train on more steps
+   a) try 3 epochs, save each
+   b) use lr=1e-6
+   c) use batch_size=4
+   d) do not use the 4-bit original model, use 16 bit (on the GPU)
+2) evaluate result
+3) Upload to hf
+4) create a test set of 50 elements for the paper. Find a way to test it. Repeat from 1)
+5) refactor code
+6) maybe change voice model
+7) write paper
+
+
+
 ### TODO
+* script to add wrong when none are needed
+
+
+On the to_modify set:
+* sometimes the user answers yes (after "do you confirm?") and the dialogue does not have "user: yes"
+
+
+On the accepted set:
+* CHANGE <|USER|>\n into user: (some of the elements are in the wrong format)
+* Perhaps change function() into function() (the memory should store the results of the function)
+* Create a first paragraph with the summary of the conversation: the conversation must always be grounded on the summary (USE LLM TO CREATE THE SUMMARY)
+* The LLM wrote text after hallucinating the result of the execution. Think about how to deal with that.
+* all the rules that say "two levels of retrieval" should have the trigger rewritten to something more specific
+* change "bot" into "assistant" some of the time
+* some sentences are between [] and should be removed
+* put the items so far into the conversation summary. If it is a function then you need to simulate the relevant output using the LLM
+* sometimes at the end of the conversation the bot says "Process finished with exit code 0". Erase this
+* add ability to index files and files in entire folders
+* if the bot uses a function to retrieve information, you should add . This is symmetrical to with a function call when necessary.
+* some tags like should end the training item text
+* todo User -> user, or at least be internally consistent
+
+* find a way to use HuggingFaceH4/ultrachat_200k as a starting point for each item
+  - each item should be easy to copy into a csv.
+  - Separate the items with special tokens/lines
+* Create a dataset with about 500 elements
+  - use the huggingface chat dataset as a starting point for
+    - themes
+    - conversation guide in prompt
+  - use an LLM to create corresponding python code
+* retriever in create_prompt
+* change num_replicas back to 10 in remote_llm_connector
+
+
 /* create actions from command line
 /* add condition of when to stop to the actions
+
+Actions:
 #### Find way to delete cache in remote llm connector
 #### Put colors in action output (and dummy interface)
 #### Add green for when an expectation is matched
diff --git a/wafl/answerer/dialogue_answerer.py b/wafl/answerer/dialogue_answerer.py
index 7fe8c68c..a115d77c 100644
--- a/wafl/answerer/dialogue_answerer.py
+++ b/wafl/answerer/dialogue_answerer.py
@@ -10,7 +10,7 @@
     get_last_user_utterance,
 )
 from wafl.answerer.base_answerer import BaseAnswerer
-from wafl.answerer.rule_creator import RuleCreator
+from wafl.answerer.rule_maker import RuleMaker
 from wafl.connectors.bridges.llm_chitchat_answer_bridge import LLMChitChatAnswerBridge
 from wafl.exceptions import CloseConversation
 from wafl.extractors.dataclasses import Query, Answer
@@ -31,7 +31,7 @@ def __init__(self, config, knowledge, interface, code_path, logger):
         self._init_python_module(code_path.replace(".py", ""))
         self._prior_rule_with_timestamp = None
         self._max_predictions = 3
-        self._rule_creator = RuleCreator(
+        self._rule_creator = RuleMaker(
             knowledge,
             config,
             interface,
@@ -126,7 +126,7 @@ async def _get_relevant_facts(
             > conversational_timestamp - self._max_num_past_utterances_for_facts
         ]
         facts_and_thresholds = await self._knowledge.ask_for_facts_with_threshold(
-            query, is_from_user=True, threshold=0.8
+            query, is_from_user=True, threshold=0.85
         )
         if facts_and_thresholds:
             facts = [
@@ -206,13 +206,13 @@
         except Exception as e:
             result = (
-                f'Error while executing\n\n"""python\n{to_execute}\n"""\n\n{str(e)}'
+                f'Error while executing\n\n```python\n{to_execute}\n```\n\n{str(e)}'
             )
             traceback.print_exc()
             break

         if not result:
-            result = f'\n"""python\n{to_execute}\n"""'
+            result = f"\n```python\n{to_execute}\n```"

         return result
diff --git a/wafl/answerer/rule_creator.py b/wafl/answerer/rule_maker.py
similarity index 83%
rename from wafl/answerer/rule_creator.py
rename to wafl/answerer/rule_maker.py
index a1a49ba0..7819fab5 100644
--- a/wafl/answerer/rule_creator.py
+++ b/wafl/answerer/rule_maker.py
@@ -1,4 +1,4 @@
-class RuleCreator:
+class RuleMaker:
     def __init__(
         self,
         knowledge,
@@ -13,11 +13,15 @@ def __init__(
         self._interface = interface
         self._max_num_rules = max_num_rules
         self._delete_current_rule = delete_current_rule
-        self._max_indentation = max_recursion
+        if not config.get_value("max_recursion"):
+            self._max_indentation = max_recursion
+        else:
+            self._max_indentation = config.get_value("max_recursion")
+
         self._indent_str = "    "

     async def create_from_query(self, query):
-        rules = await self._knowledge.ask_for_rule_backward(query)
+        rules = await self._knowledge.ask_for_rule_backward(query, threshold=0.92)
         rules = rules[: self._max_num_rules]
         rules_texts = []
         for rule in rules:
@@ -34,15 +38,18 @@
         return "\n".join(rules_texts)

     async def recursively_add_rules(self, query, depth=2):
+        if depth > self._max_indentation:
+            return ""
+
         rules = await self._knowledge.ask_for_rule_backward(query, threshold=0.95)
-        rules = rules[: self._max_num_rules]
+        rules = rules[:1]
         rules_texts = []
         for rule in rules:
-            rules_text = f"- If 
{rule.effect.text} go through the following points:\n" + rules_text = "" for cause_index, causes in enumerate(rule.causes): indentation = self._indent_str * depth rules_text += f"{indentation}{cause_index + 1}) {causes.text}\n" - rules_text += await self.recursively_add_rules(causes.text, depth + 1) + rules_text += await self.recursively_add_rules(causes, depth + 1) rules_texts.append(rules_text) diff --git a/wafl/command_line.py b/wafl/command_line.py index 14fbb6cd..fc68db06 100644 --- a/wafl/command_line.py +++ b/wafl/command_line.py @@ -7,7 +7,7 @@ run_from_command_line, run_testcases, print_incipit, - download_models + download_models, ) from wafl.runners.run_from_actions import run_action from wafl.runners.run_from_audio import run_from_audio diff --git a/wafl/connectors/base_llm_connector.py b/wafl/connectors/base_llm_connector.py index 806164e4..2c800c20 100644 --- a/wafl/connectors/base_llm_connector.py +++ b/wafl/connectors/base_llm_connector.py @@ -59,7 +59,7 @@ async def generate(self, prompt: str) -> str: if end_set: end = min(end_set) - candidate_answer = text[start:end].split("bot: ")[-1].strip() + candidate_answer = text[start:end].strip() candidate_answer = re.sub(r"(.*)<\|.*\|>", r"\1", candidate_answer).strip() if prompt not in self._cache: diff --git a/wafl/connectors/remote/remote_llm_connector.py b/wafl/connectors/remote/remote_llm_connector.py index d232df7d..42fa0da7 100644 --- a/wafl/connectors/remote/remote_llm_connector.py +++ b/wafl/connectors/remote/remote_llm_connector.py @@ -9,13 +9,13 @@ class RemoteLLMConnector(BaseLLMConnector): _max_reply_length = 1024 _num_prediction_tokens = 200 _cache = {} - _num_replicas = 10 - def __init__(self, config, last_strings=None): + def __init__(self, config, last_strings=None, num_replicas=3): super().__init__(last_strings) host = config["model_host"] port = config["model_port"] self._server_url = f"https://{host}:{port}/predictions/bot" + self._num_replicas = num_replicas try: loop = asyncio.get_running_loop() @@ -28,24 +28,28 @@ def __init__(self, config, last_strings=None): ): raise RuntimeError("Cannot connect a running LLM.") - async def predict(self, prompt: str, temperature=None, num_tokens=None) -> [str]: + async def predict(self, prompt: str, temperature=None, num_tokens=None, num_replicas=None) -> [str]: if not temperature: temperature = 0.5 if not num_tokens: num_tokens = self._num_prediction_tokens + if not num_replicas: + num_replicas = self._num_replicas + payload = { "data": prompt, "temperature": temperature, "num_tokens": num_tokens, "last_strings": self._last_strings, - "num_replicas": self._num_replicas, + "num_replicas": num_replicas, } for _ in range(self._max_tries): async with aiohttp.ClientSession( - connector=aiohttp.TCPConnector(ssl=False) + conn_timeout=6000, + connector=aiohttp.TCPConnector(ssl=False), ) as session: async with session.post(self._server_url, json=payload) as response: answer = await response.text() diff --git a/wafl/frontend/index.html b/wafl/frontend/index.html index bc7283f1..8137ca99 100644 --- a/wafl/frontend/index.html +++ b/wafl/frontend/index.html @@ -39,6 +39,37 @@ +
+    [... 31 added HTML lines, presumably the controls wired to the new toggle_logs route below, could not be recovered from this capture ...]
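
The markup added to wafl/frontend/index.html above did not survive this capture. Judging from the `toggle_logs` route registered in wafl/runners/routes.py below, it presumably adds a control that toggles the log panel. As a minimal, hedged sketch, the endpoint can also be exercised directly; the port and the conversation id here are assumptions, not values from this patch:

```python
import asyncio

import aiohttp


async def toggle_logs(conversation_id: int, base_url: str = "http://localhost:8889"):
    # POST to the route added in wafl/runners/routes.py; on the server side
    # WebLoop.toggle_logs flips the "logs" window in MessagesCreator and
    # returns an empty JSON string.
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{base_url}/{conversation_id}/toggle_logs") as response:
            return await response.json()


if __name__ == "__main__":
    # Assumes a wafl web runner is serving conversation 0 locally.
    asyncio.run(toggle_logs(0))
```
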
diff --git a/wafl/run.py b/wafl/run.py
index 4f664ecb..b0397e84 100644
--- a/wafl/run.py
+++ b/wafl/run.py
@@ -52,4 +52,3 @@ def download_models():
     import nltk

     nltk.download("averaged_perceptron_tagger")
-
diff --git a/wafl/runners/routes.py b/wafl/runners/routes.py
index 169bfabc..0cd5961b 100644
--- a/wafl/runners/routes.py
+++ b/wafl/runners/routes.py
@@ -9,7 +9,6 @@
 from wafl.config import Configuration
 from wafl.events.conversation_events import ConversationEvents
 from wafl.interface.queue_interface import QueueInterface
-from wafl.knowledge.single_file_knowledge import SingleFileKnowledge
 from wafl.logger.local_file_logger import LocalFileLogger
 from wafl.scheduler.conversation_loop import ConversationLoop
 from wafl.scheduler.scheduler import Scheduler
@@ -125,3 +124,9 @@ def add_new_rules(app, conversation_id, web_server_loop):
         web_server_loop.thumbs_down,
         methods=["POST"],
     )
+    app.add_url_rule(
+        f"/{conversation_id}/toggle_logs",
+        f"toggle_logs_{conversation_id}",
+        web_server_loop.toggle_logs,
+        methods=["POST"],
+    )
\ No newline at end of file
diff --git a/wafl/runners/run_from_actions.py b/wafl/runners/run_from_actions.py
index c4868874..4e005690 100644
--- a/wafl/runners/run_from_actions.py
+++ b/wafl/runners/run_from_actions.py
@@ -41,11 +41,11 @@ def predict_action(config, actions_list, expected_list):
         raise ValueError("The agent did not say anything.")

     if expected and not asyncio.run(
-            entailer.left_entails_right(
-                last_utterance,
-                expected,
-                "\n".join(interface.get_utterances_list()[:-1]),
-            )
+        entailer.left_entails_right(
+            last_utterance,
+            expected,
+            "\n".join(interface.get_utterances_list()[:-1]),
+        )
     ):
         del entailer, conversation_events, interface
         raise ValueError(
diff --git a/wafl/scheduler/messages_creator.py b/wafl/scheduler/messages_creator.py
new file mode 100644
index 00000000..ec0d85cb
--- /dev/null
+++ b/wafl/scheduler/messages_creator.py
@@ -0,0 +1,70 @@
+from wafl.scheduler.web_interface_implementation import get_html_from_dialogue_item
+
+
+class MessagesCreator:
+    def __init__(self, interface):
+        self._interface = interface
+        self._toggled_windows = []
+
+    def toggle_logs(self):
+        if "logs" in self._toggled_windows:
+            self._toggled_windows.remove("logs")
+        else:
+            self._toggled_windows.append("logs")
+
+    async def get_messages_window(self):
+        conversation = ""
+        conversation += await self._get_dialogue()
+        if "logs" in self._toggled_windows:
+            conversation += await self._get_logs()
+
+        return conversation
+
+    async def _get_dialogue(self):
+        dialogue_items = self._interface.get_utterances_list_with_timestamp()
+        dialogue = []
+        for index, item in enumerate(dialogue_items):
+            dialogue.append(
+                (
+                    item[0],
+                    get_html_from_dialogue_item(
+                        item[1],
+                    ),
+                )
+            )
+
+        dialogue_items = dialogue
+        dialogue_items = sorted(dialogue_items, key=lambda x: x[0])[::-1]
+        dialogue_items = [item[1] for item in dialogue_items]
+        conversation = (
+            "
    " + ) + conversation += "".join(dialogue_items) + return conversation + + async def _get_logs(self): + choices = self._interface.get_choices_and_timestamp() + choices = [ + ( + item[0], + "
    " + item[1] + "
    ", + ) + for item in choices + ] + facts = self._interface.get_facts_and_timestamp() + facts = [ + ( + item[0], + "
    " + item[1] + "
    ", + ) + for item in facts + ] + + choices_and_facts = choices + facts + choices_and_facts = sorted(choices_and_facts, key=lambda x: x[0])[::-1] + choices_and_facts = [item[1] for item in choices_and_facts] + conversation = "
    " + conversation += "
    " + conversation += "".join(choices_and_facts) + conversation += "
    " + return conversation diff --git a/wafl/scheduler/web_loop.py b/wafl/scheduler/web_loop.py index f43a5a9e..0d3510d0 100644 --- a/wafl/scheduler/web_loop.py +++ b/wafl/scheduler/web_loop.py @@ -4,9 +4,7 @@ from flask import render_template, request, jsonify from wafl.interface.queue_interface import QueueInterface from wafl.logger.history_logger import HistoryLogger -from wafl.scheduler.web_interface_implementation import ( - get_html_from_dialogue_item, -) +from wafl.scheduler.messages_creator import MessagesCreator _path = os.path.dirname(__file__) @@ -23,6 +21,7 @@ def __init__( self._conversation_id = conversation_id self._conversation_events = conversation_events self._prior_dialogue_items = "" + self._messages_creator = MessagesCreator(self._interface) async def index(self): return render_template("index.html", conversation_id=self._conversation_id) @@ -49,7 +48,7 @@ async def reset_conversation(self): self._conversation_events.reload_knowledge() self._conversation_events.reset_discourse_memory() await self._interface.output("Hello. How may I help you?") - conversation = await self._get_conversation() + conversation = await self._messages_creator.get_messages_window() return conversation async def reload_rules(self): @@ -59,7 +58,7 @@ async def reload_rules(self): return "" async def check_for_new_messages(self): - conversation = await self._get_conversation() + conversation = await self._messages_creator.get_messages_window() if conversation != self._prior_dialogue_items: self._prior_dialogue_items = conversation return f""" @@ -75,7 +74,7 @@ async def check_for_new_messages(self): return "
    " async def load_messages(self): - conversation = await self._get_conversation() + conversation = await self._messages_creator.get_messages_window() return conversation async def handle_output(self): @@ -93,51 +92,10 @@ async def thumbs_down(self): self._history_logger.write("thumbs_down") return jsonify("") + async def toggle_logs(self): + self._messages_creator.toggle_logs() + return jsonify("") + async def run(self): print(f"New web server instance {self._conversation_id} running!") return - - async def _get_conversation(self): - dialogue_items = self._interface.get_utterances_list_with_timestamp() - dialogue = [] - for index, item in enumerate(dialogue_items): - dialogue.append( - ( - item[0], - get_html_from_dialogue_item( - item[1], - ), - ) - ) - - choices = self._interface.get_choices_and_timestamp() - choices = [ - ( - item[0], - "
    " + item[1] + "
    ", - ) - for item in choices - ] - facts = self._interface.get_facts_and_timestamp() - facts = [ - ( - item[0], - "
    " + item[1] + "
    ", - ) - for item in facts - ] - choices_and_facts = choices + facts - choices_and_facts = sorted(choices_and_facts, key=lambda x: x[0])[::-1] - choices_and_facts = [item[1] for item in choices_and_facts] - dialogue_items = dialogue - dialogue_items = sorted(dialogue_items, key=lambda x: x[0])[::-1] - dialogue_items = [item[1] for item in dialogue_items] - conversation = ( - "
    " - ) - conversation += "".join(dialogue_items) - conversation += "
    " - conversation += "
    " - conversation += "".join(choices_and_facts) - conversation += "
    " - return conversation