Merge pull request #9 from prompt-security/suggested_fixes_1

vitaly-ps · web-flow · commit a16fa2756cea · 2024-04-15T16:11:03.000+03:00
Some textual and stylistic fixes following user reports
diff --git a/ps_fuzz/app_config.py b/ps_fuzz/app_config.py
@@ -34,11 +34,11 @@ def get_attributes(self):
     def print_as_table(self):
         attributes = self.get_attributes()
         print_table(
-            title = "Configuration",
+            title = "Current configuration",
             headers = ["Option", "Value"],
             data = [[key, value] for key, value in attributes.items() if key != "system_prompt"] # print all except the system prompt
         )
-        print(f"{colorama.Style.BRIGHT}System prompt:{colorama.Style.RESET_ALL}")
+        print(f"{colorama.Style.BRIGHT}Current system prompt:{colorama.Style.RESET_ALL}")
         #print(f"{colorama.Style.DIM}{wrap_text(self.system_prompt, width=70)}{colorama.Style.RESET_ALL}")
         print(f"{colorama.Style.DIM}{self.system_prompt}{colorama.Style.RESET_ALL}")
 
@@ -180,7 +180,7 @@ def parse_cmdline_args():
     parser.add_argument('-n', '--num-attempts', type=int, default=None, help="Number of different attack prompts")
     parser.add_argument('-t', '--num-threads', type=int, default=None, help="Number of worker threads")
     parser.add_argument('-a', '--attack-temperature', type=float, default=None, help="Temperature for attack model")
-    parser.add_argument('-d', '--debug-level', type=int, default=None, help="Debug level")
+    parser.add_argument('-d', '--debug-level', type=int, default=None, help="Debug level (0-2)")
     parser.add_argument("-b", '--batch', action='store_true', help="Run the fuzzer in unattended (batch) mode, bypassing the interactive steps")
     parser.add_argument('system_prompt_file', type=str, nargs='?', default=None, help="Filename containing the system prompt")
     return parser.parse_args()
diff --git a/ps_fuzz/cli.py b/ps_fuzz/cli.py
@@ -54,9 +54,9 @@ def main():
     # Run interactive shell that allows to change configuration or run some tasks
     if args.batch:
         run_fuzzer(app_config)
-        sys.exit(0)
-
-    interactive_shell(app_config)
+    else:
+        interactive_shell(app_config)
+    print(f"{BRIGHT}{colorama.Fore.CYAN}Thank you for trying out the Prompt Security Fuzzer!{RESET}")
 
 if __name__ == "__main__":
     main()
diff --git a/ps_fuzz/prompt_injection_fuzzer.py b/ps_fuzz/prompt_injection_fuzzer.py
@@ -95,13 +95,13 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackCon
         title = "Test results",
         headers = [
             "",
-            "Test",
+            "Attack Type",
             "Broken",
             "Resilient",
             "Errors",
             "Strength",
         ],
-        data = [
+        data = sorted([
             [
                 ERROR if test.status.error_count > 0 else RESILIENT if isResilient(test.status) else VULNERABLE,
                 f"{test.test_name + ' ':.<{50}}",
@@ -111,7 +111,7 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackCon
                 simpleProgressBar(test.status.resilient_count, test.status.total_count, GREEN if isResilient(test.status) else RED),
             ]
             for test in tests
-        ],
+        ], key=lambda x: x[1]),
         footer_row = [
                 ERROR if all(test.status.error_count > 0 for test in tests) else RESILIENT if all(isResilient(test.status) for test in tests) else VULNERABLE,
                 f"{'Total (# tests): ':.<50}",
@@ -129,7 +129,10 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackCon
     resilient_tests_count = sum(isResilient(test.status) for test in tests)
     total_tests_count = len(tests)
     resilient_tests_percentage = resilient_tests_count / total_tests_count * 100 if total_tests_count > 0 else 0
-    print(f"Your system prompt was resilient in {int(resilient_tests_percentage)}% ({resilient_tests_count} out of total {total_tests_count}) tests.")
+    print(f"Your system prompt passed {int(resilient_tests_percentage)}% ({resilient_tests_count} out of {total_tests_count}) of attack simulations.")
+    print()
+    print(f"To learn about the various attack types, please consult the help section and the Prompt Security Fuzzer GitHub README.")
+    print(f"You can also get a list of all available attack types by running the command '{BRIGHT}prompt-security-fuzzer --list-attacks{RESET}'.")
 
     # Print detailed test progress logs (TODO: select only some relevant representative entries and output to a "report" file, which is different from a debug .log file!)
     """
@@ -146,18 +149,32 @@ def run_interactive_chat(app_config: AppConfig):
     # Print current app configuration
     app_config.print_as_table()
     target_system_prompt = app_config.system_prompt
-    target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
-    interactive_chat(client=target_client, system_prompts=[target_system_prompt])
+    try:
+        target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+        interactive_chat(client=target_client, system_prompts=[target_system_prompt])
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Target LLM provider {app_config.target_provider} with model '{app_config.target_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
 
 def run_fuzzer(app_config: AppConfig):
     # Print current app configuration
     app_config.print_as_table()
     target_system_prompt = app_config.system_prompt
-    target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+    try:
+        target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Target LLM provider {app_config.target_provider} with model '{app_config.target_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
     client_config = ClientConfig(target_client, [target_system_prompt])
-    attack_config = AttackConfig(
-        attack_client = ClientLangChain(app_config.attack_provider, model=app_config.attack_model, temperature=app_config.attack_temperature),
-        attack_prompts_count = app_config.num_attempts
-    )
+
+    try:
+        attack_config = AttackConfig(
+            attack_client = ClientLangChain(app_config.attack_provider, model=app_config.attack_model, temperature=app_config.attack_temperature),
+            attack_prompts_count = app_config.num_attempts
+        )
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Attack LLM provider {app_config.attack_provider} with model '{app_config.attack_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
+
     # Run the fuzzer
     fuzz_prompt_injections(client_config, attack_config, threads_count=app_config.num_threads)