Merge pull request #326 from StampyAI/openai-fixes

Fixes and refactors of OpenAI stuff
StampyAI · Nov 1, 2023 · commit b4b9e61
2 parents: 853e28b + 0fef240

Showing 6 changed files with 142 additions and 152 deletions.
diff --git a/api/openai.py b/api/openai.py
@@ -2,20 +2,18 @@
 from api.utilities.openai import OpenAIEngines
 from config import (
     openai_api_key,
-    paid_service_channel_ids,
     gpt4,
     gpt4_for_all,
     gpt4_whitelist_role_ids,
     bot_vip_ids,
     paid_service_all_channels,
     use_helicone,
-    disable_prompt_moderation
+    disable_prompt_moderation,
+    openai_allowed_sources,
 )
 from structlog import get_logger
-from servicemodules.serviceConstants import Services, openai_channel_ids
 from utilities.serviceutils import ServiceMessage
-from utilities import utilities, Utilities
-from utilities import discordutils
+from utilities import Utilities, discordutils
 if use_helicone:
     try:
         from helicone import openai
@@ -28,11 +26,12 @@
 import requests
 import json # moderation response dump
 
-CURL_REQUEST: bool # helicone breaks some moderation attribute of openai module
-if use_helicone:
-    CURL_REQUEST = True
-else:
-    CURL_REQUEST = False
+
+OPENAI_NASTY_CATEGORIES = {
+    "sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening",
+    "violence/graphic", "self-harm/intent", "self-harm/instructions",
+    "harassment/threatening", "violence"
+}
 
 openai.api_key = openai_api_key
 start_sequence = "\nA:"
@@ -46,95 +45,89 @@ def __init__(self):
         self.log = get_logger()
 
     def is_channel_allowed(self, message: ServiceMessage) -> bool:
-        if message.service in openai_channel_ids and message.channel.id in openai_channel_ids[message.service]:
-            # For Rob's discord
-            return True
-        elif paid_service_all_channels:
-            return True
-        elif message.channel.id in paid_service_channel_ids:
-            # if list is empty, default
-            return True
+        channel_id = (message.channel and message.channel.id)
+        return (
+            paid_service_all_channels or
+            channel_id in openai_allowed_sources.get(message.service.value, [])
+        )
+
+    def log_error(self, error, exception=None, warning=False):
+        if warning:
+            self.log.warning(self.class_name, error=error)
         else:
-            return False
-    def is_text_risky(self, text: str) -> bool:
-        """Ask the openai moderation endpoint if the text is risky
+            self.log.error(self.class_name, error=error)
 
-        See https://platform.openai.com/docs/guides/moderation/quickstart for details"""
+        loop = asyncio.get_running_loop()
+        loop.create_task(utils.log_error(error))
+        if exception:
+            loop.create_task(utils.log_exception(exception))
+
+    def is_text_risky(self, text: str) -> bool:
+        """Ask the openai moderation endpoint if the text is risky.
 
-        allowed_categories = frozenset("violence") # Can be triggered by some AI safety terms
+        See https://platform.openai.com/docs/guides/moderation/quickstart for details.
+        """
+        allowed_categories = {"violence"} # Can be triggered by some AI safety terms
 
         if disable_prompt_moderation:
             return False
 
-        if CURL_REQUEST:
+        response = None
+        if use_helicone:
             try:
                 http_response = requests.post(
-                        'https://api.openai.com/v1/moderations',
-                        headers={
-                            "Content-Type": "application/json",
-                            "Authorization": f"Bearer {openai_api_key}"
-                            },
-                        json={
-                            "input": text
-                            }
-                        )
+                    'https://api.openai.com/v1/moderations',
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {openai_api_key}"
+                    },
+                    json={"input": text}
+                )
             except Exception as e:
-                self.log.error(self.class_name, error="Error in Requests module trying to moderate content")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"Error in Requests module trying to moderate content"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("Error in Requests module trying to moderate content", e)
                 return True
+
             if http_response.status_code == 401:
-                self.log.error(self.class_name, error="OpenAI Authentication Failed")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Authenication Failed"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Authentication Failed")
                 return True
             elif http_response.status_code == 429:
-                self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Rate Limit Exceeded", warning=True)
                 return True
             elif http_response.status_code != 200:
-                self.log.warning(self.class_name, error=f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}"))
+                self.log_error(
+                    f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}"
+                )
                 return True
             response = http_response.json()
+
         else:
             try:
                 response = Moderation.create(input=text)
             except openai.error.AuthenticationError as e:
-                self.log.error(self.class_name, error="OpenAI Authentication Failed")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Authenication Failed"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Authentication Failed", e)
                 return True
             except openai.error.RateLimitError as e:
-                self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Rate Limit Exceeded", e, warning=True)
                 return True
 
-        flagged: bool = response["results"][0]["flagged"]
-
-        all_morals: frozenset[str] = ["sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"]
-        violated_categories = set()
+        results = (response.get("results") or [None])[0]
+        if not results:
+            return False
 
-        if flagged:
-            for moral in all_morals - allowed_categories:
-                if response["results"][0][moral]:
-                    violated_categories.add(moral)
+        if not results["flagged"]:
+            self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean")
+            return False
 
-        if len(violated_categories) > 0:
+        violated_categories = [
+            moral for moral in OPENAI_NASTY_CATEGORIES - allowed_categories if results.get(moral)
+        ]
+        if violated_categories:
             self.log.warning(self.class_name, msg=f"Text violated these unwanted categories: {violated_categories}")
             self.log.debug(self.class_name, msg=f"OpenAI moderation response: {json.dumps(response)}")
             return True
-        else:
-            self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean")
-            return False
+
+        self.log.info(self.class_name, msg="Checked with content filter, it doesn't violate any of our categories")
+        return False
 
     def get_engine(self, message: ServiceMessage) -> OpenAIEngines:
         """Pick the appropriate engine to respond to a message with"""
@@ -153,9 +146,9 @@ def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int,
             return ""
 
         try:
-            response = openai.Completion.create(
-                engine=str(engine),
-                prompt=prompt,
+            response = openai.ChatCompletion.create(
+                model=str(engine),
+                messages=[{'role': 'user', 'content': prompt}],
                 temperature=0,
                 max_tokens=100,
                 top_p=1,
@@ -178,8 +171,9 @@ def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int,
 
         if response["choices"]:
             choice = response["choices"][0]
-            if choice["finish_reason"] == "stop" and choice["text"].strip() != "Unknown":
-                text = choice["text"].strip(". \n").split("\n")[0]
+            text = choice.get('message', {}).get('content', '').strip()
+            if choice["finish_reason"] == "stop" and text != "Unknown":
+                text = text.strip(". \n").split("\n")[0]
                 self.log.info(self.class_name, gpt_response=text)
                 return text
 

diff --git a/config.py b/config.py
@@ -139,7 +139,6 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
 valid_bot_reboot_options = Literal["exec", False]
 bot_reboot: valid_bot_reboot_options
 paid_service_all_channels: bool
-paid_service_channel_ids: frozenset
 paid_service_for_all: bool
 paid_service_whitelist_role_ids: frozenset
 gpt4: bool
@@ -197,15 +196,27 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
     bot_reboot = cast(valid_bot_reboot_options, False)
     paid_service_for_all = True
     paid_service_all_channels = True
-    paid_service_channel_ids = frozenset()
+
     # NOTE: rob's approved stuff are in servicemodules/serviceConstants.py
+    from servicemodules import discordConstants
     paid_service_whitelist_role_ids = frozenset()
-    gpt4 = getenv_bool("GPT4")
-    gpt4_for_all = getenv_bool("GPT4_FOR_ALL")
-    gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset())
-    use_helicone = getenv_bool("USE_HELICONE")
-    llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt)
-    be_shy = getenv_bool("BE_SHY")
+    openai_allowed_sources: dict[str, tuple[str, ...]] = {
+        "Discord": (
+            discordConstants.stampy_dev_priv_channel_id,
+            discordConstants.aligned_intelligences_only_channel_id,
+            discordConstants.ai_channel_id,
+            discordConstants.not_ai_channel_id,
+            discordConstants.events_channel_id,
+            discordConstants.projects_channel_id,
+            discordConstants.book_club_channel_id,
+            discordConstants.dialogues_with_stampy_channel_id,
+            discordConstants.meta_channel_id,
+            discordConstants.general_channel_id,
+            discordConstants.talk_to_stampy_channel_id,
+        ),
+        "Flask": ("flask_api",),
+    }
+
     channel_whitelist = None
     bot_error_channel_id = {
             "production": "1017527224540344380",
@@ -222,28 +233,35 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
     bot_dev_roles = getenv_unique_set("BOT_DEV_ROLES", frozenset())
     bot_dev_ids = getenv_unique_set("BOT_DEV_IDS", frozenset())
     bot_control_channel_ids = getenv_unique_set("BOT_CONTROL_CHANNEL_IDS", frozenset())
-    bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID")
+    bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID", '')
     bot_error_channel_id = getenv("BOT_ERROR_CHANNEL_ID", bot_private_channel_id)
     # NOTE: Rob's invite/member management functions, not ported yet
     member_role_id = getenv("MEMBER_ROLE_ID", default=None)
     bot_reboot = cast(valid_bot_reboot_options, getenv("BOT_REBOOT", default=False))
     paid_service_all_channels = getenv_bool("PAID_SERVICE_ALL_CHANNELS")
-    paid_service_channel_ids = getenv_unique_set(
-        "PAID_SERVICE_CHANNEL_IDS", frozenset()
-    )
+    openai_allowed_sources: dict[str, tuple[str, ...]] = {
+        "Discord": tuple(getenv_unique_set("PAID_SERVICE_CHANNEL_IDS", frozenset())),
+        "Flask": {
+            'production': tuple(),
+            'development': ("flask_api",)
+        }[ENVIRONMENT_TYPE],
+    }
+
     paid_service_for_all = getenv_bool("PAID_SERVICE_FOR_ALL")
     paid_service_whitelist_role_ids = getenv_unique_set(
         "PAID_SERVICE_ROLE_IDS", frozenset()
     )
-    gpt4 = getenv_bool("GPT4")
-    gpt4_for_all = getenv_bool("GPT4_FOR_ALL")
-    gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset())
-    use_helicone = getenv_bool("USE_HELICONE")
-    llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt)
-    be_shy = getenv_bool("BE_SHY")
+
     channel_whitelist = getenv_unique_set("CHANNEL_WHITELIST", None)
     disable_prompt_moderation = getenv_bool("DISABLE_PROMPT_MODERATION")
 
+gpt4 = getenv_bool("GPT4")
+gpt4_for_all = getenv_bool("GPT4_FOR_ALL")
+gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset())
+use_helicone = getenv_bool("USE_HELICONE")
+llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt)
+be_shy = getenv_bool("BE_SHY")
+
 discord_token: str = getenv("DISCORD_TOKEN")
 database_path: str = getenv("DATABASE_PATH")
 youtube_api_key: Optional[str] = getenv("YOUTUBE_API_KEY", default=None)

diff --git a/modules/chatgpt.py b/modules/chatgpt.py
@@ -140,38 +140,39 @@ async def chatgpt_chat(self, message: ServiceMessage) -> Response:
         else:
             im = default_italics_mark
 
-        if self.openai.is_channel_allowed(message):
-            if self.openai.is_text_risky(message.clean_content):
-                return Response(
-                    confidence=0,
-                    text="",
-                    why="GPT-3's content filter thought the prompt was risky",
-                )
-            self.log.info(
-                self.class_name,
-                msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})",
-            )
-            chatcompletion = cast(
-                OpenAIObject,
-                openai.ChatCompletion.create(model=str(engine), messages=messages),
+        if not self.openai.is_channel_allowed(message):
+            self.log.info(self.class_name, msg="channel not allowed")
+            return Response()
+
+        if self.openai.is_text_risky(message.clean_content):
+            return Response(
+                confidence=0,
+                text="",
+                why="GPT-3's content filter thought the prompt was risky",
             )
-            print(chatcompletion)
-            if chatcompletion.choices:
-                response = chatcompletion.choices[0].message.content
 
-                # sometimes the response starts with "Stampy says:" or responds or replies etc, which we don't want
-                response = re.sub(r"^[sS]tampy\ ?[a-zA-Z]{,15}:\s?", "", response)
+        self.log.info(
+            self.class_name,
+            msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})",
+        )
+        chatcompletion = cast(
+            OpenAIObject,
+            openai.ChatCompletion.create(model=str(engine), messages=messages),
+        )
+        if chatcompletion.choices:
+            response = chatcompletion.choices[0].message.content
 
-                self.log.info(self.class_name, response=response)
+            # sometimes the response starts with "Stampy says:" or responds or replies etc, which we don't want
+            response = re.sub(r"^[sS]tampy\ ?[a-zA-Z]{,15}:\s?", "", response)
 
-                if response:
-                    return Response(
-                        confidence=10,
-                        text=f"{im}{response}{im}",
-                        why="ChatGPT made me say it!",
-                    )
-        else:
-            self.log.info(self.class_name, msg="channel not allowed")
+            self.log.info(self.class_name, response=response)
+
+            if response:
+                return Response(
+                    confidence=10,
+                    text=f"{im}{response}{im}",
+                    why="ChatGPT made me say it!",
+                )
         return Response()
 
     def __str__(self):