Merge pull request #16 from grafana/async-openai
Add Support for AsyncOpenAI to the Python Library
ishanjainn authored Jan 10, 2024
2 parents a1c0c55 + aad36aa commit 6ffe059
Showing 3 changed files with 101 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -42,7 +42,7 @@ client = OpenAI(
api_key="YOUR_OPENAI_API_KEY",
)

-# Apply the custom decorator to the OpenAI API function
+# Apply the custom decorator to the OpenAI API function. To use it with AsyncOpenAI, pass use_async=True to this function.
client.chat.completions.create = chat_v2.monitor(
client.chat.completions.create,
metrics_url="YOUR_PROMETHEUS_METRICS_URL",
2 changes: 1 addition & 1 deletion python/README.md
@@ -38,7 +38,7 @@ client = OpenAI(
api_key="YOUR_OPENAI_API_KEY",
)

-# Apply the custom decorator to the OpenAI API function
+# Apply the custom decorator to the OpenAI API function. To use it with AsyncOpenAI, pass use_async=True to this function.
client.chat.completions.create = chat_v2.monitor(
client.chat.completions.create,
metrics_url="YOUR_PROMETHEUS_METRICS_URL", # Example: "https://prometheus.grafana.net/api/prom"
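Taken together, the async usage the new comment describes looks roughly like this — a minimal sketch, assuming an AsyncOpenAI client, with placeholder endpoint URLs and credentials; the model and prompt are illustrative:

import asyncio
from openai import AsyncOpenAI
from grafana_openai_monitoring import chat_v2

client = AsyncOpenAI(api_key="YOUR_OPENAI_API_KEY")

# Pass use_async=True so monitor() dispatches to its async wrapper
client.chat.completions.create = chat_v2.monitor(
    client.chat.completions.create,
    metrics_url="YOUR_PROMETHEUS_METRICS_URL",
    logs_url="YOUR_LOKI_LOGS_URL",
    metrics_username="YOUR_METRICS_USERNAME",
    logs_username="YOUR_LOGS_USERNAME",
    access_token="YOUR_ACCESS_TOKEN",
    use_async=True,
)

async def main():
    # The monitored call now returns a coroutine and must be awaited
    response = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What is Grafana?"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())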
102 changes: 99 additions & 3 deletions python/src/grafana_openai_monitoring/chat_v2.py
@@ -11,7 +11,7 @@
from .__handlers import __send_metrics, __send_logs, __calculate_cost, __check

# Decorator function to monitor chat completion
-def monitor(func, metrics_url, logs_url, metrics_username, logs_username, access_token): # pylint: disable=too-many-arguments
+def monitor(func, metrics_url, logs_url, metrics_username, logs_username, access_token, use_async=False): # pylint: disable=too-many-arguments, line-too-long
"""
A decorator function to monitor chat completions using the OpenAI API.
@@ -25,6 +25,7 @@ def monitor(func, metrics_url, logs_url, metrics_username, logs_username, access
metrics_username (str): The username for accessing Prometheus metrics.
logs_username (str): The username for accessing Loki logs.
access_token (str): The access token required for authentication.
+use_async (bool): Whether the wrapped function is asynchronous or not. Defaults to False.
Returns:
callable: The decorated function that monitors the API call and sends metrics/logs.
@@ -42,7 +43,7 @@ def monitor(func, metrics_url, logs_url, metrics_username, logs_username, access
access_token
)

-def wrapper(*args, **kwargs):
+async def async_wrapper(*args, **kwargs):
start_time = time.time()
-response = func(*args, **kwargs)
+response = await func(*args, **kwargs)  # the async client's create() returns a coroutine and must be awaited
end_time = time.time()
@@ -125,7 +126,6 @@ def wrapper(*args, **kwargs):
f'requestDuration={duration}',
]


# Send metrics to the specified metrics URL
__send_metrics(metrics_url=metrics_url,
metrics_username=metrics_username,
@@ -134,4 +134,100 @@

return response


def wrapper(*args, **kwargs):
# pylint: disable=no-else-return
if use_async is True:
return async_wrapper(*args, **kwargs)
else:
start_time = time.time()
response = func(*args, **kwargs)
end_time = time.time()
duration = end_time - start_time

# Determine prompt and model from args or kwargs
prompt = (
args[1] if args and len(args) > 1 and isinstance(args[1], str)
else kwargs.get('messages', [{"content": "No prompt provided"}])[0]['content']
)

model = (
args[2] if len(args) > 2 and isinstance(args[2], str)
else kwargs.get('model', "No model provided")
)

# Calculate the cost based on the response's usage
cost = __calculate_cost(model,
response.usage.prompt_tokens,
response.usage.completion_tokens
)

# Prepare logs to be sent
logs = {
"streams": [
{
"stream": {
"job": "integrations/openai",
"prompt": prompt,
"model": response.model,
"role": response.choices[0].message.role,
"finish_reason": response.choices[0].finish_reason,
"prompt_tokens": str(response.usage.prompt_tokens),
"completion_tokens": str(response.usage.completion_tokens),
"total_tokens": str(response.usage.total_tokens)
},
"values": [
[
str(int(time.time()) * 1000000000),
response.choices[0].message.content
]
]

}
]
}

# Send logs to the specified logs URL
__send_logs(logs_url=logs_url,
logs_username=logs_username,
access_token=access_token,
logs=logs
)

# Prepare metrics to be sent
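            # Each entry below is an Influx line-protocol string:
            # measurement,tag=value,... field=value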
metrics = [
# Metric to track the number of completion tokens used in the response
f'openai,job=integrations/openai,'
f'source=python_chatv2,model={response.model} '
f'completionTokens={response.usage.completion_tokens}',

# Metric to track the number of prompt tokens used in the response
f'openai,job=integrations/openai,'
f'source=python_chatv2,model={response.model} '
f'promptTokens={response.usage.prompt_tokens}',

# Metric to track the total number of tokens used in the response
f'openai,job=integrations/openai,'
f'source=python_chatv2,model={response.model} '
f'totalTokens={response.usage.total_tokens}',

# Metric to track the usage cost based on the model and token usage
f'openai,job=integrations/openai,'
f'source=python_chatv2,model={response.model} '
f'usageCost={cost}',

# Metric to track the duration of the API request and response cycle
f'openai,job=integrations/openai,'
f'source=python_chatv2,model={response.model} '
f'requestDuration={duration}',
]

# Send metrics to the specified metrics URL
__send_metrics(metrics_url=metrics_url,
metrics_username=metrics_username,
access_token=access_token,
metrics=metrics)

return response

return wrapper
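The wrapper above stays synchronous in both modes: when use_async is True it returns the async_wrapper coroutine for the caller to await, and otherwise it runs the monitoring body inline. A stripped-down, standalone sketch of that dispatch pattern — timing only, with a hypothetical dummy coroutine standing in for the library's metrics/logs body:

import asyncio
import time

def monitor(func, use_async=False):
    """Return a wrapper that times func; works for sync and async callables."""
    async def async_wrapper(*args, **kwargs):
        start = time.time()
        response = await func(*args, **kwargs)  # async callables must be awaited
        print(f"requestDuration={time.time() - start:.3f}s")
        return response

    def wrapper(*args, **kwargs):
        if use_async:
            # Return the coroutine without awaiting it, so wrapper itself
            # stays synchronous; the caller writes: await monitored(...)
            return async_wrapper(*args, **kwargs)
        start = time.time()
        response = func(*args, **kwargs)
        print(f"requestDuration={time.time() - start:.3f}s")
        return response

    return wrapper

# Usage with a dummy async callable in place of AsyncOpenAI's create:
async def fake_create(**kwargs):
    return {"model": kwargs.get("model", "none")}

monitored = monitor(fake_create, use_async=True)
print(asyncio.run(monitored(model="gpt-3.5-turbo")))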
