From bf05ba3cb26cabce55ac489aeb951eb28ea39520 Mon Sep 17 00:00:00 2001
From: miararoy
Date: Tue, 14 Nov 2023 16:31:47 +0200
Subject: [PATCH] add /api and version to app.py (#169)

* add /api and version to app.py
* restructure api models into versioned directories
* split into routers and set up app init function
* fixed docs/schema
* add version to /health
* edit base url for tests to API_VERSION
* merge main
* change api_base on cli chat
* edit comment
* set default for default url path
* revert to 0.0.0.0 on uvicorn
* fix import in tests
---
 README.md                         |  6 +--
 src/canopy_cli/cli.py             | 32 +++++++-----
 src/canopy_server/app.py          | 52 ++++++++++++++-----
 .../{ => models/v1}/api_models.py |  2 +
 tests/e2e/test_app.py             | 29 ++++++++---
 5 files changed, 83 insertions(+), 38 deletions(-)
 rename src/canopy_server/{ => models/v1}/api_models.py (96%)

diff --git a/README.md b/README.md
index d0b925bf..bdd79ef8 100644
--- a/README.md
+++ b/README.md
@@ -197,12 +197,12 @@ This will open a similar chat interface window, but will show both the RAG and n
 
 ### Migrating an existing OpenAI application to **Canopy**
 
-If you already have an application that uses the OpenAI API, you can migrate it to **Canopy** by simply changing the API endpoint to `http://host:port/context` as follows:
+If you already have an application that uses the OpenAI API, you can migrate it to **Canopy** by simply changing the API endpoint to `http://host:port/v1`, for example with the default configuration:
 
 ```python
 import openai
 
-openai.api_base = "http://host:port/"
+openai.api_base = "http://localhost:8000/v1"
 
 # now you can use the OpenAI API as usual
 ```
 
@@ -212,7 +212,7 @@ or without global state change:
 ```python
 import openai
 
-openai_response = openai.Completion.create(..., api_base="http://host:port/")
+openai_response = openai.Completion.create(..., api_base="http://localhost:8000/v1")
 ```
 
 ### Running Canopy server in production
diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py
index 5699af60..6b859aed 100644
--- a/src/canopy_cli/cli.py
+++ b/src/canopy_cli/cli.py
@@ -31,17 +31,18 @@
 
 from canopy import __version__
 
-from canopy_server.app import start as start_server
+from canopy_server.app import start as start_server, API_VERSION
 from .cli_spinner import Spinner
-from canopy_server.api_models import ChatDebugInfo
+from canopy_server.models.v1.api_models import ChatDebugInfo
 
 
 load_dotenv()
 if os.getenv("OPENAI_API_KEY"):
     openai.api_key = os.getenv("OPENAI_API_KEY")
 
-spinner = Spinner()
 CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+DEFAULT_SERVER_URL = f"http://localhost:8000/{API_VERSION}"
+spinner = Spinner()
 
 
 def check_server_health(url: str):
@@ -171,8 +172,9 @@ def cli(ctx):
 
 
 @cli.command(help="Check if canopy server is running and healthy.")
-@click.option("--url", default="http://localhost:8000",
-              help="Canopy's server url. Defaults to http://localhost:8000")
+@click.option("--url", default=DEFAULT_SERVER_URL,
+              help=("Canopy's server url. "
+                    f"Defaults to {DEFAULT_SERVER_URL}"))
 def health(url):
     check_server_health(url)
     click.echo(click.style("Canopy server is healthy!", fg="green"))
@@ -432,8 +434,9 @@ def _chat(
               help="Print additional debugging information")
 @click.option("--rag/--no-rag", default=True,
               help="Compare RAG-infused Chatbot with vanilla LLM",)
-@click.option("--chat-server-url", default="http://localhost:8000",
-              help="URL of the Canopy server to use. Defaults to http://localhost:8000")
+@click.option("--chat-server-url", default=DEFAULT_SERVER_URL,
+              help=("URL of the Canopy server to use."
+                    f" Defaults to {DEFAULT_SERVER_URL}"))
 def chat(chat_server_url, rag, debug, stream):
     check_server_health(chat_server_url)
     note_msg = (
@@ -488,7 +491,7 @@ def chat(chat_server_url, rag, debug, stream):
             history=history_with_pinecone,
             message=message,
             stream=stream,
-            api_base=urljoin(chat_server_url, "/context"),
+            api_base=chat_server_url,
             print_debug_info=debug,
         )
@@ -527,7 +530,7 @@
     )
 )
 @click.option("--host", default="0.0.0.0",
-              help="Hostname or ip address to bind the server to. Defaults to 0.0.0.0")
+              help="Hostname or address to bind the server to. Defaults to 0.0.0.0")
 @click.option("--port", default=8000,
               help="TCP port to bind the server to. Defaults to 8000")
 @click.option("--reload/--no-reload", default=False,
@@ -580,8 +583,9 @@ def start(host: str, port: str, reload: bool,
     """
     )
 )
-@click.option("url", "--url", default="http://localhost:8000",
-              help="URL of the Canopy server to use. Defaults to http://localhost:8000")
+@click.option("url", "--url", default=DEFAULT_SERVER_URL,
+              help=("URL of the Canopy server to use. "
+                    f"Defaults to {DEFAULT_SERVER_URL}"))
 def stop(url):
     if os.name != "nt":
         # Check if the server was started using Gunicorn
@@ -643,9 +647,9 @@ def api_docs(url):
     if generated_docs:
         import json
         from canopy_server._redocs_template import HTML_TEMPLATE
-        from canopy_server.app import app
+        from canopy_server.app import app, _init_routes
         # generate docs
-
+        _init_routes(app)
         filename = "canopy-api-docs.html"
         msg = f"Generating docs to {filename}"
         click.echo(click.style(msg, fg="green"))
@@ -653,7 +657,7 @@
             print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd)
         webbrowser.open('file://' + os.path.realpath(filename))
     else:
-        webbrowser.open('http://localhost:8000/redoc')
+        webbrowser.open(urljoin(url, "redoc"))
 
 
 if __name__ == "__main__":
diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py
index 490dca55..e5d92b9f 100644
--- a/src/canopy_server/app.py
+++ b/src/canopy_server/app.py
@@ -18,7 +18,12 @@
 from starlette.concurrency import run_in_threadpool
 from sse_starlette.sse import EventSourceResponse
 
-from fastapi import FastAPI, HTTPException, Body
+from fastapi import (
+    FastAPI,
+    HTTPException,
+    Body,
+    APIRouter
+)
 import uvicorn
 from typing import cast, Union
 
@@ -27,7 +32,7 @@
     ChatResponse,
 )
 from canopy.models.data_models import Context, UserMessage
-from .api_models import (
+from .models.v1.api_models import (
     ChatRequest,
     ContextQueryRequest,
     ContextUpsertRequest,
@@ -64,8 +69,10 @@
 You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services.
 """  # noqa: E501
+API_VERSION = "v1"
 
-app = FastAPI(
+# Global variables - Application
+app: FastAPI = FastAPI(
     title="Canopy API",
     description=APP_DESCRIPTION,
     version=__version__,
 
@@ -74,16 +81,22 @@
         "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
     },
 )
+openai_api_router = APIRouter()
+context_api_router = APIRouter(prefix="/context")
+application_router = APIRouter(tags=["Application"])
 
+# Global variables - Engines
 context_engine: ContextEngine
 chat_engine: ChatEngine
 kb: KnowledgeBase
 llm: BaseLLM
+
+# Global variables - Logging
 logger: logging.Logger
 
 
-@app.post(
-    "/context/chat/completions",
+@openai_api_router.post(
+    "/chat/completions",
     response_model=None,
     responses={500: {"description": "Failed to chat with Canopy"}},  # noqa: E501
 )
@@ -126,8 +139,8 @@ def stringify_content(response: StreamingChatResponse):
         raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}")
 
 
-@app.post(
-    "/context/query",
+@context_api_router.post(
+    "/query",
     response_model=ContextResponse,
     responses={
         500: {"description": "Failed to query the knowledge base or build the context"}
@@ -156,8 +169,8 @@ async def query(
         raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}")
 
 
-@app.post(
-    "/context/upsert",
+@context_api_router.post(
+    "/upsert",
     response_model=SuccessUpsertResponse,
     responses={500: {"description": "Failed to upsert documents"}},
 )
@@ -183,8 +196,8 @@ async def upsert(
         raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}")
 
 
-@app.post(
-    "/context/delete",
+@context_api_router.post(
+    "/delete",
     response_model=SuccessDeleteResponse,
     responses={500: {"description": "Failed to delete documents"}},
 )
@@ -204,7 +217,7 @@ async def delete(
         raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}")
 
 
-@app.get(
+@application_router.get(
     "/health",
     response_model=HealthStatus,
     responses={500: {"description": "Failed to connect to Pinecone or LLM"}},
 )
@@ -236,7 +249,7 @@ async def health_check() -> HealthStatus:
     return HealthStatus(pinecone_status="OK", llm_status="OK")
 
 
-@app.get("/shutdown")
+@application_router.get("/shutdown")
 async def shutdown() -> ShutdownResponse:
     """
     __WARNING__: Experimental method.
@@ -267,6 +280,19 @@ async def shutdown() -> ShutdownResponse:
 async def startup():
     _init_logging()
     _init_engines()
+    _init_routes(app)
+
+
+def _init_routes(app):
+    # Include the application-level router (health, shutdown, ...)
+    app.include_router(application_router, include_in_schema=False)
+    app.include_router(application_router, prefix=f"/{API_VERSION}")
+    # Include the unversioned API, which serves the latest version
+    app.include_router(context_api_router, include_in_schema=False)
+    app.include_router(openai_api_router, include_in_schema=False)
+    # Include the API version in the path; API_VERSION should be the latest version.
+    app.include_router(context_api_router, prefix=f"/{API_VERSION}", tags=["Context"])
+    app.include_router(openai_api_router, prefix=f"/{API_VERSION}", tags=["LLM"])
 
 
 def _init_logging():
diff --git a/src/canopy_server/api_models.py b/src/canopy_server/models/v1/api_models.py
similarity index 96%
rename from src/canopy_server/api_models.py
rename to src/canopy_server/models/v1/api_models.py
index 49a7872a..1ceb34d2 100644
--- a/src/canopy_server/api_models.py
+++ b/src/canopy_server/models/v1/api_models.py
@@ -4,6 +4,8 @@
 
 from canopy.models.data_models import Messages, Query, Document
 
+# TODO: consider separating these into modules: Chat, Context, Application, etc.
+
+ class ChatRequest(BaseModel): model: str = Field( diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 70e5adca..9f38b845 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -11,9 +11,11 @@ from canopy.knowledge_base import KnowledgeBase -from canopy_server.app import app -from canopy_server.api_models import (HealthStatus, ContextUpsertRequest, - ContextQueryRequest, ) +from canopy_server.app import app, API_VERSION +from canopy_server.models.v1.api_models import ( + HealthStatus, + ContextUpsertRequest, + ContextQueryRequest) from .. import Tokenizer upsert_payload = ContextUpsertRequest( @@ -63,6 +65,7 @@ def client(knowledge_base, index_name): os.environ["INDEX_NAME"] = index_name Tokenizer.clear() with TestClient(app) as client: + client.base_url = f"{client.base_url}/{API_VERSION}" yield client if index_name_before: os.environ["INDEX_NAME"] = index_name_before @@ -95,7 +98,9 @@ def test_health(client): def test_upsert(client): # Upsert a document to the index - upsert_response = client.post("/context/upsert", json=upsert_payload.dict()) + upsert_response = client.post( + "/context/upsert", + json=upsert_payload.dict()) assert upsert_response.is_success @@ -114,7 +119,9 @@ def test_query(client): max_tokens=100, ) - query_response = client.post("/context/query", json=query_payload.dict()) + query_response = client.post( + "/context/query", + json=query_payload.dict()) assert query_response.is_success query_response = query_response.json() @@ -142,7 +149,9 @@ def test_chat_required_params(client): } ] } - chat_response = client.post("/context/chat/completions", json=chat_payload) + chat_response = client.post( + "/chat/completions", + json=chat_payload) assert chat_response.is_success chat_response_as_json = chat_response.json() assert chat_response_as_json["choices"][0]["message"]["role"] == "assistant" @@ -170,7 +179,9 @@ def test_chat_openai_additional_params(client): "stop": "stop string", "top_p": 0.5, } - chat_response = client.post("/context/chat/completions", json=chat_payload) + chat_response = client.post( + "/chat/completions", + json=chat_payload) assert chat_response.is_success chat_response_as_json = chat_response.json() assert chat_response_as_json["choices"][0]["message"]["role"] == "assistant" @@ -189,7 +200,9 @@ def test_delete(client, knowledge_base): delete_payload = { "document_ids": doc_ids } - delete_response = client.post("/context/delete", json=delete_payload) + delete_response = client.post( + "/context/delete", + json=delete_payload) assert delete_response.is_success assert_vector_ids_not_exist(vector_ids, knowledge_base)