Skip to content

Commit 879b262

Browse files
working streaming implementation for loading
1 parent b490b69 commit 879b262

File tree

7 files changed

+601
-136
lines changed

7 files changed

+601
-136
lines changed

backend/app/prompts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@
154154
155155
Ensure that your diagram adheres strictly to the given explanation, without adding or omitting any significant components or relationships.
156156
157-
As a very general direction, the provided example below is a good flow for your code:
157+
For general direction, the provided example below is how you should structure your code:
158158
159159
```mermaid
160160
flowchart TD
@@ -191,6 +191,7 @@
191191
- In Mermaid.js syntax, we cannot include special characters for nodes without being inside quotes! For example: `EX[/api/process (Backend)]:::api` and `API -->|calls Process()| Backend` are two examples of syntax errors. They should be `EX["/api/process (Backend)"]:::api` and `API -->|"calls Process()"| Backend` respectively. Notice the quotes. This is extremely important. Make sure to include quotes for any string that contains special characters.
192192
- In Mermaid.js syntax, you cannot apply a class style directly within a subgraph declaration. For example: `subgraph "Frontend Layer":::frontend` is a syntax error. However, you can apply them to nodes within the subgraph. For example: `Example["Example Node"]:::frontend` is valid, and `class Example1,Example2 frontend` is valid.
193193
- In Mermaid.js syntax, there cannot be spaces in the relationship label names. For example: `A -->| "example relationship" | B` is a syntax error. It should be `A -->|"example relationship"| B`
194+
- In Mermaid.js syntax, you cannot give subgraphs an alias like nodes. For example: `subgraph A "Layer A"` is a syntax error. It should be `subgraph "Layer A"`
194195
"""
195196
# ^^^ NOTE: I've generated a few diagrams now and Claude still writes incorrect Mermaid code sometimes. In the future, refer to those generated diagrams and add important instructions to the prompt above to avoid those mistakes. Concrete examples work best.
196197

backend/app/routers/generate.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from fastapi import APIRouter, Request, HTTPException
2+
from fastapi.responses import StreamingResponse
23
from dotenv import load_dotenv
34
from app.services.github_service import GitHubService
45
from app.services.o3_mini_openrouter_service import OpenRouterO3Service
@@ -12,6 +13,8 @@
1213
from pydantic import BaseModel
1314
from functools import lru_cache
1415
import re
16+
import json
17+
import asyncio
1518

1619
# from app.services.claude_service import ClaudeService
1720
# from app.core.limiter import limiter
@@ -49,6 +52,7 @@ class ApiRequest(BaseModel):
4952
github_pat: str | None = None
5053

5154

55+
# OLD NON STREAMING VERSION
5256
@router.post("")
5357
# @limiter.limit("1/minute;5/day") # TEMP: disable rate limit for growth??
5458
async def generate(request: Request, body: ApiRequest):
@@ -268,3 +272,149 @@ def replace_path(match):
268272
# Match click events: click ComponentName "path/to/something"
269273
click_pattern = r'click ([^\s"]+)\s+"([^"]+)"'
270274
return re.sub(click_pattern, replace_path, diagram)
275+
276+
277+
@router.post("/stream")
async def generate_stream(request: Request, body: ApiRequest):
    """Stream diagram generation progress and results as Server-Sent Events.

    Runs the same three-phase pipeline as the non-streaming endpoint
    (explanation -> component mapping -> Mermaid diagram), forwarding each
    model chunk to the client as a `data:` event as it arrives.

    Args:
        request: Incoming FastAPI request (unused directly; kept for parity
            with the non-streaming endpoint and for rate-limiter hooks).
        body: Validated request payload (username, repo, instructions, keys).

    Returns:
        StreamingResponse with `text/event-stream` media type, or a plain
        `{"error": ...}` dict for failures detected before streaming starts.
        Errors raised after streaming has begun are reported as `{"error": ...}`
        events, since HTTP headers have already been sent by then.
    """

    def sse(payload: dict) -> str:
        # Serialize one payload as a single SSE `data:` event.
        # NOTE: building the dict first and passing it to json.dumps avoids
        # multi-line expressions with nested same-type quotes inside an
        # f-string, which is a SyntaxError before Python 3.12 (PEP 701).
        return f"data: {json.dumps(payload)}\n\n"

    try:
        # Initial validation checks. These run before the stream is opened,
        # so a plain JSON response is still possible here.
        if len(body.instructions) > 1000:
            return {"error": "Instructions exceed maximum length of 1000 characters"}

        if body.repo in [
            "fastapi",
            "streamlit",
            "flask",
            "api-analytics",
            "monkeytype",
        ]:
            return {"error": "Example repos cannot be regenerated"}

        async def event_generator():
            try:
                # Get cached github data (file tree + README fetched earlier).
                github_data = get_cached_github_data(
                    body.username, body.repo, body.github_pat
                )
                default_branch = github_data["default_branch"]
                file_tree = github_data["file_tree"]
                readme = github_data["readme"]

                # Send initial status so the client can show progress UI.
                yield sse({"status": "started", "message": "Starting generation process..."})
                # Brief pause lets the event flush before heavy work begins.
                await asyncio.sleep(0.1)

                # Token count check: large repos need a user-supplied key,
                # and anything over the model's context window is rejected.
                combined_content = f"{file_tree}\n{readme}"
                token_count = o3_service.count_tokens(combined_content)

                if 50000 < token_count < 195000 and not body.api_key:
                    yield sse({"error": f"File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenRouter API key."})
                    return
                elif token_count > 195000:
                    yield sse({"error": f"Repository is too large (>195k tokens) for analysis. OpenAI o3-mini's max context length is 200k tokens. Current size: {token_count} tokens."})
                    return

                # Prepare prompts; user instructions extend phases 1 and 3.
                first_system_prompt = SYSTEM_FIRST_PROMPT
                third_system_prompt = SYSTEM_THIRD_PROMPT
                if body.instructions:
                    first_system_prompt = (
                        first_system_prompt + "\n" + ADDITIONAL_SYSTEM_INSTRUCTIONS_PROMPT
                    )
                    third_system_prompt = (
                        third_system_prompt + "\n" + ADDITIONAL_SYSTEM_INSTRUCTIONS_PROMPT
                    )

                # Phase 1: Get explanation of the repository.
                yield sse({"status": "explanation_sent", "message": "Sending explanation request to o3-mini..."})
                await asyncio.sleep(0.1)
                yield sse({"status": "explanation", "message": "Analyzing repository structure..."})
                explanation = ""
                async for chunk in o3_service.call_o3_api_stream(
                    system_prompt=first_system_prompt,
                    data={
                        "file_tree": file_tree,
                        "readme": readme,
                        "instructions": body.instructions,
                    },
                    api_key=body.api_key,
                    reasoning_effort="medium",
                ):
                    explanation += chunk
                    yield sse({"status": "explanation_chunk", "chunk": chunk})

                # The model signals unusable instructions with this sentinel.
                if "BAD_INSTRUCTIONS" in explanation:
                    yield sse({"error": "Invalid or unclear instructions provided"})
                    return

                # Phase 2: Get component mapping (file paths per component).
                yield sse({"status": "mapping_sent", "message": "Sending component mapping request to o3-mini..."})
                await asyncio.sleep(0.1)
                yield sse({"status": "mapping", "message": "Creating component mapping..."})
                full_second_response = ""
                async for chunk in o3_service.call_o3_api_stream(
                    system_prompt=SYSTEM_SECOND_PROMPT,
                    data={"explanation": explanation, "file_tree": file_tree},
                    api_key=body.api_key,
                    reasoning_effort="medium",
                ):
                    full_second_response += chunk
                    yield sse({"status": "mapping_chunk", "chunk": chunk})

                # Extract the <component_mapping> section from the response.
                # Guard the miss case: str.find returns -1 on a missing tag,
                # which would otherwise silently produce a bogus slice; fall
                # back to the full response so phase 3 still has material.
                start_tag = "<component_mapping>"
                end_tag = "</component_mapping>"
                start_idx = full_second_response.find(start_tag)
                end_idx = full_second_response.find(end_tag)
                if start_idx != -1 and end_idx != -1:
                    component_mapping_text = full_second_response[start_idx:end_idx]
                else:
                    component_mapping_text = full_second_response

                # Phase 3: Generate Mermaid diagram.
                yield sse({"status": "diagram_sent", "message": "Sending diagram generation request to o3-mini..."})
                await asyncio.sleep(0.1)
                yield sse({"status": "diagram", "message": "Generating diagram..."})
                mermaid_code = ""
                async for chunk in o3_service.call_o3_api_stream(
                    system_prompt=third_system_prompt,
                    data={
                        "explanation": explanation,
                        "component_mapping": component_mapping_text,
                        "instructions": body.instructions,
                    },
                    api_key=body.api_key,
                    reasoning_effort="medium",
                ):
                    mermaid_code += chunk
                    yield sse({"status": "diagram_chunk", "chunk": chunk})

                # Process final diagram: strip markdown fences, check sentinel,
                # and rewrite click-event paths to absolute GitHub URLs.
                mermaid_code = mermaid_code.replace("```mermaid", "").replace("```", "")
                if "BAD_INSTRUCTIONS" in mermaid_code:
                    yield sse({"error": "Invalid or unclear instructions provided"})
                    return

                processed_diagram = process_click_events(
                    mermaid_code, body.username, body.repo, default_branch
                )

                # Send final result as one event so the client can swap from
                # the progressive view to the finished diagram atomically.
                yield sse({
                    "status": "complete",
                    "diagram": processed_diagram,
                    "explanation": explanation,
                    "mapping": component_mapping_text,
                })

            except Exception as e:
                # Headers are already sent; report the failure in-band.
                yield sse({"error": str(e)})

        return StreamingResponse(event_generator(), media_type="text/event-stream")
    except Exception as e:
        return {"error": str(e)}

backend/app/services/o3_mini_openrouter_service.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
from app.utils.format_message import format_user_message
44
import tiktoken
55
import os
6-
from typing import Literal
6+
import aiohttp
7+
import json
8+
from typing import Literal, AsyncGenerator
79

810
load_dotenv()
911

@@ -15,6 +17,7 @@ def __init__(self):
1517
api_key=os.getenv("OPENROUTER_API_KEY"),
1618
)
1719
self.encoding = tiktoken.get_encoding("o200k_base")
20+
self.base_url = "https://openrouter.ai/api/v1/chat/completions"
1821

1922
def call_o3_api(
2023
self,
@@ -64,6 +67,68 @@ def call_o3_api(
6467

6568
return completion.choices[0].message.content
6669

70+
async def call_o3_api_stream(
    self,
    system_prompt: str,
    data: dict,
    api_key: str | None = None,
    reasoning_effort: Literal["low", "medium", "high"] = "low",
) -> AsyncGenerator[str, None]:
    """
    Makes a streaming API call to OpenRouter O3 and yields the responses.

    Args:
        system_prompt (str): The instruction/system prompt
        data (dict): Dictionary of variables to format into the user message
        api_key (str | None): Optional custom API key
        reasoning_effort (Literal): Requested reasoning depth for the model

    Yields:
        str: Chunks of O3's response text

    Raises:
        aiohttp.ClientResponseError: If OpenRouter returns an HTTP error
            status (without this check, a JSON error body would be silently
            skipped below and the stream would just appear empty).
    """
    # Create the user message with the data
    user_message = format_user_message(data)

    headers = {
        "HTTP-Referer": "https://gitdiagram.com",
        "X-Title": "gitdiagram",
        "Authorization": f"Bearer {api_key or self.default_client.api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "openai/o3-mini",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "max_tokens": 12000,
        "temperature": 0.2,
        "stream": True,
        "reasoning_effort": reasoning_effort,
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            self.base_url, headers=headers, json=payload
        ) as response:
            # Fail loudly on 4xx/5xx instead of silently yielding nothing.
            response.raise_for_status()
            # The response body is an SSE stream: lines of `data: {...}`
            # terminated by `data: [DONE]`.
            async for raw_line in response.content:
                line = raw_line.decode("utf-8").strip()
                if not line.startswith("data: "):
                    continue
                if line == "data: [DONE]":
                    break
                try:
                    # Named `event` (not `data`) to avoid shadowing the
                    # `data` parameter above.
                    event = json.loads(line[6:])
                except json.JSONDecodeError:
                    # Skip any non-JSON lines (like the OPENROUTER PROCESSING comments)
                    continue
                if (
                    content := event.get("choices", [{}])[0]
                    .get("delta", {})
                    .get("content")
                ):
                    yield content
67132
def count_tokens(self, prompt: str) -> int:
68133
"""
69134
Counts the number of tokens in a prompt.

backend/requirements.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
aiohappyeyeballs==2.4.6
2+
aiohttp==3.11.12
3+
aiosignal==1.3.2
14
annotated-types==0.7.0
25
anthropic==0.42.0
36
anyio==4.7.0
47
api-analytics==1.2.5
8+
attrs==25.1.0
59
certifi==2024.12.14
610
cffi==1.17.1
711
charset-normalizer==3.4.0
@@ -13,6 +17,7 @@ dnspython==2.7.0
1317
email_validator==2.2.0
1418
fastapi==0.115.6
1519
fastapi-cli==0.0.6
20+
frozenlist==1.5.0
1621
h11==0.14.0
1722
httpcore==1.0.7
1823
httptools==0.6.4
@@ -24,8 +29,10 @@ limits==3.14.1
2429
markdown-it-py==3.0.0
2530
MarkupSafe==3.0.2
2631
mdurl==0.1.2
32+
multidict==6.1.0
2733
openai==1.61.1
2834
packaging==24.2
35+
propcache==0.2.1
2936
pycparser==2.22
3037
pydantic==2.10.3
3138
pydantic_core==2.27.1
@@ -52,3 +59,4 @@ uvloop==0.21.0
5259
watchfiles==1.0.3
5360
websockets==14.1
5461
wrapt==1.17.0
62+
yarl==1.18.3

src/app/[username]/[repo]/page.tsx

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import MainCard from "~/components/main-card";
55
import Loading from "~/components/loading";
66
import MermaidChart from "~/components/mermaid-diagram";
77
import { useDiagram } from "~/hooks/useDiagram";
8-
import { ApiKeyDialog } from "~/components/api-key-dialog";
8+
// import { ApiKeyDialog } from "~/components/api-key-dialog";
99
import { ApiKeyButton } from "~/components/api-key-button";
1010
import { useState } from "react";
1111

@@ -18,16 +18,16 @@ export default function Repo() {
1818
loading,
1919
lastGenerated,
2020
cost,
21-
isRegenerating,
22-
showApiKeyDialog,
23-
tokenCount,
21+
// showApiKeyDialog,
22+
// tokenCount,
2423
handleModify,
2524
handleRegenerate,
2625
handleCopy,
27-
handleApiKeySubmit,
28-
handleCloseApiKeyDialog,
26+
// handleApiKeySubmit,
27+
// handleCloseApiKeyDialog,
2928
handleOpenApiKeyDialog,
3029
handleExportImage,
30+
state,
3131
} = useDiagram(params.username.toLowerCase(), params.repo.toLowerCase());
3232

3333
return (
@@ -51,7 +51,14 @@ export default function Repo() {
5151
<div className="mt-8 flex w-full flex-col items-center gap-8">
5252
{loading ? (
5353
<div className="mt-12">
54-
<Loading cost={cost} isModifying={!isRegenerating} />
54+
<Loading
55+
cost={cost}
56+
status={state.status}
57+
message={state.message}
58+
explanation={state.explanation}
59+
mapping={state.mapping}
60+
diagram={state.diagram}
61+
/>
5562
</div>
5663
) : error ? (
5764
<div className="mt-12 text-center">
@@ -77,12 +84,12 @@ export default function Repo() {
7784
)}
7885
</div>
7986

80-
<ApiKeyDialog
87+
{/* <ApiKeyDialog
8188
isOpen={showApiKeyDialog}
8289
onClose={handleCloseApiKeyDialog}
8390
onSubmit={handleApiKeySubmit}
8491
tokenCount={tokenCount}
85-
/>
92+
/> */}
8693
</div>
8794
);
8895
}

0 commit comments

Comments
 (0)