feat(rnd,blocks): Add D-ID Block (Significant-Gravitas#7798)
* talking head

* linting

* remove clip id, not needed

* add more descriptive name

* add min requirement to polling attempts and intervals

* add docs and link to docs

* remove extra space

* force new tab

* fix linting

* add did key to .env.template
aarushik93 authored Aug 23, 2024
1 parent efcd0f9 commit 012bad7
Showing 7 changed files with 299 additions and 157 deletions.
17 changes: 17 additions & 0 deletions docs/content/server/d_id.md
@@ -0,0 +1,17 @@
# Find available voices for D-ID

1. **ElevenLabs**
- Select any voice from the voice list: https://api.elevenlabs.io/v1/voices
- Copy the voice_id
- Use it as a string in the voice_id field in the CreateTalkingAvatarClip Block

2. **Microsoft Azure Voices**
- Select any voice from the voice gallery: https://speech.microsoft.com/portal/voicegallery
- Click on the "Sample code" tab on the right
   - Copy the voice name, for example: `config.SpeechSynthesisVoiceName = "en-GB-AbbiNeural"`
   - Use the string `en-GB-AbbiNeural` (without the `config.` prefix or quotes) in the voice_id field in the CreateTalkingAvatarClip Block

3. **Amazon Polly Voices**
- Select any voice from the voice list: https://docs.aws.amazon.com/polly/latest/dg/available-voices.html
- Copy the voice name / ID
   - Use it as a string in the voice_id field in the CreateTalkingAvatarClip Block (see the payload sketch below)
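
Whichever provider you choose, the value you copy ends up in the `provider` section of the payload the CreateTalkingAvatarClip Block sends to D-ID's `/clips` endpoint. The sketch below mirrors the payload built in `blocks/talking_head.py`, trimmed to the voice-related fields; it is illustrative only, since the block assembles the full payload for you.

```python
# Illustrative sketch of the /clips payload the block builds from its inputs.
# Only the script/provider portion is shown; presenter, driver, subtitles and
# SSML settings are filled in by the block as well.
payload = {
    "script": {
        "type": "text",
        "provider": {
            "type": "microsoft",             # or "elevenlabs" / "amazon"
            "voice_id": "en-GB-AbbiNeural",  # the ID copied from the provider's voice list
        },
        "input": "Welcome to AutoGPT",
    },
    "config": {"result_format": "mp4"},
}
```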
1 change: 1 addition & 0 deletions docs/mkdocs.yml
@@ -10,6 +10,7 @@ nav:
- Setup: server/setup.md
- Advanced Setup: server/advanced_setup.md
- Using Ollama: server/ollama.md
    - Using D-ID: server/d_id.md

- AutoGPT Agent:
- Introduction: AutoGPT/index.md
6 changes: 5 additions & 1 deletion rnd/autogpt_builder/src/components/SchemaTooltip.tsx
@@ -20,7 +20,11 @@ const SchemaTooltip: React.FC<{ description?: string }> = ({ description }) => {
<ReactMarkdown
  components={{
    a: ({ node, ...props }) => (
      <a className="text-blue-400 underline" {...props} />
      <a
        target="_blank"
        className="text-blue-400 underline"
        {...props}
      />
    ),
  }}
>
280 changes: 125 additions & 155 deletions rnd/autogpt_builder/yarn.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion rnd/autogpt_server/.env.template
@@ -16,4 +16,7 @@ DISCORD_BOT_TOKEN=
SMTP_SERVER=
SMTP_PORT=
SMTP_USERNAME=
SMTP_PASSWORD=
SMTP_PASSWORD=

# D-ID
DID_API_KEY=
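
The new `DID_API_KEY` entry pairs with the `did_api_key` secret added to `util/settings.py` below and with the `SecretField(key="did_api_key")` the block declares. A minimal sketch of checking that the key is picked up, assuming `Secrets` can be instantiated directly; the exact loading mechanics are the server's concern:

```python
# Hedged sketch: verify the D-ID key is visible to the server's settings.
# Field names come from this commit; constructing Secrets() directly is an
# assumption about how the pydantic settings model is normally loaded.
from autogpt_server.util.settings import Secrets

secrets = Secrets()  # BaseSettings reads DID_API_KEY from the environment / .env
print("D-ID key configured:", bool(secrets.did_api_key))
```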
146 changes: 146 additions & 0 deletions rnd/autogpt_server/autogpt_server/blocks/talking_head.py
@@ -0,0 +1,146 @@
import time
from typing import Literal

import requests

from autogpt_server.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from autogpt_server.data.model import BlockSecret, SchemaField, SecretField


class CreateTalkingAvatarClipBlock(Block):
    class Input(BlockSchema):
        api_key: BlockSecret = SecretField(
            key="did_api_key", description="D-ID API Key"
        )
        script_input: str = SchemaField(
            description="The text input for the script", default="Welcome to AutoGPT"
        )
        provider: Literal["microsoft", "elevenlabs", "amazon"] = SchemaField(
            description="The voice provider to use", default="microsoft"
        )
        voice_id: str = SchemaField(
            description="The voice ID to use, get list of voices [here](https://docs.agpt.co/server/d_id)",
            default="en-US-JennyNeural",
        )
        presenter_id: str = SchemaField(
            description="The presenter ID to use", default="amy-Aq6OmGZnMt"
        )
        driver_id: str = SchemaField(
            description="The driver ID to use", default="Vcq0R4a8F0"
        )
        result_format: Literal["mp4", "gif", "wav"] = SchemaField(
            description="The desired result format", default="mp4"
        )
        crop_type: Literal["wide", "square", "vertical"] = SchemaField(
            description="The crop type for the presenter", default="wide"
        )
        subtitles: bool = SchemaField(
            description="Whether to include subtitles", default=False
        )
        ssml: bool = SchemaField(description="Whether the input is SSML", default=False)
        max_polling_attempts: int = SchemaField(
            description="Maximum number of polling attempts", default=30, ge=5
        )
        polling_interval: int = SchemaField(
            description="Interval between polling attempts in seconds", default=10, ge=5
        )

    class Output(BlockSchema):
        video_url: str = SchemaField(description="The URL of the created video")
        error: str = SchemaField(description="Error message if the request failed")

    def __init__(self):
        super().__init__(
            id="98c6f503-8c47-4b1c-a96d-351fc7c87dab",
            description="This block integrates with D-ID to create video clips and retrieve their URLs.",
            categories={BlockCategory.AI},
            input_schema=CreateTalkingAvatarClipBlock.Input,
            output_schema=CreateTalkingAvatarClipBlock.Output,
            test_input={
                "api_key": "your_test_api_key",
                "script_input": "Welcome to AutoGPT",
                "voice_id": "en-US-JennyNeural",
                "presenter_id": "amy-Aq6OmGZnMt",
                "driver_id": "Vcq0R4a8F0",
                "result_format": "mp4",
                "crop_type": "wide",
                "subtitles": False,
                "ssml": False,
                "max_polling_attempts": 5,
                "polling_interval": 5,
            },
            test_output=[
                (
                    "video_url",
                    "https://d-id.com/api/clips/abcd1234-5678-efgh-ijkl-mnopqrstuvwx/video",
                ),
            ],
            test_mock={
                "create_clip": lambda *args, **kwargs: {
                    "id": "abcd1234-5678-efgh-ijkl-mnopqrstuvwx",
                    "status": "created",
                },
                "get_clip_status": lambda *args, **kwargs: {
                    "status": "done",
                    "result_url": "https://d-id.com/api/clips/abcd1234-5678-efgh-ijkl-mnopqrstuvwx/video",
                },
            },
        )

    def create_clip(self, api_key: str, payload: dict) -> dict:
        url = "https://api.d-id.com/clips"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Basic {api_key}",
        }
        response = requests.post(url, json=payload, headers=headers)
        response.raise_for_status()
        return response.json()

    def get_clip_status(self, api_key: str, clip_id: str) -> dict:
        url = f"https://api.d-id.com/clips/{clip_id}"
        headers = {"accept": "application/json", "authorization": f"Basic {api_key}"}
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        return response.json()

    def run(self, input_data: Input) -> BlockOutput:
        try:
            # Create the clip
            payload = {
                "script": {
                    "type": "text",
                    "subtitles": str(input_data.subtitles).lower(),
                    "provider": {
                        "type": input_data.provider,
                        "voice_id": input_data.voice_id,
                    },
                    "ssml": str(input_data.ssml).lower(),
                    "input": input_data.script_input,
                },
                "config": {"result_format": input_data.result_format},
                "presenter_config": {"crop": {"type": input_data.crop_type}},
                "presenter_id": input_data.presenter_id,
                "driver_id": input_data.driver_id,
            }

            response = self.create_clip(input_data.api_key.get_secret_value(), payload)
            clip_id = response["id"]

            # Poll for clip status
            for _ in range(input_data.max_polling_attempts):
                status_response = self.get_clip_status(
                    input_data.api_key.get_secret_value(), clip_id
                )
                if status_response["status"] == "done":
                    yield "video_url", status_response["result_url"]
                    return
                elif status_response["status"] == "error":
                    yield "error", f"Clip creation failed: {status_response.get('error', 'Unknown error')}"
                    return
                time.sleep(input_data.polling_interval)

            yield "error", "Clip creation timed out"
        except Exception as e:
            yield "error", str(e)
1 change: 1 addition & 0 deletions rnd/autogpt_server/autogpt_server/util/settings.py
@@ -108,6 +108,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):

    medium_api_key: str = Field(default="", description="Medium API key")
    medium_author_id: str = Field(default="", description="Medium author ID")
    did_api_key: str = Field(default="", description="D-ID API Key")

    discord_bot_token: str = Field(default="", description="Discord bot token")

