Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for raw text file as input #49

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions podcastfy/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@
app = typer.Typer()


def process_content(urls=None, transcript_file=None, tts_model="openai", generate_audio=True, config=None,
def process_content(urls=None, transcript_file=None, raw_text=None,
tts_model="openai", generate_audio=True, config=None,
conversation_config: Optional[Dict[str, Any]] = None, image_paths: Optional[List[str]] = None):
"""
Process URLs, a transcript file, or image paths to generate a podcast or transcript.

Args:
urls (Optional[List[str]]): A list of URLs to process.
transcript_file (Optional[str]): Path to a transcript file.
raw_text (Optional[str]): Text to process.
tts_model (str): The TTS model to use ('openai' or 'elevenlabs'). Defaults to 'openai'.
generate_audio (bool): Whether to generate audio or just a transcript. Defaults to True.
config (Config): Configuration object to use. If None, default config will be loaded.
Expand Down Expand Up @@ -59,14 +61,18 @@
contents = [content_extractor.extract_content(link) for link in urls]
# Combine all extracted content
combined_content = "\n\n".join(contents)
elif raw_text:
combined_content = raw_text
else:
combined_content = "" # Empty string if no URLs provided

# Generate Q&A content
random_filename = f"transcript_{uuid.uuid4().hex}.txt"
transcript_filepath = os.path.join(config.get('output_directories')['transcripts'], random_filename)
qa_content = content_generator.generate_qa_content(
combined_content, image_file_paths=image_paths or [], output_filepath=transcript_filepath
combined_content,
#image_file_paths=image_paths or [], # FIXME
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't commenting this out prevent podcastfy from generating audio from images?

output_filepath=transcript_filepath
)

if generate_audio:
Expand Down Expand Up @@ -97,6 +103,9 @@
transcript: typer.FileText = typer.Option(
None, "--transcript", "-t", help="Path to a transcript file"
),
raw_file: typer.FileText = typer.Option(
None, "--raw-file", "-r", help="File containing raw text"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit confusing: the user is passing raw text, but the flag is `--raw-file`. It should be consistent. Consider renaming the flag to simply `--text`.

),
tts_model: str = typer.Option(
None, "--tts-model", "-tts", help="TTS model to use (openai or elevenlabs)"
),
Expand Down Expand Up @@ -133,16 +142,22 @@
)
else:
urls_list = urls or []
raw_text = None
if file:
urls_list.extend([line.strip() for line in file if line.strip()])
elif raw_file:
raw_text = raw_file.read()

if not urls_list and not image_paths:
if not urls_list and not image_paths and not raw_file:
raise typer.BadParameter(
"No input provided. Use --url to specify URLs, --file to specify a file containing URLs, --transcript for a transcript file, or --image for image files."
"No input provided. Use --url to specify URLs, --file to specify a file containing URLs, "
"--transcript for a transcript file, --raw-file for a file containing raw text, "
"or --image for image files."
)

final_output = process_content(
urls=urls_list,
urls=urls_list,
raw_text=raw_text,
tts_model=tts_model,
generate_audio=not transcript_only,
config=config,
Expand All @@ -166,106 +181,110 @@
app()


def generate_podcast(
urls: Optional[List[str]] = None,
url_file: Optional[str] = None,
raw_text: Optional[str] = None,
transcript_file: Optional[str] = None,
tts_model: Optional[str] = None,
transcript_only: bool = False,
config: Optional[Dict[str, Any]] = None,
conversation_config: Optional[Dict[str, Any]] = None,
image_paths: Optional[List[str]] = None
) -> Optional[str]:
"""
Generate a podcast or transcript from a list of URLs, a file containing URLs, a transcript file, or image files.

Args:
urls (Optional[List[str]]): List of URLs to process.
url_file (Optional[str]): Path to a file containing URLs, one per line.
raw_text (Optional[str]): Text to process.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about calling it simply `text` instead of `raw_text`?

transcript_file (Optional[str]): Path to a transcript file.
tts_model (Optional[str]): TTS model to use ('openai' or 'elevenlabs').
transcript_only (bool): Generate only a transcript without audio. Defaults to False.
config (Optional[Dict[str, Any]]): User-provided configuration dictionary.
conversation_config (Optional[Dict[str, Any]]): User-provided conversation configuration dictionary.
image_paths (Optional[List[str]]): List of image file paths to process.

Returns:
Optional[str]: Path to the final podcast audio file, or None if only generating a transcript.

Example:
>>> from podcastfy.client import generate_podcast
>>> result = generate_podcast(
... image_paths=['/path/to/image1.jpg', '/path/to/image2.png'],
... tts_model='elevenlabs',
... config={
... 'main': {
... 'default_tts_model': 'elevenlabs'
... },
... 'output_directories': {
... 'audio': '/custom/path/to/audio',
... 'transcripts': '/custom/path/to/transcripts'
... }
... },
... conversation_config={
... 'word_count': 150,
... 'conversation_style': ['informal', 'friendly'],
... 'podcast_name': 'My Custom Podcast'
... }
... )
"""
try:
# Load default config
default_config = load_config()

# Update config if provided
if config:
if isinstance(config, dict):
# Create a deep copy of the default config
updated_config = copy.deepcopy(default_config)
# Update the copy with user-provided values
updated_config.configure(**config)
default_config = updated_config
elif isinstance(config, Config):
# If it's already a Config object, use it directly
default_config = config
else:
raise ValueError("Config must be either a dictionary or a Config object")

main_config = default_config.config.get('main', {})

# Use provided tts_model if specified, otherwise use the one from config
if tts_model is None:
tts_model = main_config.get('default_tts_model', 'openai')

if transcript_file:
if image_paths:
logger.warning("Image paths are ignored when using a transcript file.")
return process_content(
transcript_file=transcript_file,
tts_model=tts_model,
generate_audio=not transcript_only,
config=default_config,
conversation_config=conversation_config
)
else:
urls_list = urls or []
if url_file:
with open(url_file, 'r') as file:
urls_list.extend([line.strip() for line in file if line.strip()])

if not urls_list and not image_paths:
if not urls_list and not image_paths and not raw_text:
raise ValueError(
"No input provided. Please provide either 'urls', 'url_file', 'transcript_file', or 'image_paths'."
"No input provided. Please provide either 'urls', 'url_file', 'transcript_file', "
"'raw_text' or 'image_paths'."
)

return process_content(
urls=urls_list,
raw_text=raw_text,
tts_model=tts_model,
generate_audio=not transcript_only,
config=default_config,
conversation_config=conversation_config,
image_paths=image_paths
)

except Exception as e:
logger.error(f"An error occurred: {str(e)}")
Expand Down