# PodcastVox / src/podcast.py
# Plat
# init
# 3a09141
from tqdm import tqdm
import logging
from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation
from .fetcher import AutoFetcher
from .voicevox import VoiceVoxClient, SpeakerId, Audio
class PodcastStudio:
    """Turn a paper URL into a scripted conversation and then into audio.

    The studio chains a fetcher and three agents (blogger, writer,
    structure agent) to produce a ``Conversation``, and uses a
    caller-supplied VoiceVox client to synthesize that conversation
    line by line into a single audio clip.
    """

    def __init__(self, api_key: str, logging_level: int = logging.INFO):
        """Create the agents, the fetcher and the logger.

        Args:
            api_key: Key forwarded unchanged to every agent constructor.
            logging_level: Level applied to this module's logger.
        """
        self.blogger = BloggerAgent(api_key=api_key)
        self.writer = WriterAgent(api_key=api_key)
        self.structure_agent = StructureAgent(api_key=api_key)
        # NOTE: this is the module-level logger, so the level set here is
        # shared by every PodcastStudio instance in the process.
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging_level)
        self.fetcher = AutoFetcher()

    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
        """Fetch the paper at *url* and derive blog, dialogue and conversation.

        Each stage feeds the next: the fetched paper goes to the blogger,
        paper and blog go to the writer, and the resulting dialogue goes to
        the structure agent.

        Args:
            url: Location handed to the fetcher.

        Returns:
            A ``(blog, dialogue, conversation)`` tuple holding the output of
            the blogger, the writer and the structure agent respectively.
        """
        self.logger.info(f"Fetching paper from {url}...")
        paper = await self.fetcher.fetch(url)
        self.logger.info("Paper fetched successfully.")
        # Only a 100-character preview is logged to keep debug output short.
        self.logger.debug(f"Paper content: {paper[:100]}...")

        self.logger.info("Creating blog from paper...")
        blog = await self.blogger.task(paper)
        self.logger.info("Blog created successfully.")
        self.logger.debug(f"{blog[:100]}...")

        self.logger.info("Creating dialogue from blog...")
        dialogue = await self.writer.task(paper, blog)
        self.logger.info("Dialogue created successfully.")
        self.logger.debug(f"{dialogue[:100]}...")

        self.logger.info("Structuring conversation from dialogue...")
        conversation = await self.structure_agent.task(dialogue)
        self.logger.info("Conversation structured successfully.")
        for _d in conversation.conversation:
            self.logger.debug(f"{_d.role}: {_d.content[:100]}...")

        return blog, dialogue, conversation

    async def record_podcast(
        self,
        conversation: Conversation,
        voicevox_client: VoiceVoxClient,
        speaker_id: SpeakerId,
        supporter_id: SpeakerId,
        *,
        speed_scale: float = 1.1,
    ) -> Audio:
        """Synthesize every line of *conversation* and join the clips.

        Lines are synthesized one after another, in conversation order, and
        the resulting clips are concatenated through the client.

        Args:
            conversation: Conversation whose ``conversation`` entries expose
                ``role`` and ``content``.
            voicevox_client: Client used for the audio query, synthesis and
                wave concatenation requests.
            speaker_id: Voice used for entries whose role is ``"speaker"``.
            supporter_id: Voice used for every other entry.
            speed_scale: Factor written to the audio query before synthesis.
                It is stored in ``tempoDynamicsScale`` when the query already
                carries that field, otherwise in ``speedScale``.

        Returns:
            The audio returned by the client's wave-concatenation request.
        """
        lines = conversation.conversation

        async def _synthesize(voice_id: SpeakerId, text: str) -> Audio:
            # Build the query, adjust its speed, then render it to audio.
            audio_query = await voicevox_client.post_audio_query(
                text=text,
                speaker=voice_id,
            )
            if audio_query.tempoDynamicsScale is not None:
                audio_query.tempoDynamicsScale = speed_scale
            else:
                audio_query.speedScale = speed_scale
            return await voicevox_client.post_synthesis(
                speaker=voice_id,
                audio_query=audio_query,
            )

        progress_bar = tqdm(
            total=len(lines),
            desc="Synthesizing audio",
            ncols=100,
        )
        audios: list[Audio] = []
        try:
            # Requests are awaited sequentially, so ``audios`` is already in
            # conversation order and needs no re-sorting afterwards.
            for line in lines:
                voice_id = speaker_id if line.role == "speaker" else supporter_id
                audios.append(await _synthesize(voice_id, line.content))
                progress_bar.update(1)
                progress_bar.set_postfix({"text": line.content[:20] + "..."})
        finally:
            # Close the bar even when a request fails so the terminal line
            # is released and the bar is not left dangling.
            progress_bar.close()

        # Concatenate the per-line clips into one podcast.
        return await voicevox_client.post_connect_waves(
            audio_list=audios,
        )