PodcastVox / src /podcast.py
Plat
init
3a09141
raw
history blame
3.66 kB
from tqdm import tqdm
import logging
from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation
from .fetcher import AutoFetcher
from .voicevox import VoiceVoxClient, SpeakerId, Audio
class PodcastStudio:
    """Turn a paper URL into a scripted conversation and render it as audio.

    The studio chains three agents (blogger -> writer -> structure agent) to
    build a ``Conversation`` from fetched paper content, and drives a
    ``VoiceVoxClient`` to synthesize that conversation turn by turn.
    """

    # Value written to the audio query before synthesis: assigned to
    # ``tempoDynamicsScale`` when the query carries one, else to ``speedScale``.
    _QUERY_SCALE = 1.1

    def __init__(self, api_key: str, logging_level: int = logging.INFO):
        """Create the agents, the fetcher and the logger.

        Args:
            api_key: Key handed to each of the three agents.
            logging_level: Level applied to this module's logger. Note that
                ``logging.getLogger(__name__)`` is shared module-wide, so the
                level set here affects every user of that logger.
        """
        self.blogger = BloggerAgent(api_key=api_key)
        self.writer = WriterAgent(api_key=api_key)
        self.structure_agent = StructureAgent(api_key=api_key)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging_level)
        self.fetcher = AutoFetcher()

    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
        """Fetch the paper at *url* and derive a blog, a dialogue and a conversation.

        The steps run strictly in sequence because each one consumes the
        output of the previous one: fetch -> blog -> dialogue -> conversation.

        Args:
            url: Location passed to the fetcher.

        Returns:
            A ``(blog, dialogue, conversation)`` tuple: the blogger's output,
            the writer's output, and the structured conversation built from
            the dialogue.
        """
        self.logger.info(f"Fetching paper from {url}...")
        paper = await self.fetcher.fetch(url)
        self.logger.info("Paper fetched successfully.")
        # Debug output is capped at the first 100 characters of each artifact.
        self.logger.debug(f"Paper content: {paper[:100]}...")

        self.logger.info("Creating blog from paper...")
        blog = await self.blogger.task(paper)
        self.logger.info("Blog created successfully.")
        self.logger.debug(f"{blog[:100]}...")

        self.logger.info("Creating dialogue from blog...")
        # The writer receives both the original paper and the blog.
        dialogue = await self.writer.task(paper, blog)
        self.logger.info("Dialogue created successfully.")
        self.logger.debug(f"{dialogue[:100]}...")

        self.logger.info("Structuring conversation from dialogue...")
        conversation = await self.structure_agent.task(dialogue)
        self.logger.info("Conversation structured successfully.")
        for turn in conversation.conversation:
            self.logger.debug(f"{turn.role}: {turn.content[:100]}...")

        return blog, dialogue, conversation

    async def record_podcast(
        self,
        conversation: Conversation,
        voicevox_client: VoiceVoxClient,
        speaker_id: SpeakerId,
        supporter_id: SpeakerId,
    ) -> Audio:
        """Synthesize every turn of *conversation* and join the clips.

        Turns are synthesized one at a time, in conversation order, so the
        resulting clip list is already ordered when it is handed to
        ``post_connect_waves``.

        Args:
            conversation: Conversation whose ``conversation`` attribute holds
                the turns; each turn provides ``role`` and ``content``.
            voicevox_client: Client used for the audio query, the synthesis
                and the final wave concatenation.
            speaker_id: Voice used for turns whose role is ``"speaker"``.
            supporter_id: Voice used for turns with any other role.

        Returns:
            The audio returned by ``voicevox_client.post_connect_waves``.

        Raises:
            Any exception raised by the client calls propagates unchanged;
            the progress bar is closed before it does.
        """

        async def _synthesize(voice: SpeakerId, text: str) -> Audio:
            """Build an audio query for *text*, adjust its scale, synthesize it."""
            audio_query = await voicevox_client.post_audio_query(
                text=text,
                speaker=voice,
            )
            if audio_query.tempoDynamicsScale is not None:
                audio_query.tempoDynamicsScale = self._QUERY_SCALE
            else:
                audio_query.speedScale = self._QUERY_SCALE
            return await voicevox_client.post_synthesis(
                speaker=voice,
                audio_query=audio_query,
            )

        turns = conversation.conversation
        audios = []
        # The context manager closes the bar even when a client call raises,
        # so a failed run does not leave a dangling progress bar behind.
        with tqdm(
            total=len(turns),
            desc="Synthesizing audio",
            ncols=100,
        ) as progress_bar:
            # Requests are intentionally awaited one after another, which
            # keeps the clips in conversation order without extra bookkeeping.
            for turn in turns:
                voice = speaker_id if turn.role == "speaker" else supporter_id
                audios.append(await _synthesize(voice, turn.content))
                progress_bar.update(1)
                progress_bar.set_postfix({"text": turn.content[:20] + "..."})

        # Join the per-turn clips into a single audio stream.
        return await voicevox_client.post_connect_waves(
            audio_list=audios,
        )