Spaces:
Running
Running
from tqdm import tqdm | |
import logging | |
from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation | |
from .fetcher import AutoFetcher | |
from .voicevox import VoiceVoxClient, SpeakerId, Audio | |
class PodcastStudio:
    """Turn a paper URL into a two-voice podcast.

    ``create_conversation`` fetches the paper and runs it through three
    agents (blog -> dialogue -> structured conversation).
    ``record_podcast`` synthesizes every turn of that conversation through
    a VoiceVox client and joins the clips into one audio object.
    """

    def __init__(self, api_key: str, logging_level: int = logging.INFO):
        """Create the agents, the logger and the fetcher.

        Args:
            api_key: Key handed to each of the three agents.
            logging_level: Level applied to this module's logger.
        """
        self.blogger = BloggerAgent(api_key=api_key)
        self.writer = WriterAgent(api_key=api_key)
        self.structure_agent = StructureAgent(api_key=api_key)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging_level)
        self.fetcher = AutoFetcher()

    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
        """Fetch the paper at *url* and derive a structured conversation.

        Args:
            url: Location of the paper, passed as-is to the fetcher.

        Returns:
            ``(blog, dialogue, conversation)``: the blog written from the
            paper, the dialogue written from paper and blog, and the
            conversation structured from that dialogue.
        """
        self.logger.info(f"Fetching paper from {url}...")
        paper = await self.fetcher.fetch(url)
        self.logger.info("Paper fetched successfully.")
        self.logger.debug(
            f"Paper content: {paper[:100]}..."
        )  # Log first 100 characters

        self.logger.info("Creating blog from paper...")
        blog = await self.blogger.task(paper)
        self.logger.info("Blog created successfully.")
        self.logger.debug(f"{blog[:100]}...")  # Log first 100 characters

        self.logger.info("Creating dialogue from blog...")
        dialogue = await self.writer.task(paper, blog)
        self.logger.info("Dialogue created successfully.")
        self.logger.debug(f"{dialogue[:100]}...")  # Log first 100 characters

        self.logger.info("Structuring conversation from dialogue...")
        conversation = await self.structure_agent.task(dialogue)
        self.logger.info("Conversation structured successfully.")
        for _d in conversation.conversation:
            self.logger.debug(f"{_d.role}: {_d.content[:100]}...")

        return blog, dialogue, conversation

    async def record_podcast(
        self,
        conversation: Conversation,
        voicevox_client: VoiceVoxClient,
        speaker_id: SpeakerId,
        supporter_id: SpeakerId,
        *,
        speed_scale: float = 1.1,
    ) -> Audio:
        """Synthesize every turn of *conversation* and join the clips.

        Turns whose ``role`` is ``"speaker"`` use *speaker_id*; every other
        role uses *supporter_id*. Turns are synthesized one at a time, in
        conversation order.

        Args:
            conversation: Structured conversation to voice.
            voicevox_client: Client used for the audio query, the synthesis
                and the final wave concatenation.
            speaker_id: Voice for turns with role ``"speaker"``.
            supporter_id: Voice for all other turns.
            speed_scale: Value written to the query's ``tempoDynamicsScale``
                when that field is not ``None``, otherwise to ``speedScale``.

        Returns:
            Whatever ``voicevox_client.post_connect_waves`` returns for the
            ordered list of synthesized clips.
        """
        turns = conversation.conversation
        progress_bar = tqdm(
            total=len(turns),
            desc="Synthesizing audio",
            ncols=100,
        )

        async def _synthesis(voice: SpeakerId, text: str) -> Audio:
            """Synthesize one line of text and advance the progress bar."""
            audio_query = await voicevox_client.post_audio_query(
                text=text,
                speaker=voice,
            )
            # Queries that carry a tempoDynamicsScale are sped up through
            # that field; the rest fall back to speedScale.
            if audio_query.tempoDynamicsScale is not None:
                audio_query.tempoDynamicsScale = speed_scale
            else:
                audio_query.speedScale = speed_scale
            audio = await voicevox_client.post_synthesis(
                speaker=voice,
                audio_query=audio_query,
            )
            progress_bar.update(1)
            progress_bar.set_postfix({"text": text[:20] + "..."})
            return audio

        audios = []
        try:
            # Each request is awaited before the next one starts, so the
            # clips are already in conversation order - no re-sorting needed.
            for turn in turns:
                voice = speaker_id if turn.role == "speaker" else supporter_id
                audios.append(await _synthesis(voice, turn.content))
        finally:
            # Close the bar even when a request fails part-way through.
            progress_bar.close()

        # connect audio files
        return await voicevox_client.post_connect_waves(
            audio_list=audios,
        )