Spaces:

p1atdev
/

PodcastVox

Running

PodcastVox / src / podcast.py

Plat

init

3a09141 6 days ago

3.66 kB

	from tqdm import tqdm
	import logging

	from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation
	from .fetcher import AutoFetcher
	from .voicevox import VoiceVoxClient, SpeakerId, Audio


	class PodcastStudio:
	    """Turn a document at a URL into a two-voice podcast.

	    The pipeline has two stages that callers drive separately:

	    1. ``create_conversation`` fetches the document, drafts a blog post from
	       it, writes a dialogue from the document and the blog, and structures
	       that dialogue into a ``Conversation``.
	    2. ``record_podcast`` synthesizes every line of a ``Conversation`` through
	       a VOICEVOX client and joins the clips into a single audio object.
	    """

	    def __init__(self, api_key: str, logging_level: int = logging.INFO):
	        """Create the agents, the fetcher and the logger used by the pipeline.

	        Args:
	            api_key: Key handed to each of the three agents.
	            logging_level: Level applied to this module's logger. The logger
	                is looked up by module name, so the level is shared by every
	                ``PodcastStudio`` instance in the process.
	        """
	        self.blogger = BloggerAgent(api_key=api_key)
	        self.writer = WriterAgent(api_key=api_key)
	        self.structure_agent = StructureAgent(api_key=api_key)

	        self.logger = logging.getLogger(__name__)
	        self.logger.setLevel(logging_level)

	        self.fetcher = AutoFetcher()

	    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
	        """Fetch the document at ``url`` and turn it into a conversation.

	        Each stage logs its start and completion at INFO level and the first
	        100 characters of its output at DEBUG level.

	        Args:
	            url: Location of the document, passed to the fetcher as-is.

	        Returns:
	            A ``(blog, dialogue, conversation)`` tuple holding the output of
	            the blogger, writer and structure agents respectively.
	        """
	        self.logger.info(f"Fetching paper from {url}...")
	        paper = await self.fetcher.fetch(url)
	        self.logger.info("Paper fetched successfully.")
	        self.logger.debug(f"Paper content: {paper[:100]}...")

	        self.logger.info("Creating blog from paper...")
	        blog = await self.blogger.task(paper)
	        self.logger.info("Blog created successfully.")
	        self.logger.debug(f"{blog[:100]}...")

	        # The writer receives the source document as well as the blog.
	        self.logger.info("Creating dialogue from blog...")
	        dialogue = await self.writer.task(paper, blog)
	        self.logger.info("Dialogue created successfully.")
	        self.logger.debug(f"{dialogue[:100]}...")

	        self.logger.info("Structuring conversation from dialogue...")
	        conversation = await self.structure_agent.task(dialogue)
	        self.logger.info("Conversation structured successfully.")
	        # Skip the per-line slicing and formatting when DEBUG is disabled.
	        if self.logger.isEnabledFor(logging.DEBUG):
	            for line in conversation.conversation:
	                self.logger.debug(f"{line.role}: {line.content[:100]}...")

	        return blog, dialogue, conversation

	    @staticmethod
	    async def _synthesize_line(
	        voicevox_client: VoiceVoxClient,
	        voice_id: SpeakerId,
	        text: str,
	        speed_scale: float,
	    ) -> Audio:
	        """Synthesize one line of dialogue with the given voice."""
	        audio_query = await voicevox_client.post_audio_query(
	            text=text,
	            speaker=voice_id,
	        )
	        # Queries that carry a tempoDynamicsScale value are sped up through
	        # that field; all other queries are sped up through speedScale.
	        if audio_query.tempoDynamicsScale is not None:
	            audio_query.tempoDynamicsScale = speed_scale
	        else:
	            audio_query.speedScale = speed_scale

	        return await voicevox_client.post_synthesis(
	            speaker=voice_id,
	            audio_query=audio_query,
	        )

	    async def record_podcast(
	        self,
	        conversation: Conversation,
	        voicevox_client: VoiceVoxClient,
	        speaker_id: SpeakerId,
	        supporter_id: SpeakerId,
	        speed_scale: float = 1.1,
	    ) -> Audio:
	        """Synthesize every line of ``conversation`` and join the clips.

	        Lines are synthesized one at a time, in conversation order, while a
	        progress bar tracks completion.

	        Args:
	            conversation: Structured dialogue; its ``conversation`` attribute
	                is iterated and each item's ``role`` and ``content`` are read.
	            voicevox_client: Client used for the audio query, synthesis and
	                wave-joining requests.
	            speaker_id: Voice used for lines whose role is ``"speaker"``.
	            supporter_id: Voice used for lines with any other role.
	            speed_scale: Speed factor written into each audio query before
	                synthesis. Defaults to 1.1.

	        Returns:
	            The audio returned by the client's ``post_connect_waves`` call for
	            the ordered list of synthesized clips.
	        """
	        lines = conversation.conversation
	        audios: list[Audio] = []

	        # The context manager closes the bar even when a request raises.
	        with tqdm(
	            total=len(lines),
	            desc="Synthesizing audio",
	            ncols=100,
	        ) as progress_bar:
	            for line in lines:
	                voice_id = speaker_id if line.role == "speaker" else supporter_id
	                audios.append(
	                    await self._synthesize_line(
	                        voicevox_client,
	                        voice_id,
	                        line.content,
	                        speed_scale,
	                    )
	                )
	                progress_bar.update(1)
	                progress_bar.set_postfix({"text": line.content[:20] + "..."})

	        # Clips were appended in conversation order, so no re-sorting is needed.
	        return await voicevox_client.post_connect_waves(
	            audio_list=audios,
	        )