Spaces:

p1atdev
/

PodcastVox

Running

PodcastVox / src / podcast.py

Plat

init

3a09141 11 days ago

3.66 kB

	from tqdm import tqdm
	import logging

	from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation
	from .fetcher import AutoFetcher
	from .voicevox import VoiceVoxClient, SpeakerId, Audio


	class PodcastStudio:
	    """Build a two-voice podcast from a paper URL.

	    The studio fetches the paper, asks the agents to write a blog post and a
	    dialogue from it, structures that dialogue into a ``Conversation`` and
	    synthesises every line through the supplied ``VoiceVoxClient``.
	    """

	    # Scale written to the audio query before synthesis.
	    # NOTE(review): presumably a slight speed-up of the voice; confirm
	    # against the client's audio-query model.
	    _TEMPO_SCALE = 1.1

	    def __init__(self, api_key: str, logging_level: int = logging.INFO):
	        """Create the agents, the fetcher and the logger.

	        Args:
	            api_key: Key handed to each of the three agents.
	            logging_level: Level applied to this module's logger. The logger
	                is obtained with ``logging.getLogger(__name__)``, so the
	                level is shared by every instance created in the process.
	        """
	        self.blogger = BloggerAgent(api_key=api_key)
	        self.writer = WriterAgent(api_key=api_key)
	        self.structure_agent = StructureAgent(api_key=api_key)

	        self.logger = logging.getLogger(__name__)
	        self.logger.setLevel(logging_level)

	        self.fetcher = AutoFetcher()

	    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
	        """Run the text pipeline: paper -> blog -> dialogue -> conversation.

	        Each stage logs its start and end at INFO level and a preview of at
	        most 100 characters of its output at DEBUG level.

	        Args:
	            url: Location of the paper, passed to the fetcher as-is.

	        Returns:
	            A ``(blog, dialogue, conversation)`` tuple holding the output of
	            the blogger, writer and structure agents respectively.
	        """
	        self.logger.info(f"Fetching paper from {url}...")
	        paper = await self.fetcher.fetch(url)
	        self.logger.info("Paper fetched successfully.")
	        self.logger.debug(f"Paper content: {paper[:100]}...")

	        self.logger.info("Creating blog from paper...")
	        blog = await self.blogger.task(paper)
	        self.logger.info("Blog created successfully.")
	        self.logger.debug(f"{blog[:100]}...")

	        # The writer receives the original paper as well as the blog.
	        self.logger.info("Creating dialogue from blog...")
	        dialogue = await self.writer.task(paper, blog)
	        self.logger.info("Dialogue created successfully.")
	        self.logger.debug(f"{dialogue[:100]}...")

	        self.logger.info("Structuring conversation from dialogue...")
	        conversation = await self.structure_agent.task(dialogue)
	        self.logger.info("Conversation structured successfully.")
	        for _d in conversation.conversation:
	            self.logger.debug(f"{_d.role}: {_d.content[:100]}...")

	        return blog, dialogue, conversation

	    async def record_podcast(
	        self,
	        conversation: Conversation,
	        voicevox_client: VoiceVoxClient,
	        speaker_id: SpeakerId,
	        supporter_id: SpeakerId,
	    ) -> Audio:
	        """Synthesise every line of the conversation and join the clips.

	        Lines are synthesised one at a time, in conversation order, so the
	        clips are already ordered when they are joined.

	        Args:
	            conversation: Object whose ``conversation`` attribute is the
	                sequence of lines; each line exposes ``role`` and ``content``.
	            voicevox_client: Client used for the audio query, the synthesis
	                and the final wave concatenation.
	            speaker_id: Voice used for lines whose role is ``"speaker"``.
	            supporter_id: Voice used for lines with any other role.

	        Returns:
	            Whatever ``voicevox_client.post_connect_waves`` returns for the
	            ordered list of clips.
	        """

	        async def _synthesize(voice: SpeakerId, text: str) -> Audio:
	            # Build the query for one line, adjust its tempo, render it.
	            audio_query = await voicevox_client.post_audio_query(
	                text=text,
	                speaker=voice,
	            )
	            # Prefer the tempo-dynamics field when the engine reports one;
	            # otherwise fall back to the plain speed scale.
	            if audio_query.tempoDynamicsScale is not None:
	                audio_query.tempoDynamicsScale = self._TEMPO_SCALE
	            else:
	                audio_query.speedScale = self._TEMPO_SCALE

	            return await voicevox_client.post_synthesis(
	                speaker=voice,
	                audio_query=audio_query,
	            )

	        lines = conversation.conversation
	        audios = []

	        # The context manager closes the bar even when a request raises, so
	        # a failed synthesis does not leave the bar open.
	        with tqdm(
	            total=len(lines),
	            desc="Synthesizing audio",
	            ncols=100,
	        ) as progress_bar:
	            for line in lines:
	                voice = speaker_id if line.role == "speaker" else supporter_id
	                audios.append(await _synthesize(voice, line.content))
	                progress_bar.update(1)
	                progress_bar.set_postfix({"text": line.content[:20] + "..."})

	        # connect audio files
	        return await voicevox_client.post_connect_waves(
	            audio_list=audios,
	        )