import os
import re
import json
import time
import logging
import requests
from typing import Dict
from uuid import uuid4
from concurrent.futures import ThreadPoolExecutor, as_completed

from flask import Flask, request, Response, jsonify, send_from_directory
from webscout.Provider.Deepinfra import DeepInfra

# -------------------- Flask & Config Setup --------------------
app = Flask(__name__)
AUDIO_DIR = "static/audio"
os.makedirs(AUDIO_DIR, exist_ok=True)

# -------------------- Logging Setup --------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    handlers=[
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("TrueSyncAI")

# -------------------- AI Model Setup --------------------
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
VOICE = os.getenv("VOICE")
BASE_MODEL = DeepInfra(is_conversation=False, update_file=False, system_prompt=SYSTEM_PROMPT)

# -------------------- TTS Generator --------------------
def generate_tts(text: str, verbose: bool = True):
    PROVIDER_URL: str = "https://www.openai.fm/api/generate"
    headers = {
        'sec-ch-ua-platform': '"Windows"',
        'Referer': 'https://www.openai.fm/',
        'sec-ch-ua': '"Microsoft Edge";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',
        'DNT': '1',
        'Range': 'bytes=0-',
    }

    def _remove_emojis(text):
        emoji_pattern = re.compile(
            "["
            "\U0001F600-\U0001F64F"  # Emoticons
            "\U0001F300-\U0001F5FF"  # Symbols & Pictographs
            "\U0001F680-\U0001F6FF"  # Transport & Map symbols
            "\U0001F700-\U0001F77F"  # Alchemical Symbols
            "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
            "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
            "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
            "\U0001FA00-\U0001FA6F"  # Chess Symbols
            "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
            "\U00002700-\U000027BF"  # Dingbats
            "\U000024C2-\U0001F251"  # Enclosed characters
            "]+", flags=re.UNICODE)
        return emoji_pattern.sub(r'', text).strip()

    def _generate_audio_chunk(text: str, chunk_number: int) -> tuple[int, bytes]:
        """
        Generate audio for a single text chunk.

        Args:
            text (str): The text chunk to convert.
            chunk_number (int): The sequence number of the chunk.

        Returns:
            tuple[int, bytes]: Chunk number and audio data.
        """
        params = {
            'input': _remove_emojis(text),
            'prompt': VOICE,
            'voice': 'alloy',
            'generation': str(uuid4()),
        }
        if verbose:
            logger.info(f"Processing Text: {params['input'][:70]}...")
        # Retry until the provider returns non-empty audio data.
        while True:
            try:
                response = requests.get(PROVIDER_URL, params=params, headers=headers)
                response.raise_for_status()
                if response.content:
                    if verbose:
                        logger.info(f"Chunk {chunk_number} processed successfully.")
                    return chunk_number, response.content
                if verbose:
                    logger.info(f"No data received for chunk {chunk_number}.")
            except Exception as e:
                if verbose:
                    logger.info(f"Error processing chunk {chunk_number}: {e}")
            time.sleep(1)

    def _split_text_by_fullstop(text: str, max_length: int = 800):
        parts = []
        while len(text) > max_length:
            # Find the last period (.) within the first max_length characters.
            split_index = text.rfind('.', 0, max_length)
            if split_index == -1:
                # If there's no full stop, force a split at max_length.
                split_index = max_length
            else:
                split_index += 1  # Include the period in the current part.
            part = text[:split_index].strip()
            parts.append(part)
            text = text[split_index:].strip()
        if text:
            parts.append(text)
        return parts

    sentences = _split_text_by_fullstop(text)
    try:
        with ThreadPoolExecutor() as executor:
            futures = {
                executor.submit(_generate_audio_chunk, sentence.strip(), i): i
                for i, sentence in enumerate(sentences, start=1)
            }
            audio_chunks: Dict[int, bytes] = {}
            for future in as_completed(futures):
                chunk_num = futures[future]
                try:
                    part_number, audio_data = future.result()
                    audio_chunks[part_number] = audio_data
                except Exception as e:
                    if verbose:
                        logger.info(f"Failed to generate audio for chunk {chunk_num}: {e}")

        filename = f"{uuid4().hex}.wav"
        filepath = os.path.join(AUDIO_DIR, filename)
        # Write the chunks back to a single file in their original order.
        with open(filepath, 'wb') as f:
            for chunk_num in sorted(audio_chunks.keys()):
                f.write(audio_chunks[chunk_num])

        if verbose:
            logger.info(f"TTS audio generated: {filename}")
        return f"static/audio/{filename}"
    except Exception as e:
        logger.exception(f"Failed to generate audio: {e}")
        return None
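
# Illustrative usage of generate_tts (a minimal sketch, kept as a comment so it is
# not executed at import time): it returns a path under static/audio on success,
# or None if synthesis fails.
#
#     audio_path = generate_tts("Hello there. This is a short TrueSyncAI voice test.")
#     if audio_path:
#         logger.info(f"Audio written to {audio_path}")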
# -------------------- Chat Route --------------------
@app.route("/chat", methods=["POST"])
def chat():
    try:
        data = request.get_json(force=True)
        prompt = str(data.get("prompt", "")).strip()
        if not prompt:
            logger.warning("Missing prompt in request")
            return jsonify({"error": "Missing prompt"}), 400

        logger.info(f"Received prompt: {prompt[:60]}...")

        def generate():
            collected_response = ""
            try:
                for chunk in BASE_MODEL.chat(prompt=prompt, stream=True):
                    collected_response += chunk
                    yield f"data: {json.dumps({'response': chunk})}\n\n"

                # After all chunks are sent, synthesize audio for the full response.
                audio_url = generate_tts(collected_response)
                yield f"data: {json.dumps({'done': True, 'full_response': collected_response, 'audio_url': audio_url})}\n\n"
            except Exception:
                logger.exception("Error during streaming chat response")
                yield f"data: {json.dumps({'error': 'Streaming error occurred'})}\n\n"

        return Response(generate(), mimetype='text/event-stream')
    except Exception as e:
        logger.exception("Chat endpoint failed")
        return jsonify({"error": str(e)}), 500

# -------------------- Realtime Chat Route --------------------
@app.route("/realtime", methods=["POST"])
def realtime():
    try:
        data = request.get_json(force=True)
        prompt = str(data.get("prompt", "")).strip()
        if not prompt:
            logger.warning("Missing prompt in request")
            return jsonify({"error": "Missing prompt"}), 400

        logger.info(f"Received prompt: {prompt[:60]}...")
        response = BASE_MODEL.chat(prompt=prompt, stream=False)
        logger.info(f"Response: {response[:60]}...")
        audio_url = generate_tts(response)
        logger.info(f"Audio URL: {audio_url}")
        return jsonify({'done': True, 'full_response': response, 'audio_url': audio_url})
    except Exception as e:
        logger.exception("Realtime endpoint failed")
        return jsonify({"error": str(e)}), 500

# -------------------- Serve Audio --------------------
@app.route("/static/audio/<filename>")
def serve_audio(filename):
    try:
        response = send_from_directory(AUDIO_DIR, filename)
        response.headers['Cache-Control'] = 'public, max-age=3600'
        logger.info(f"Serving audio file: {filename}")
        return response
    except Exception:
        logger.exception(f"Failed to serve audio: {filename}")
        return jsonify({"error": "Audio file not found"}), 404

# -------------------- Health Check --------------------
@app.route("/")
def index():
    return "🚀 TrueSyncAI Streaming API is live!"
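
# Example client call for the streaming endpoint (illustrative; assumes the server
# is running locally on port 7860):
#
#     curl -N -X POST http://localhost:7860/chat \
#          -H "Content-Type: application/json" \
#          -d '{"prompt": "Tell me about TrueSyncAI"}'
#
# Each SSE event carries a JSON payload: {"response": "<chunk>"} while streaming,
# followed by a final {"done": true, "full_response": "...", "audio_url": "static/audio/<id>.wav"}.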
API is live!" # -------------------- Run Server -------------------- if __name__ == "__main__": app.run(host="0.0.0.0", port=7860)