import os
import re
import json
import time
import logging
from typing import Dict
from uuid import uuid4
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from flask import Flask, request, Response, jsonify, send_from_directory

from webscout.Provider.Deepinfra import DeepInfra

app = Flask(__name__)

AUDIO_DIR = "static/audio"
os.makedirs(AUDIO_DIR, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger("TrueSyncAI")

SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
VOICE = os.getenv("VOICE")
BASE_MODEL = DeepInfra(is_conversation=False, update_file=False, system_prompt=SYSTEM_PROMPT)
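# Note: SYSTEM_PROMPT and VOICE are expected to come from environment variables;
# os.getenv returns None when they are unset, in which case the chat model gets
# no system prompt and the TTS request carries no voice-style instructions.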


def generate_tts(text: str, verbose: bool = True):
    """Convert text to speech via openai.fm and return the relative path to the
    saved audio file, or None if generation fails."""
    PROVIDER_URL: str = "https://www.openai.fm/api/generate"
    headers = {
        'sec-ch-ua-platform': '"Windows"',
        'Referer': 'https://www.openai.fm/',
        'sec-ch-ua': '"Microsoft Edge";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',
        'DNT': '1',
        'Range': 'bytes=0-',
    }

    def _remove_emojis(text):
        emoji_pattern = re.compile(
            "["
            "\U0001F600-\U0001F64F"
            "\U0001F300-\U0001F5FF"
            "\U0001F680-\U0001F6FF"
            "\U0001F700-\U0001F77F"
            "\U0001F780-\U0001F7FF"
            "\U0001F800-\U0001F8FF"
            "\U0001F900-\U0001F9FF"
            "\U0001FA00-\U0001FA6F"
            "\U0001FA70-\U0001FAFF"
            "\U00002700-\U000027BF"
            "\U000024C2-\U0001F251"
            "]+",
            flags=re.UNICODE,
        )
        return emoji_pattern.sub(r'', text).strip()

    def _generate_audio_chunk(text: str, chunk_number: int) -> tuple[int, bytes]:
        """
        Generate audio for a single text chunk.

        Args:
            text (str): The text chunk to convert.
            chunk_number (int): The sequence number of the chunk.

        Returns:
            tuple[int, bytes]: Chunk number and audio data.

        Note:
            Retries indefinitely, pausing one second between attempts, until
            the provider returns non-empty audio data.
        """
        params = {
            'input': _remove_emojis(text),
            'prompt': VOICE,
            'voice': 'alloy',
            'generation': str(uuid4()),
        }
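        # Parameter meanings (based on how openai.fm is called here): 'input' is
        # the text to speak, 'prompt' carries free-form voice/style instructions
        # (the VOICE env value), 'voice' selects the base voice, and 'generation'
        # is a fresh UUID identifying this request.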
        if verbose:
            logger.info(f"Processing Text: {params['input'][:70]}...")
        while True:
            try:
                response = requests.get(
                    PROVIDER_URL,
                    params=params,
                    headers=headers,
                )
                response.raise_for_status()

                if response.content:
                    if verbose:
                        logger.info(f"Chunk {chunk_number} processed successfully.")
                    return chunk_number, response.content

                if verbose:
                    logger.info(f"No data received for chunk {chunk_number}.")

            except Exception as e:
                if verbose:
                    logger.info(f"Error processing chunk {chunk_number}: {e}")
            # Brief pause before retrying; covers both the empty-response and
            # error paths.
            time.sleep(1)

    def _split_text_by_fullstop(text: str, max_length: int = 800):
        parts = []
        while len(text) > max_length:
            # Prefer to split at the last full stop within the length limit;
            # fall back to a hard split if none is found.
            split_index = text.rfind('.', 0, max_length)
            if split_index == -1:
                split_index = max_length
            else:
                split_index += 1

            part = text[:split_index].strip()
            parts.append(part)
            text = text[split_index:].strip()
        if text:
            parts.append(text)
        return parts
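
    # Illustrative example (not in the original code): with a small limit,
    # _split_text_by_fullstop("A. B. C.", max_length=4) returns
    # ['A.', 'B.', 'C.'], since each piece is cut at the last full stop that
    # fits within the limit.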
    sentences = _split_text_by_fullstop(text)

    try:
        with ThreadPoolExecutor() as executor:
            futures = {
                executor.submit(_generate_audio_chunk, sentence.strip(), i): i
                for i, sentence in enumerate(sentences, start=1)
            }

            audio_chunks: Dict[int, bytes] = {}

            for future in as_completed(futures):
                chunk_num = futures[future]
                try:
                    part_number, audio_data = future.result()
                    audio_chunks[part_number] = audio_data
                except Exception as e:
                    if verbose:
                        logger.info(f"Failed to generate audio for chunk {chunk_num}: {e}")

        # Write the chunks back in their original order so the audio plays in
        # sequence.
        filename = f"{uuid4().hex}.wav"
        filepath = os.path.join(AUDIO_DIR, filename)
        with open(filepath, 'wb') as f:
            for chunk_num in sorted(audio_chunks.keys()):
                f.write(audio_chunks[chunk_num])

        if verbose:
            logger.info(f"TTS audio generated: {filename}")
        return f"static/audio/{filename}"

    except Exception as e:
        logger.exception(f"Failed to generate audio: {e}")
        return None


@app.route("/chat", methods=["POST"])
def chat():
    try:
        data = request.get_json(force=True)
        prompt = str(data.get("prompt", "")).strip()
        if not prompt:
            logger.warning("Missing prompt in request")
            return jsonify({"error": "Missing prompt"}), 400

        logger.info(f"Received prompt: {prompt[:60]}...")

        def generate():
            collected_response = ""
            try:
                for chunk in BASE_MODEL.chat(prompt=prompt, stream=True):
                    collected_response += chunk
                    yield f"data: {json.dumps({'response': chunk})}\n\n"

                # Once streaming finishes, synthesize the full reply to audio
                # and emit a final event with the audio URL.
                audio_url = generate_tts(collected_response)
                yield f"data: {json.dumps({'done': True, 'full_response': collected_response, 'audio_url': audio_url})}\n\n"

            except Exception as stream_err:
                logger.exception("Error during streaming chat response")
                yield f"data: {json.dumps({'error': 'Streaming error occurred'})}\n\n"

        return Response(generate(), mimetype='text/event-stream')

    except Exception as e:
        logger.exception("Chat endpoint failed")
        return jsonify({"error": str(e)}), 500
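

# Illustrative usage (not part of the original code): the /chat endpoint
# streams Server-Sent Events, which can be consumed with curl assuming the
# app is running locally on port 7860:
#
#   curl -N -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello there"}'
#
# Each "data:" line is a JSON object carrying either a partial 'response'
# chunk or, at the end, 'done', 'full_response' and 'audio_url'.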


@app.route("/realtime", methods=["POST"])
def realtime():
    try:
        data = request.get_json(force=True)
        prompt = str(data.get("prompt", "")).strip()
        if not prompt:
            logger.warning("Missing prompt in request")
            return jsonify({"error": "Missing prompt"}), 400

        logger.info(f"Received prompt: {prompt[:60]}...")

        response = BASE_MODEL.chat(prompt=prompt, stream=False)
        logger.info(f"Response: {str(response)[:60]}...")
        audio_url = generate_tts(response)
        logger.info(f"Audio URL: {audio_url}")
        return jsonify({'done': True, 'full_response': response, 'audio_url': audio_url})

    except Exception as e:
        logger.exception("Realtime endpoint failed")
        return jsonify({"error": str(e)}), 500


@app.route("/static/audio/<filename>")
def serve_audio(filename):
    try:
        response = send_from_directory(AUDIO_DIR, filename)
        response.headers['Cache-Control'] = 'public, max-age=3600'
        logger.info(f"Serving audio file: {filename}")
        return response
    except Exception as e:
        logger.exception(f"Failed to serve audio: {filename}")
        return jsonify({"error": "Audio file not found"}), 404


@app.route("/")
def index():
    return "🚀 TrueSyncAI Streaming API is live!"


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)