import os
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModel, set_seed
import torch
from typing import Optional
import asyncio
import time
import gc
import re
import random

# Initialize FastAPI
app = FastAPI(title="Character AI Chat - CPU Optimized Backend")

# CORS middleware for a separately hosted frontend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve static files (routes assumed from the filenames; they are not listed in api_info)
@app.get("/avatar.png")
async def get_avatar():
    return FileResponse("avatar.png")


@app.get("/background.png")
async def get_background():
    return FileResponse("background.png")

# Set seed for reproducibility
set_seed(42)

# CPU-optimized configuration for the 11 models
MODELS = {
    "distil-gpt-2": {
        "name": "DistilGPT-2 ⚡",
        "model_path": "Lyon28/Distil_GPT-2",
        "task": "text-generation",
        "max_tokens": 35,
        "priority": 1
    },
    "gpt-2-tinny": {
        "name": "GPT-2 Tinny ⚡",
        "model_path": "Lyon28/GPT-2-Tinny",
        "task": "text-generation",
        "max_tokens": 30,
        "priority": 1
    },
    "bert-tinny": {
        "name": "BERT Tinny 🎭",
        "model_path": "Lyon28/Bert-Tinny",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1
    },
    "distilbert-base-uncased": {
        "name": "DistilBERT 🎭",
        "model_path": "Lyon28/Distilbert-Base-Uncased",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 1
    },
    "albert-base-v2": {
        "name": "ALBERT Base 🎭",
        "model_path": "Lyon28/Albert-Base-V2",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2
    },
    "electra-small": {
        "name": "ELECTRA Small 🎭",
        "model_path": "Lyon28/Electra-Small",
        "task": "text-classification",
        "max_tokens": 0,
        "priority": 2
    },
    "t5-small": {
        "name": "T5 Small 🔄",
        "model_path": "Lyon28/T5-Small",
        "task": "text2text-generation",
        "max_tokens": 40,
        "priority": 2
    },
    "gpt-2": {
        "name": "GPT-2 Standard",
        "model_path": "Lyon28/GPT-2",
        "task": "text-generation",
        "max_tokens": 45,
        "priority": 2
    },
    "tinny-llama": {
        "name": "Tinny Llama",
        "model_path": "Lyon28/Tinny-Llama",
        "task": "text-generation",
        "max_tokens": 50,
        "priority": 3
    },
    "pythia": {
        "name": "Pythia",
        "model_path": "Lyon28/Pythia",
        "task": "text-generation",
        "max_tokens": 50,
        "priority": 3
    },
    "gpt-neo": {
        "name": "GPT-Neo",
        "model_path": "Lyon28/GPT-Neo",
        "task": "text-generation",
        "max_tokens": 55,
        "priority": 3
    }
}
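
# Reading the table above: "priority" appears to rank models by weight class
# (1 = lightest, fastest to load on CPU), and "max_tokens" of 0 marks the
# classification models, which never generate free text.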


class ChatRequest(BaseModel):
    message: str
    model: Optional[str] = "distil-gpt-2"
    situation: Optional[str] = "Santai"
    location: Optional[str] = "Ruang tamu"
    char_name: Optional[str] = "Sayang"
    user_name: Optional[str] = "Kamu"
    max_length: Optional[int] = 150
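
# Example /chat payload (every field except "message" is optional and falls
# back to the defaults above):
# {"message": "Apa kabar?", "model": "distil-gpt-2", "situation": "Santai",
#  "location": "Ruang tamu", "char_name": "Sayang", "user_name": "Kamu"}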

# Character AI response templates
CHARACTER_TEMPLATES = {
    "romantic": [
        "iya sayang, {context}. Apakah kamu merasa nyaman di sini?",
        "tentu saja, {context}. Aku senang bisa bersama kamu seperti ini.",
        "benar sekali, {context}. Rasanya damai ya berada di sini bersama.",
        "hmm iya, {context}. Kamu selalu membuatku merasa bahagia.",
        "ya sayang, {context}. Momen seperti ini sangat berharga untukku."
    ],
    "casual": [
        "iya, {context}. Suasananya memang enak banget.",
        "betul juga, {context}. Aku juga merasa santai di sini.",
        "ya ampun, {context}. Seneng deh bisa kayak gini.",
        "hmm iya, {context}. Bikin pikiran jadi tenang.",
        "benar banget, {context}. Cocok buat santai-santai."
    ],
    "caring": [
        "iya, {context}. Kamu baik-baik saja kan?",
        "ya, {context}. Semoga kamu merasa nyaman.",
        "betul, {context}. Aku harap kamu senang.",
        "hmm, {context}. Apakah kamu butuh sesuatu?",
        "iya sayang, {context}. Jangan sungkan bilang kalau butuh apa-apa."
    ],
    "friendly": [
        "wah iya, {context}. Keren banget ya!",
        "bener tuh, {context}. Asik banget suasananya.",
        "iya dong, {context}. Mantep deh!",
        "setuju banget, {context}. Bikin happy.",
        "ya ampun, {context}. Seru banget ini!"
    ]
}
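
# CHARACTER_TEMPLATES is only surfaced through /config below; a minimal sketch
# of how a template could be rendered (illustrative helper, not wired into the
# original endpoints):
def render_template(style: str, context: str) -> str:
    """Pick a random template for the given style and fill in the context."""
    templates = CHARACTER_TEMPLATES.get(style, CHARACTER_TEMPLATES["casual"])
    return random.choice(templates).format(context=context)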


def create_character_prompt(user_input: str, situation: str, location: str, char_name: str, user_name: str) -> str:
    """Build a character-AI style prompt."""
    clean_input = user_input.replace("{{User}}", user_name).replace("{{Char}}", char_name)
    # Structured prompt for better in-character responses
    prompt = f"""Kamu adalah {char_name}, karakter AI yang sedang ngobrol dengan {user_name}.
Konteks:
- Situasi: {situation}
- Lokasi: {location}
- Gaya bicara: Casual, natural, seperti teman dekat
- Gunakan bahasa Indonesia yang santai dan natural
Percakapan:
{user_name}: {clean_input}
{char_name}:"""
    return prompt
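
# Example: create_character_prompt("Halo {{Char}}!", "Santai", "Ruang tamu", "Sayang", "Kamu")
# returns a prompt whose last two lines are "Kamu: Halo Sayang!" and "Sayang:",
# leaving the model to complete the character's turn.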


def enhance_character_response(response: str, char_name: str, user_name: str, situation: str, location: str, user_input: str) -> str:
    """Clean a raw model response and polish it into character-AI style."""
    if not response:
        response = ""
    response = response.strip()
    # Strip unwanted speaker/context prefixes (names are escaped in case they
    # contain regex metacharacters)
    response = re.sub(rf'^{re.escape(char_name)}[:.]?\s*', '', response, flags=re.IGNORECASE)
    response = re.sub(rf'^{re.escape(user_name)}[:.]?\s*', '', response, flags=re.IGNORECASE)
    response = re.sub(r'^(Situasi|Latar|Konteks)[:.]?.*?\n', '', response, flags=re.MULTILINE | re.IGNORECASE)
    response = re.sub(r'Percakapan:.*?\n.*?:', '', response, flags=re.DOTALL | re.IGNORECASE)
    # Collapse extra whitespace and newlines
    response = re.sub(r'\n+', ' ', response)
    response = re.sub(r'\s+', ' ', response)
    response = response.strip()
# Jika response kosong atau terlalu pendek, buat response kontekstual | |
if not response or len(response.strip()) < 3: | |
situation_lower = situation.lower() | |
input_lower = user_input.lower() | |
# Analisis topik dari user input | |
if any(word in input_lower for word in ["apa kabar", "gimana", "bagaimana", "sehat"]): | |
responses = [ | |
f"Baik banget nih {user_name}! Kamu gimana?", | |
f"Sehat-sehat aja {user_name}, makasih udah nanya!", | |
f"Alhamdulillah baik {user_name}, kamu sendiri?" | |
] | |
elif any(word in input_lower for word in ["lagi ngapain", "sedang apa", "aktivitas"]): | |
responses = [ | |
f"Lagi santai-santai aja nih {user_name}, sambil ngobrol sama kamu.", | |
f"Ga ngapa-ngapain, cuma lagi pengen ngobrol sama {user_name}.", | |
f"Lagi nikmatin suasana {situation.lower()} di {location.lower()} ini." | |
] | |
elif any(word in input_lower for word in ["cantik", "bagus", "keren", "indah"]): | |
responses = [ | |
f"Makasih {user_name}! Kamu juga keren banget!", | |
f"Wah, {user_name} baik banget sih!", | |
f"Hihi, {user_name} bisa aja deh!" | |
] | |
elif any(word in input_lower for word in ["suka", "senang", "happy"]): | |
responses = [ | |
f"Aku juga suka sama {user_name}!", | |
f"Seneng banget deh bisa kayak gini sama {user_name}.", | |
f"Iya {user_name}, aku juga happy banget!" | |
] | |
else: | |
# Default contextual responses | |
if "romantis" in situation_lower: | |
responses = [ | |
f"Iya sayang, aku juga merasakan hal yang sama.", | |
f"Betul {user_name}, momen ini sangat spesial.", | |
f"Aku senang banget bisa seperti ini sama {user_name}." | |
] | |
else: | |
responses = [ | |
f"Iya {user_name}, setuju banget!", | |
f"Bener tuh {user_name}!", | |
f"Wah iya {user_name}, keren ya!" | |
] | |
response = random.choice(responses) | |
    else:
        # Clean up the response we already have
        # Strip stray non-word characters from the start
        response = re.sub(r'^[^\w\s]+', '', response)
        # Ensure the response starts with a capital letter
        if response and response[0].islower():
            response = response[0].upper() + response[1:]
        # Work the user's name in if the reply is short and impersonal
        if user_name.lower() not in response.lower() and len(response) < 50:
            if any(word in response.lower() for word in ["iya", "ya", "benar", "betul"]):
                # Apply at most one replacement: "iya" contains "ya", so
                # running both would insert the name twice
                personalized = response.replace("iya", f"iya {user_name}", 1)
                if personalized == response:
                    personalized = response.replace("ya", f"ya {user_name}", 1)
                response = personalized
    # Keep the response from getting too long
    if len(response) > 150:
        sentences = response.split('.')
        if len(sentences) > 1:
            response = sentences[0] + '.'
        else:
            words = response.split()
            if len(words) > 20:
                response = ' '.join(words[:20]) + '...'
    # Ensure the response ends with punctuation
    if response and response[-1] not in '.!?':
        if any(word in response.lower() for word in ["apa", "gimana", "bagaimana", "kenapa"]):
            response += "?"
        elif any(word in response.lower() for word in ["wah", "keren", "mantep", "asik"]):
            response += "!"
        else:
            response += "."
    return response
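
# Example: with an empty model output, enhance_character_response("", "Sayang",
# "Kamu", "Romantis", "Ruang tamu", "aku suka tempat ini") hits the "suka"
# branch above and returns e.g. "Aku juga suka sama Kamu!".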


# CPU-optimized startup
@app.on_event("startup")
async def load_models():
    app.state.pipelines = {}
    app.state.tokenizers = {}
    # CPU threading limits
    torch.set_num_threads(2)
    os.environ['OMP_NUM_THREADS'] = '2'
    os.environ['MKL_NUM_THREADS'] = '2'
    os.environ['NUMEXPR_NUM_THREADS'] = '2'
    # Hugging Face cache location
    os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
    os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
    os.makedirs(os.environ['HF_HOME'], exist_ok=True)
    print("🎭 Character AI Backend - CPU Optimized Ready!")


# Enhanced chat API for character AI
@app.post("/chat")
async def chat(request: ChatRequest):
    start_time = time.time()
    try:
        model_id = request.model.lower()
        if model_id not in MODELS:
            model_id = "distil-gpt-2"
        model_config = MODELS[model_id]
        # Lazy-load the model with CPU optimizations
        if model_id not in app.state.pipelines:
            print(f"🎭 Loading Character Model {model_config['name']}...")
            pipeline_kwargs = {
                "task": model_config["task"],
                "model": model_config["model_path"],
                "device": -1,
                "torch_dtype": torch.float32,
                "model_kwargs": {
                    "torchscript": False,
                    "low_cpu_mem_usage": True
                }
            }
            app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
            gc.collect()
        pipe = app.state.pipelines[model_id]
        # Build the character prompt
        char_prompt = create_character_prompt(
            request.message,
            request.situation,
            request.location,
            request.char_name,
            request.user_name
        )
if model_config["task"] == "text-generation": | |
# Enhanced generation for character AI | |
result = pipe( | |
char_prompt, | |
max_length=min(len(char_prompt.split()) + model_config["max_tokens"], request.max_length // 2), | |
temperature=0.7, | |
do_sample=True, | |
top_p=0.8, | |
top_k=40, | |
repetition_penalty=1.2, | |
pad_token_id=pipe.tokenizer.eos_token_id, | |
num_return_sequences=1, | |
early_stopping=True, | |
no_repeat_ngram_size=3 | |
)[0]['generated_text'] | |
# Extract character response | |
if char_prompt in result: | |
result = result[len(char_prompt):].strip() | |
# Clean and enhance response | |
result = enhance_character_response(result, request.char_name, request.user_name, request.situation, request.message) | |
elif model_config["task"] == "text-classification": | |
# For classification models, create emotion-based responses | |
try: | |
output = pipe(request.message, truncation=True, max_length=128)[0] | |
emotion_score = output['score'] | |
if emotion_score > 0.8: | |
emotion_responses = [ | |
f"iya {request.user_name}, aku merasakan energi positif dari kata-katamu!", | |
f"wah, {request.user_name} terlihat sangat antusias ya!", | |
f"senang banget deh lihat {request.user_name} kayak gini!" | |
] | |
elif emotion_score > 0.6: | |
emotion_responses = [ | |
f"hmm, aku bisa merasakan perasaan {request.user_name} nih.", | |
f"ya {request.user_name}, suasana hatimu cukup bagus ya.", | |
f"oke {request.user_name}, kayaknya kamu dalam mood yang baik." | |
] | |
else: | |
emotion_responses = [ | |
f"iya {request.user_name}, aku di sini untuk kamu.", | |
f"hmm {request.user_name}, mau cerita lebih lanjut?", | |
f"baiklah {request.user_name}, aku mendengarkan." | |
] | |
result = random.choice(emotion_responses) | |
except: | |
result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.message) | |
elif model_config["task"] == "text2text-generation": | |
# For T5-like models | |
try: | |
t5_input = f"respond as {request.char_name} in {request.situation}: {request.message}" | |
result = pipe( | |
t5_input, | |
max_length=model_config["max_tokens"], | |
temperature=0.7, | |
early_stopping=True | |
)[0]['generated_text'] | |
result = enhance_character_response(result, request.char_name, request.user_name, request.situation, request.message) | |
except: | |
result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.message) | |
        # Final validation
        if not result or len(result.strip()) < 3:
            result = enhance_character_response("", request.char_name, request.user_name, request.situation, request.location, request.message)
        processing_time = round((time.time() - start_time) * 1000)
        return {
            "response": result,
            "model": model_config["name"],
            "status": "success",
            "processing_time": f"{processing_time}ms",
            "character": request.char_name,
            "situation": request.situation,
            "location": request.location
        }
    except Exception as e:
        print(f"❌ Character AI Error: {e}")
        processing_time = round((time.time() - start_time) * 1000)
        # Fallback character responses
        fallback_responses = [
            f"maaf {request.user_name}, aku sedang bingung. Bisa ulangi lagi?",
            f"hmm {request.user_name}, kayaknya aku butuh waktu sebentar untuk berpikir.",
            f"ya {request.user_name}, coba pakai kata yang lebih sederhana?",
            f"iya {request.user_name}, aku masih belajar nih. Sabar ya."
        ]
        fallback = random.choice(fallback_responses)
        return {
            "response": fallback,
            "status": "error",
            "processing_time": f"{processing_time}ms",
            "character": request.char_name
        }


# Health check endpoint
@app.get("/health")
async def health():
    loaded_models = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
    return {
        "status": "healthy",
        "platform": "CPU",
        "loaded_models": loaded_models,
        "total_models": len(MODELS),
        "optimization": "Character AI CPU-Tuned",
        "backend_version": "1.0.0"
    }
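
# Example check (assuming the default port configured at the bottom):
#   curl http://localhost:7860/health
#   -> {"status": "healthy", "platform": "CPU", "loaded_models": 0, ...}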


# Model info endpoint
@app.get("/models")
async def get_models():
    return {
        "models": [
            {
                "id": k,
                "name": v["name"],
                "task": v["task"],
                "max_tokens": v["max_tokens"],
                "priority": v["priority"],
                "cpu_optimized": True,
                "character_ai_ready": True
            }
            for k, v in MODELS.items()
        ],
        "platform": "CPU",
        "recommended_for_roleplay": ["distil-gpt-2", "gpt-2", "gpt-neo", "tinny-llama"],
        "recommended_for_analysis": ["bert-tinny", "distilbert-base-uncased", "albert-base-v2"]
    }


# Configuration endpoint
@app.get("/config")
async def get_config():
    return {
        "default_situation": "Santai",
        "default_location": "Ruang tamu",
        "default_char_name": "Sayang",
        "default_user_name": "Kamu",
        "max_response_length": 300,
        "min_response_length": 50,
        "supported_languages": ["id", "en"],
        "character_templates": list(CHARACTER_TEMPLATES.keys())
    }


# Inference endpoint kept for compatibility
@app.post("/inference")
async def inference(request: dict):
    """CPU-optimized inference endpoint for compatibility."""
    try:
        message = request.get("message", "")
        model_path = request.get("model", "Lyon28/Distil_GPT-2")
        # Map a full model path like "Lyon28/Distil_GPT-2" to the internal id;
        # the normalized key matches the MODELS keys directly
        model_key = model_path.split("/")[-1].lower().replace("_", "-")
        internal_model = model_key if model_key in MODELS else "distil-gpt-2"
        # Build the equivalent ChatRequest
        chat_request = ChatRequest(
            message=message,
            model=internal_model,
            situation=request.get("situation", "Santai"),
            location=request.get("location", "Ruang tamu"),
            char_name=request.get("char_name", "Sayang"),
            user_name=request.get("user_name", "Kamu")
        )
        result = await chat(chat_request)
        return {
            "result": result["response"],
            "status": "success",
            "model_used": result["model"],
            "processing_time": result.get("processing_time", "0ms"),
            "character_info": {
                "name": result.get("character", "Character"),
                "situation": result.get("situation", "Unknown"),
                "location": result.get("location", "Unknown")
            }
        }
    except Exception as e:
        print(f"❌ Inference Error: {e}")
        return {
            "result": "🎭 Character sedang bersiap, coba lagi sebentar...",
            "status": "error"
        }
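
# Example compatibility call:
#   curl -X POST http://localhost:7860/inference \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Halo!", "model": "Lyon28/Distil_GPT-2"}'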


# Serve the HTML frontend
@app.get("/", response_class=HTMLResponse)
async def serve_frontend():
    try:
        with open("index.html", "r", encoding="utf-8") as file:
            return HTMLResponse(content=file.read(), status_code=200)
    except FileNotFoundError:
        return HTMLResponse(content="<h1>Frontend not found</h1>", status_code=404)


# API info endpoint (route path assumed; it is not listed in its own "endpoints" map)
@app.get("/api")
async def api_info():
    return {
        "message": "Character AI Backend Ready",
        "version": "1.0.0",
        "platform": "CPU Optimized",
        "endpoints": {
            "chat": "/chat",
            "models": "/models",
            "health": "/health",
            "config": "/config",
            "inference": "/inference"
        },
        "frontend_url": "/"
    }


# Run with CPU optimizations
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        workers=1,
        timeout_keep_alive=30,
        access_log=False
    )
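
# Equivalent CLI invocation (assuming this module is saved as app.py):
#   uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1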