import sys
import subprocess
import os
# First, try to install all dependencies
packages_to_install = [
    "gradio",
    "torch",
    "transformers",
    "accelerate",
    "einops",
    "timm",
    "av",
    "opencv-python-headless"  # Using headless version for better compatibility
]
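# Note: on Hugging Face Spaces, dependencies are normally pinned in
# requirements.txt; this startup-time pip install is a fallback and
# lengthens cold starts.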
for package in packages_to_install:
    print(f"Installing {package}...")
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"Successfully installed {package}")
    except Exception as e:
        print(f"Error installing {package}: {e}")
# Now proceed with the actual application
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc
import datetime
import time
import spaces
# --- Configuration ---
MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
MAX_NEW_TOKENS = 512

# Hugging Face token, read from the environment
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("Warning: the HF_TOKEN environment variable is not set. Private models may not be accessible.")
# --- Environment info ---
print("--- Environment info ---")
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")
print(f"HF_TOKEN set: {'yes' if HF_TOKEN else 'no'}")
# --- Model and tokenizer loading ---
print(f"--- Loading model: {MODEL_ID} ---")
print("The first run may take a few minutes...")

model = None
tokenizer = None
load_successful = False
stop_token_ids_list = []  # initialize stop_token_ids_list
try:
    start_load_time = time.time()

    # Choose device_map and dtype based on the available hardware
    device_map = "auto" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # Load the tokenizer
    tokenizer_kwargs = {
        "trust_remote_code": True
    }
    # Pass the token only if HF_TOKEN is set
    if HF_TOKEN:
        tokenizer_kwargs["token"] = HF_TOKEN
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        **tokenizer_kwargs
    )
    # Load the model
    model_kwargs = {
        "torch_dtype": dtype,
        "device_map": device_map,
        "trust_remote_code": True
    }
    # Pass the token only if HF_TOKEN is set
    if HF_TOKEN:
        model_kwargs["token"] = HF_TOKEN
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        **model_kwargs
    )
    model.eval()

    load_time = time.time() - start_load_time
    print(f"--- Model and tokenizer loaded in {load_time:.2f}s ---")
    load_successful = True
    # --- Stop token setup ---
    stop_token_strings = ["</s>", "<|endoftext|>"]
    temp_stop_ids = [tokenizer.convert_tokens_to_ids(token) for token in stop_token_strings]
    if tokenizer.eos_token_id is not None and tokenizer.eos_token_id not in temp_stop_ids:
        temp_stop_ids.append(tokenizer.eos_token_id)
    elif tokenizer.eos_token_id is None:
        print("Warning: tokenizer.eos_token_id is None; it cannot be added to the stop tokens.")
    stop_token_ids_list = [tid for tid in temp_stop_ids if tid is not None]
    if not stop_token_ids_list:
        print("Warning: no stop token IDs found. Falling back to the default EOS if available; otherwise generation may not stop correctly.")
        if tokenizer.eos_token_id is not None:
            stop_token_ids_list = [tokenizer.eos_token_id]
        else:
            print("Error: no stop tokens found, including the default EOS. Generation may run indefinitely.")
    print(f"Stop token IDs to use: {stop_token_ids_list}")
except Exception as e:
    print(f"!!! Model loading error: {e}")
    if 'model' in locals() and model is not None: del model
    if 'tokenizer' in locals() and tokenizer is not None: del tokenizer
    gc.collect()
    raise gr.Error(f"Failed to load model {MODEL_ID}. The application cannot start. Error: {e}")
# --- System prompt definition ---
def get_system_prompt():
    current_date = datetime.datetime.now().strftime("%Y-%m-%d (%A)")
    # Kept in Korean on purpose: it instructs the model to answer in Korean.
    return (
        f"- 오늘은 {current_date}입니다.\n"
        f"- 사용자의 질문에 대해 친절하고 자세하게 한국어로 답변해야 합니다."
    )
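# English gloss of the system prompt above:
#   "- Today is {current_date}.
#    - Answer the user's questions kindly and in detail, in Korean."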
# --- Warmup function ---
def warmup_model():
    if not load_successful or model is None or tokenizer is None:
        print("Skipping warmup: the model did not load successfully.")
        return

    print("--- Starting model warmup ---")
    try:
        start_warmup_time = time.time()
        warmup_message = "안녕하세요"  # "Hello", kept in Korean, the model's target language
        # Build the warmup input: the system prompt plus one turn in the
        # "Human:/Assistant:" prompt format used throughout this app
        system_prompt = get_system_prompt()
        prompt = f"{system_prompt}\n\nHuman: {warmup_message}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Guard against an empty stop-token list
        gen_kwargs = {
            "max_new_tokens": 10,
            "pad_token_id": tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
            "do_sample": False
        }
        if stop_token_ids_list:
            gen_kwargs["eos_token_id"] = stop_token_ids_list
        else:
            print("Warmup warning: no stop tokens defined for generation.")

        with torch.no_grad():
            output_ids = model.generate(**inputs, **gen_kwargs)

        del inputs
        del output_ids
        gc.collect()
        warmup_time = time.time() - start_warmup_time
        print(f"--- Model warmup finished in {warmup_time:.2f}s ---")
    except Exception as e:
        print(f"!!! Error during model warmup: {e}")
    finally:
        gc.collect()
# --- Inference function ---
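# The `spaces` import above provides the ZeroGPU integration; on Spaces that
# run on Zero (as this one does), a GPU is attached per call by decorating the
# inference function. The decorator below is assumed to be what was intended,
# since the import is otherwise unused.
@spaces.GPU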
def predict(message, history):
    """
    Generates a response with the HyperCLOVAX-SEED-Vision-Instruct-3B model.
    'history' may arrive as Gradio 'messages'-format List[Dict] or as a list
    of (user, assistant) tuples; both shapes are handled below.
    """
    if model is None or tokenizer is None:
        return "Error: the model is not loaded."

    # Flatten the conversation history
    history_text = ""
    if isinstance(history, list):
        for turn in history:
            if isinstance(turn, dict):  # 'messages' format: {"role": ..., "content": ...}
                role = "Human" if turn.get("role") == "user" else "Assistant"
                history_text += f"{role}: {turn.get('content', '')}\n"
            elif isinstance(turn, (list, tuple)) and len(turn) == 2:  # tuple format
                history_text += f"Human: {turn[0]}\nAssistant: {turn[1]}\n"

    # Build the prompt in the model's "Human:/Assistant:" format,
    # prepending the system prompt
    prompt = f"{get_system_prompt()}\n\n{history_text}Human: {message}\nAssistant:"
    inputs = None
    output_ids = None
    try:
        # Prepare the inputs
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        input_length = inputs.input_ids.shape[1]
        print(f"\nInput token count: {input_length}")
    except Exception as e:
        print(f"!!! Error while processing the input: {e}")
        return f"Error: a problem occurred while processing the input. ({e})"
    try:
        print("Generating response...")
        generation_start_time = time.time()
        # Prepare generation kwargs, guarding against an empty stop_token_ids_list
        gen_kwargs = {
            "max_new_tokens": MAX_NEW_TOKENS,
            "pad_token_id": tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1
        }
        if stop_token_ids_list:
            gen_kwargs["eos_token_id"] = stop_token_ids_list
        else:
            print("Generation warning: no stop tokens defined.")

        with torch.no_grad():
            output_ids = model.generate(**inputs, **gen_kwargs)

        generation_time = time.time() - generation_start_time
        print(f"Generation finished in {generation_time:.2f}s.")
    except Exception as e:
        print(f"!!! Error during generation: {e}")
        if inputs is not None: del inputs
        if output_ids is not None: del output_ids
        gc.collect()
        return f"Error: a problem occurred while generating the response. ({e})"
    # Decode the response
    response = "Error: failed to generate a response."
    if output_ids is not None:
        try:
            new_tokens = output_ids[0, input_length:]
            response = tokenizer.decode(new_tokens, skip_special_tokens=True)
            print(f"Output token count: {len(new_tokens)}")
            del new_tokens
        except Exception as e:
            print(f"!!! Error while decoding the response: {e}")
            response = "Error: a problem occurred while decoding the response."

    # Memory cleanup
    if inputs is not None: del inputs
    if output_ids is not None: del output_ids
    gc.collect()
    print("Memory cleanup done.")

    return response.strip()
# --- Gradio interface setup ---
print("--- Setting up the Gradio interface ---")

# Example prompts are kept in Korean (the model's target language);
# English glosses are given in the trailing comments.
examples = [
    ["안녕하세요! 자기소개 좀 해주세요."],  # "Hello! Please introduce yourself."
    ["인공지능과 머신러닝의 차이점은 무엇인가요?"],  # "What is the difference between AI and machine learning?"
    ["딥러닝 모델 학습 과정을 단계별로 알려주세요."],  # "Walk me through the steps of training a deep learning model."
    ["제주도 여행 계획을 세우고 있는데, 3박 4일 추천 코스 좀 알려주세요."],  # "I'm planning a Jeju trip; recommend a 4-day, 3-night itinerary."
]
# Title matching the model name
title = "🤗 HyperCLOVAX-SEED-Vision-Instruct-3B"

# ChatInterface manages its own Chatbot component
demo = gr.ChatInterface(
    fn=predict,
    title=title,
    description=(
        f"**Model:** {MODEL_ID}\n"
    ),
    examples=examples,
    cache_examples=False,
    theme=gr.themes.Soft(),
)
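# Note (assumption): on recent Gradio versions, passing type="messages" to
# gr.ChatInterface delivers `history` in the List[Dict] "messages" format;
# older versions pass (user, assistant) tuples. predict() accepts both shapes,
# so no version pin is required here.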
# --- Application launch ---
if __name__ == "__main__":
    if load_successful:
        warmup_model()
    else:
        print("Skipping warmup because model loading failed.")

    print("--- Launching the Gradio app ---")
    demo.queue().launch(
        # share=True  # uncomment for a public link
        # server_name="0.0.0.0"  # uncomment for local-network access
    )