Spaces:
Running
Running
import logging | |
import torch | |
import random | |
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed | |
class TextGenerator: | |
def __init__( | |
self, | |
model_name="gpt2", | |
device="cuda", | |
max_new_tokens=50, | |
temperature=1.0, | |
top_p=0.95, | |
seed=None | |
): | |
self.model_name = model_name | |
self.device = device | |
self.max_new_tokens = max_new_tokens | |
self.temperature = temperature | |
self.top_p = top_p | |
self.seed = seed | |
logging.info(f"[TextGenerator] Загрузка модели {model_name} на {device} ...") | |
self.tokenizer = AutoTokenizer.from_pretrained(model_name) | |
self.model = AutoModelForCausalLM.from_pretrained(model_name).to(device) | |
if seed is not None: | |
set_seed(seed) | |
logging.info(f"[TextGenerator] Сид генерации установлен через transformers.set_seed({seed})") | |
else: | |
logging.info("[TextGenerator] Сид генерации не установлен (seed=None)") | |
# --- Примеры для few-shot обучения --- | |
self.fewshot_examples = [ | |
("happy", "We finally made it!", "We finally made it! I’ve never felt so alive and proud of what we accomplished."), | |
("sad", "He didn't come back.", "He didn't come back. I waited all night, hoping to see him again."), | |
("anger", "Why would you do that?", "Why would you do that? You had no right to interfere!"), | |
("fear", "Did you hear that?", "Did you hear that? Something’s moving outside the window..."), | |
("surprise", "Oh wow, really?", "Oh wow, really? I didn’t see that coming at all!"), | |
("disgust", "That smell is awful.", "That smell is awful. I feel like I’m going to be sick."), | |
("neutral", "Let's meet at noon.", "Let's meet at noon. We’ll have plenty of time to talk then.") | |
] | |
def build_prompt(self, emotion: str, partial_text: str) -> str: | |
few_shot = random.sample(self.fewshot_examples, 2) | |
examples_str = "" | |
for emo, text, cont in few_shot: | |
examples_str += ( | |
f"Example:\n" | |
f"Emotion: {emo}\n" | |
f"Text: {text}\n" | |
f"Continuation: {cont}\n\n" | |
) | |
prompt = ( | |
"You are a helpful assistant that generates emotionally-aligned sentence continuations.\n" | |
"You must include the original sentence in the output, and then continue it in a fluent and emotionally appropriate way.\n\n" | |
f"{examples_str}" | |
f"Now try:\n" | |
f"Emotion: {emotion}\n" | |
f"Text: {partial_text}\n" | |
f"Continuation:" | |
) | |
return prompt | |
def generate_text(self, emotion: str, partial_text: str = "") -> str: | |
prompt = self.build_prompt(emotion, partial_text) | |
logging.debug(f"[TextGenerator] prompt:\n{prompt}") | |
inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device) | |
output_ids = self.model.generate( | |
**inputs, | |
max_new_tokens=self.max_new_tokens, | |
do_sample=True, | |
top_p=self.top_p, | |
temperature=self.temperature, | |
pad_token_id=self.tokenizer.eos_token_id | |
) | |
full_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True) | |
logging.debug(f"[TextGenerator] decoded:\n{full_text}") | |
# Вытаскиваем то, что идёт после последнего "Continuation:" | |
if "Continuation:" in full_text: | |
result = full_text.split("Continuation:")[-1].strip() | |
else: | |
result = full_text.strip() | |
result = result.split("\n")[0].strip() | |
return result | |