Delete AmInSide

- AmInSide/0_data_gen/app.py +0 -14
- AmInSide/0_data_gen/generate_dataset.py +0 -22
- AmInSide/1_train/train.py +0 -35
- AmInSide/2_space/README.md +0 -6
- AmInSide/2_space/app.py +0 -14
- AmInSide/2_space/requirements.txt +0 -5
AmInSide/0_data_gen/app.py
DELETED
@@ -1,14 +0,0 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-import gradio as gr
-
-model_id = "Hodely/AmInSide1.0"  # Replace with your actual repo
-
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
-def chat(user_input):
-    result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
-    return result[0]['generated_text']
-
-gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
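A side note on the deleted app: text-generation pipelines return the prompt concatenated with the continuation, so this interface echoed the user's input back. A minimal sketch of a chat function that returns only the new text, assuming the same pipe object as above (return_full_text is a standard TextGenerationPipeline argument):

def chat(user_input):
    # return_full_text=False makes the pipeline return only the newly
    # generated continuation instead of prompt + continuation.
    result = pipe(user_input, max_new_tokens=200, temperature=0.7,
                  do_sample=True, return_full_text=False)
    return result[0]["generated_text"]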
AmInSide/0_data_gen/generate_dataset.py
DELETED
@@ -1,22 +0,0 @@
-from transformers import pipeline
-import json
-
-generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct", tokenizer="mistralai/Mistral-7B-Instruct")
-
-seed_prompts = [
-    "Give me 10 philosophical questions with deep answers.",
-    "Generate 5 ChatGPT-style examples with a sarcastic but wise tone.",
-    "Create 10 instructions for an educational AI with creative answers."
-]
-
-output = []
-for prompt in seed_prompts:
-    result = generator(prompt, max_new_tokens=512)[0]["generated_text"]
-    # You can split and clean here! For now we keep it simple.
-    output.append({"instruction": prompt, "response": result})
-
-with open("instruct_dataset.jsonl", "w", encoding="utf-8") as f:
-    for example in output:
-        f.write(json.dumps(example, ensure_ascii=False) + "\n")
-
-print("✅ Dataset generated.")
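The deleted inline comment flags the cleanup step the script skips: the pipeline output begins with the seed prompt itself, so each stored "response" duplicated its "instruction". A minimal sketch of that separation; the clean_response helper is hypothetical, not part of the deleted code:

def clean_response(prompt, generated):
    # Text-generation pipelines return the prompt followed by the
    # continuation; strip the echoed prompt and trim whitespace so the
    # stored "response" holds only the model's continuation.
    response = generated[len(prompt):] if generated.startswith(prompt) else generated
    return response.strip()

# Inside the loop above, the append would then become:
# output.append({"instruction": prompt, "response": clean_response(prompt, result)})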
AmInSide/1_train/train.py
DELETED
@@ -1,35 +0,0 @@
-from datasets import load_dataset
-from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
-from peft import get_peft_model, LoraConfig, TaskType
-import torch
-
-model_id = "microsoft/phi-3-mini-4k-instruct"
-dataset_path = "../0_data_gen/instruct_dataset.jsonl"
-
-# Load the custom dataset
-data = load_dataset("json", data_files=dataset_path)
-
-# Tokenization
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-def tokenize(example):
-    return tokenizer(f"<|user|>{example['instruction']}<|assistant|>{example['response']}", truncation=True, padding="max_length", max_length=512)
-tokenized = data["train"].map(tokenize)
-
-# Load model + PEFT
-model = AutoModelForCausalLM.from_pretrained(model_id)
-peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.05)
-model = get_peft_model(model, peft_config)
-
-# Training
-training_args = TrainingArguments(
-    output_dir="./model",
-    per_device_train_batch_size=2,
-    num_train_epochs=3,
-    save_total_limit=1,
-    logging_steps=10,
-    learning_rate=2e-4,
-    fp16=torch.cuda.is_available()
-)
-
-trainer = Trainer(model=model, args=training_args, train_dataset=tokenized)
-trainer.train()
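One detail worth recording about the deleted script: tokenize produces input_ids and attention_mask but never sets labels, so Trainer has no loss to compute for causal-LM fine-tuning. A minimal sketch of the usual fix, assuming the tokenizer, model, training_args, and tokenized objects from the script above:

from transformers import DataCollatorForLanguageModeling, Trainer

# With mlm=False the collator copies input_ids into labels (the model
# shifts them internally), the standard setup for causal-LM training.
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=collator,
)
trainer.train()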
AmInSide/2_space/README.md
DELETED
@@ -1,6 +0,0 @@
-# AmInSide1.0
-This is the language model created by Hodely. Trained on examples generated by another model, fine-tuned to combine intelligence, humor, and creativity.
-
-🔹 Trained with LoRA
-🔹 Base: Phi-3 / Mistral
-🔹 Fine-tuned in instruct style with a custom dataset
AmInSide/2_space/app.py
DELETED
@@ -1,14 +0,0 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-import gradio as gr
-
-model_id = "Hodely/AmInSide1.0"  # Replace with your actual repo
-
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
-def chat(user_input):
-    result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
-    return result[0]['generated_text']
-
-gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
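Since the README says the model was trained with LoRA, Hodely/AmInSide1.0 may have held only adapter weights, in which case AutoModelForCausalLM.from_pretrained in the deleted Space would not load it directly. A minimal sketch of adapter-aware loading under that assumption (peft was already in requirements.txt; whether the repo actually stores an adapter or a merged model is not confirmed by the diff):

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

# AutoPeftModelForCausalLM reads the adapter config, loads the base
# model (Phi-3 / Mistral) underneath, and attaches the LoRA weights.
model = AutoPeftModelForCausalLM.from_pretrained("Hodely/AmInSide1.0")
tokenizer = AutoTokenizer.from_pretrained("Hodely/AmInSide1.0")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)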
AmInSide/2_space/requirements.txt
DELETED
@@ -1,5 +0,0 @@
-transformers
-gradio
-peft
-datasets
-accelerate