Hodely committed on
Commit aabab56 · verified · 1 parent: 03cdea3

Upload 6 files

0_data_gen/app.py ADDED
@@ -0,0 +1,14 @@
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+ import gradio as gr
+
+ model_id = "Hodely/AmInSide1.0"  # Replace with your real repo
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id)
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ def chat(user_input):
+     result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True, return_full_text=False)  # return only the completion, not the echoed prompt
+     return result[0]['generated_text']
+
+ gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
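Worth noting: train.py below formats training examples as `<|user|>…<|assistant|>`, while this demo passes the raw user input straight to the pipeline. If the published model was fine-tuned on that template, wrapping the input the same way at inference time usually helps; a minimal sketch, reusing the template string from train.py (everything else mirrors the code above):

```python
def chat(user_input):
    # Apply the same template used during fine-tuning (see 1_train/train.py)
    prompt = f"<|user|>{user_input}<|assistant|>"
    result = pipe(prompt, max_new_tokens=200, temperature=0.7,
                  do_sample=True, return_full_text=False)
    return result[0]["generated_text"]
```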
0_data_gen/generate_dataset.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import pipeline
+ import json
+
+ generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2", tokenizer="mistralai/Mistral-7B-Instruct-v0.2")
+
+ seed_prompts = [
+     "Dame 10 preguntas filosóficas con respuestas profundas.",
+     "Genera 5 ejemplos tipo ChatGPT con tono sarcástico pero sabio.",
+     "Crea 10 instrucciones para IA educativa con respuestas creativas."
+ ]
+
+ output = []
+ for prompt in seed_prompts:
+     result = generator(prompt, max_new_tokens=512)[0]["generated_text"]
+     # Here you could split and clean the output; for now we keep it simple
+     output.append({"instruction": prompt, "response": result})
+
+ with open("instruct_dataset.jsonl", "w", encoding="utf-8") as f:
+     for example in output:
+         f.write(json.dumps(example, ensure_ascii=False) + "\n")
+
+ print("✅ Dataset generated.")
1_train/train.py ADDED
@@ -0,0 +1,39 @@
+ from datasets import load_dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
+ from peft import get_peft_model, LoraConfig, TaskType
+ import torch
+
+ model_id = "microsoft/Phi-3-mini-4k-instruct"
+ dataset_path = "../0_data_gen/instruct_dataset.jsonl"
+
+ # Load the custom dataset
+ data = load_dataset("json", data_files=dataset_path)
+
+ # Tokenization
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token  # padding="max_length" needs a pad token
+ def tokenize(example):
+     tokens = tokenizer(f"<|user|>{example['instruction']}<|assistant|>{example['response']}", truncation=True, padding="max_length", max_length=512)
+     tokens["labels"] = tokens["input_ids"].copy()  # causal LM: labels mirror input_ids so Trainer can compute the loss
+     return tokens
+ tokenized = data["train"].map(tokenize)
+
+ # Load model + PEFT
+ model = AutoModelForCausalLM.from_pretrained(model_id)
+ peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.05)
+ model = get_peft_model(model, peft_config)
+
+ # Training
+ training_args = TrainingArguments(
+     output_dir="./model",
+     per_device_train_batch_size=2,
+     num_train_epochs=3,
+     save_total_limit=1,
+     logging_steps=10,
+     learning_rate=2e-4,
+     fp16=torch.cuda.is_available()
+ )
+
+ trainer = Trainer(model=model, args=training_args, train_dataset=tokenized)
+ trainer.train()
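One gap: the Trainer writes checkpoints to ./model, but the script never explicitly saves or publishes the final adapter, while both app.py files load "Hodely/AmInSide1.0" with plain AutoModelForCausalLM. A hedged sketch of that missing publishing step, merging the LoRA weights into the base model so the Space can load standalone weights (the repo id is reused from the apps, and push_to_hub assumes a Hugging Face token is configured):

```python
# After trainer.train():
trainer.save_model("./model")               # persist the LoRA adapter locally
merged = model.merge_and_unload()           # fold the LoRA deltas into the base weights
merged.push_to_hub("Hodely/AmInSide1.0")    # publish full weights for the Space
tokenizer.push_to_hub("Hodely/AmInSide1.0")
```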
2_space/README.md ADDED
@@ -0,0 +1,6 @@
+ # AmInSide1.0
+ This is the language model created by Hodely. It was trained on examples generated by another model and fine-tuned to combine intelligence, humor, and creativity.
+
+ 🔹 Trained with LoRA
+ 🔹 Base: Phi-3 / Mistral
+ 🔹 Fine-tuned on a custom instruction-style dataset
2_space/app.py ADDED
@@ -0,0 +1,14 @@
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+ import gradio as gr
+
+ model_id = "Hodely/AmInSide1.0"  # Replace with your real repo
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id)
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ def chat(user_input):
+     result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True, return_full_text=False)  # return only the completion, not the echoed prompt
+     return result[0]['generated_text']
+
+ gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
2_space/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ gradio
+ peft
+ datasets
+ accelerate