Hodely committed
Commit 03cdea3 · verified · Parent: 61dc560

Delete AmInSide

AmInSide/0_data_gen/app.py DELETED
@@ -1,14 +0,0 @@
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
- import gradio as gr
-
- model_id = "Hodely/AmInSide1.0"  # Replace with your actual repo
-
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(model_id)
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
- def chat(user_input):
-     result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
-     return result[0]['generated_text']
-
- gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
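One quirk worth noting: text-generation pipelines return the prompt concatenated with the completion, so this `chat()` echoes the user's input back. A minimal variant (a sketch, not part of the commit) that returns only the new tokens via the pipeline's `return_full_text` flag:

```python
# Sketch: same Gradio app, but return only the model's continuation
# instead of echoing the prompt back in the output.
from transformers import pipeline
import gradio as gr

model_id = "Hodely/AmInSide1.0"  # repo id taken from the deleted file

pipe = pipeline("text-generation", model=model_id)  # loads tokenizer + model

def chat(user_input):
    result = pipe(
        user_input,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        return_full_text=False,  # drop the echoed prompt from generated_text
    )
    return result[0]["generated_text"]

gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
```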
AmInSide/0_data_gen/generate_dataset.py DELETED
@@ -1,22 +0,0 @@
- from transformers import pipeline
- import json
-
- generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct", tokenizer="mistralai/Mistral-7B-Instruct")
-
- seed_prompts = [
-     "Dame 10 preguntas filosóficas con respuestas profundas.",
-     "Genera 5 ejemplos tipo ChatGPT con tono sarcástico pero sabio.",
-     "Crea 10 instrucciones para IA educativa con respuestas creativas."
- ]
-
- output = []
- for prompt in seed_prompts:
-     result = generator(prompt, max_new_tokens=512)[0]["generated_text"]
-     # You could split and clean the output here; for now we keep it simple
-     output.append({"instruction": prompt, "response": result})
-
- with open("instruct_dataset.jsonl", "w", encoding="utf-8") as f:
-     for example in output:
-         f.write(json.dumps(example, ensure_ascii=False) + "\n")
-
- print("✅ Dataset generado.")
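The comment above leaves the split-and-clean step as a TODO. A possible sketch follows; the helper name and cleaning rules are illustrative assumptions, not from the original. It strips the echoed prompt from the front of `generated_text` and drops empty responses:

```python
# Sketch of the "split and clean" step flagged in the comment above.
# Text-generation output starts with the prompt itself, so strip it
# and discard examples where nothing new was generated.
def clean_example(prompt: str, generated: str):
    response = generated
    if response.startswith(prompt):
        response = response[len(prompt):]
    response = response.strip()
    if not response:
        return None  # model produced nothing beyond the prompt
    return {"instruction": prompt, "response": response}

# Usage inside the loop above:
#     example = clean_example(prompt, result)
#     if example:
#         output.append(example)
```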
AmInSide/1_train/train.py DELETED
@@ -1,35 +0,0 @@
- from datasets import load_dataset
- from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
- from peft import get_peft_model, LoraConfig, TaskType
- import torch
-
- model_id = "microsoft/phi-3-mini-4k-instruct"
- dataset_path = "../0_data_gen/instruct_dataset.jsonl"
-
- # Load the custom dataset
- data = load_dataset("json", data_files=dataset_path)
-
- # Tokenization
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- def tokenize(example):
-     return tokenizer(f"<|user|>{example['instruction']}<|assistant|>{example['response']}", truncation=True, padding="max_length", max_length=512)
- tokenized = data["train"].map(tokenize)
-
- # Load model + PEFT
- model = AutoModelForCausalLM.from_pretrained(model_id)
- peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.05)
- model = get_peft_model(model, peft_config)
-
- # Training
- training_args = TrainingArguments(
-     output_dir="./model",
-     per_device_train_batch_size=2,
-     num_train_epochs=3,
-     save_total_limit=1,
-     logging_steps=10,
-     learning_rate=2e-4,
-     fp16=torch.cuda.is_available()
- )
-
- trainer = Trainer(model=model, args=training_args, train_dataset=tokenized)
- trainer.train()
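As written, the mapped dataset has no `labels` column, so the model returns no loss and `trainer.train()` fails. A minimal fix, assuming standard causal-LM fine-tuning, is a language-modeling collator that derives labels from `input_ids`:

```python
# Sketch: give the Trainer labels via a causal-LM collator, since
# tokenize() above produces only input_ids/attention_mask and the
# model cannot compute a loss without labels.
from transformers import DataCollatorForLanguageModeling, Trainer

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # defensive: some tokenizers ship no pad token

# mlm=False makes the collator copy input_ids into labels (pad tokens masked to -100)
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=collator,
)
trainer.train()
```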
AmInSide/2_space/README.md DELETED
@@ -1,6 +0,0 @@
- # AmInSide1.0
- This is the language model created by Hodely. Trained on examples generated by another model and fine-tuned to combine intelligence, humor, and creativity.
-
- 🔹 Trained with LoRA
- 🔹 Base: Phi-3 / Mistral
- 🔹 Fine-tuned in an instruct style on a custom dataset
AmInSide/2_space/app.py DELETED
@@ -1,14 +0,0 @@
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
- import gradio as gr
-
- model_id = "Hodely/AmInSide1.0"  # Replace with your actual repo
-
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(model_id)
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
- def chat(user_input):
-     result = pipe(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
-     return result[0]['generated_text']
-
- gr.Interface(fn=chat, inputs="text", outputs="text", title="🧠 AmInSide1.0").launch()
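Since this file backs the Space's UI, a chat-style variant could keep conversation history with `gr.ChatInterface`. This is a sketch only, not part of the deleted file; it reuses the `<|user|>`/`<|assistant|>` tags from train.py and assumes the classic pair-style history format of older Gradio releases (newer ones default to message dicts):

```python
# Sketch (not in the deleted file): a chat-style Space using gr.ChatInterface,
# which passes the running history to the callback on each turn.
from transformers import pipeline
import gradio as gr

pipe = pipeline("text-generation", model="Hodely/AmInSide1.0")

def chat(message, history):
    # Fold previous turns into the prompt using the same tags as training
    prompt = ""
    for user_msg, bot_msg in history:  # assumes [user, bot] pair format
        prompt += f"<|user|>{user_msg}<|assistant|>{bot_msg}"
    prompt += f"<|user|>{message}<|assistant|>"
    result = pipe(prompt, max_new_tokens=200, temperature=0.7,
                  do_sample=True, return_full_text=False)
    return result[0]["generated_text"]

gr.ChatInterface(chat, title="🧠 AmInSide1.0").launch()
```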
AmInSide/2_space/requirements.txt DELETED
@@ -1,5 +0,0 @@
- transformers
- gradio
- peft
- datasets
- accelerate
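These requirements are unpinned, so the Space could break whenever a dependency ships a new major version. A pinned variant might look like the following; the version numbers are illustrative assumptions, not taken from the commit:

```text
transformers==4.40.0
gradio==4.29.0
peft==0.10.0
datasets==2.19.0
accelerate==0.29.0
```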