futzone committed on
Commit
48b3ff3
·
1 Parent(s): 311ac7e
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
3
+ from peft import get_peft_model, LoraConfig, TaskType
4
+ from datasets import load_dataset
5
+ import torch
6
+
7
# Page heading rendered by Streamlit for this app.
st.title("🧠 Llama 3.1 8B Uzbek Fine-Tuning")
8
+
9
+
10
@st.cache_resource
def load_tokenizer_model(model_id):
    """Load the tokenizer and the 8-bit quantized causal LM for *model_id*.

    The function is wrapped in ``st.cache_resource`` so the loaded objects
    are cached by Streamlit and shared between calls with the same argument.

    Args:
        model_id: Hub repository id or local path given to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Local import: BitsAndBytesConfig is not part of the file-level imports.
    from transformers import BitsAndBytesConfig

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # The tokenizer is later asked to pad to a fixed length, which requires a
    # pad token; fall back to EOS when the checkpoint does not define one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Passing ``load_in_8bit`` directly to ``from_pretrained`` is deprecated;
    # the quantization settings belong in a ``BitsAndBytesConfig``.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
    )
    return tokenizer, model
19
+
20
+
21
def tokenize_function(example, tokenizer):
    """Render one record as an ``[INST]`` prompt and tokenize it.

    Args:
        example: Mapping with ``instruction`` and ``output`` entries.
        tokenizer: Callable invoked with the prompt text plus truncation and
            padding options (fixed length of 512).

    Returns:
        Whatever ``tokenizer`` returns for the rendered prompt.
    """
    instruction = example["instruction"]
    answer = example["output"]
    text = "<s>[INST] {} [/INST] {} </s>".format(instruction, answer)

    # Every example is truncated/padded to the same fixed length.
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(text, **encode_options)
24
+
25
+
26
def fine_tune(model_id, dataset_path):
    """Fine-tune the model with LoRA adapters on a JSON instruction dataset.

    Args:
        model_id: Model identifier forwarded to ``load_tokenizer_model``.
        dataset_path: Value passed as ``data_files`` to
            ``load_dataset("json", ...)``. Records are tokenized with
            ``tokenize_function``, which reads their ``instruction`` and
            ``output`` fields.

    Returns:
        A status message string once training has finished and the model and
        tokenizer have been saved to ``finetuned_model``.
    """
    # Local import: the collator is not part of the file-level imports.
    from transformers import DataCollatorForLanguageModeling

    tokenizer, model = load_tokenizer_model(model_id)

    # tokenize_function pads to a fixed length, which needs a pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # LoRA config
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
    )
    # NOTE(review): the model object comes from a cached loader and is handed
    # to get_peft_model on every call - confirm that repeated runs in the same
    # session behave as intended.
    model = get_peft_model(model, lora_config)

    dataset = load_dataset("json", data_files=dataset_path)
    train_split = dataset["train"]
    # Drop the raw text columns so only the tokenizer output reaches the
    # collator.
    tokenized = train_split.map(
        lambda x: tokenize_function(x, tokenizer),
        remove_columns=train_split.column_names,
    )

    # The tokenized records carry no ``labels``; without them the Trainer has
    # no loss to optimize. With mlm=False the collator copies ``input_ids``
    # into ``labels`` and masks padding positions.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    training_args = TrainingArguments(
        per_device_train_batch_size=1,
        num_train_epochs=2,
        output_dir="./results",
        logging_dir="./logs",
        save_strategy="no",
        learning_rate=2e-4,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized,
        data_collator=data_collator,
    )

    trainer.train()
    model.save_pretrained("finetuned_model")
    tokenizer.save_pretrained("finetuned_model")
    return "✅ Fine-tuning tugadi!"
60
+
61
+
62
# 📌 UI
# Two text inputs (pre-filled with defaults) and a button that starts the run.
model_id = st.text_input("Model ID (masalan: qahorov/Llama-3-1-8B-Uzbek)", "qahorov/Llama-3-1-8B-Uzbek")
dataset_path = st.text_input("Dataset path (masalan: datasets/train.json)", "datasets/train.json")

if st.button("🚀 Start Fine-Tuning"):
    # Reject blank inputs up front instead of failing late inside model or
    # dataset loading.
    if not model_id.strip() or not dataset_path.strip():
        st.error("Model ID va dataset path bo'sh bo'lmasligi kerak.")
    else:
        st.warning("Fine-tuning boshlandi! Bu vaqt oladi...")
        status = fine_tune(model_id.strip(), dataset_path.strip())
        st.success(status)