"""
Hugging Face Space App for Free H200 Training
This app runs nano-coder training on HF's free H200 GPU (4 minutes daily)
"""
import os
import subprocess
import time
import gradio as gr
from datetime import datetime, timedelta
# Configuration
MAX_TRAINING_TIME = 3.5 * 60 # 3.5 minutes to be safe
TRAINING_SCRIPT = "hf_free_training.py"
DATA_PREP_SCRIPT = "prepare_code_dataset.py"
def check_daily_limit():
    """Check if we've used today's free H200 time.

    Returns:
        tuple[bool, str]: (can_run, human-readable status message).
    """
    today_str = str(datetime.now().date())
    marker_path = f"daily_limit_{today_str}.txt"
    # EAFP: a missing marker file simply means we haven't run today.
    try:
        with open(marker_path, 'r') as fh:
            already_used = fh.read().strip() == today_str
    except FileNotFoundError:
        already_used = False
    if already_used:
        return False, "Daily H200 limit reached. Try again tomorrow!"
    return True, "Ready to train!"
def mark_daily_usage():
    """Mark that we've used today's free time.

    Writes today's ISO date into a per-day marker file that
    check_daily_limit() looks for.
    """
    today_str = str(datetime.now().date())
    with open(f"daily_limit_{today_str}.txt", 'w') as fh:
        fh.write(today_str)
def run_training():
    """Run the free H200 training, streaming child output under a time cap.

    Returns:
        str: a human-readable status message — the tail of the training log
        on success or failure, or an error description.
    """
    # Check daily limit first; bail out without burning the quota.
    can_run, message = check_daily_limit()
    if not can_run:
        return message
    try:
        # Mark usage up front so a crash mid-run still counts as today's slot.
        mark_daily_usage()

        # Prepare dataset if not already done.
        if not os.path.exists("data/python-codes-25k/train.bin"):
            print("Preparing dataset...")
            subprocess.run(["python", DATA_PREP_SCRIPT], check=True)

        # Run training in a child process so we can enforce the time limit.
        print("Starting free H200 training...")
        start_time = time.time()
        process = subprocess.Popen(
            ["python", TRAINING_SCRIPT],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

        output_lines = []
        while True:
            elapsed = time.time() - start_time
            if elapsed > MAX_TRAINING_TIME:
                process.terminate()
                output_lines.append(f"\n⏰ Time limit reached ({elapsed/60:.1f} minutes)")
                break
            # NOTE: readline() blocks until the child emits a line, so the
            # deadline check above can fire slightly late if output stalls.
            line = process.stdout.readline()
            if not line and process.poll() is not None:
                break
            if line:
                output_lines.append(line.strip())
                print(line.strip())

        # Give the child a grace period to exit; kill it if SIGTERM is ignored
        # (the original unbounded wait() could hang the app forever here).
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

        tail = "\n".join(output_lines[-20:])  # last 20 lines for the UI
        if process.returncode == 0:
            return "✅ Training completed successfully!\n\n" + tail
        return "❌ Training failed or was interrupted.\n\n" + tail
    except Exception as e:
        return f"❌ Error during training: {str(e)}"
def check_model_status():
    """Check if a trained checkpoint exists and report its size.

    Returns:
        str: a status message (the original's ✅/❌ markers had been
        mojibake-corrupted and split the f-string across lines).
    """
    model_path = "out-nano-coder-free/ckpt.pt"
    if os.path.exists(model_path):
        # Get file size in megabytes for display.
        size = os.path.getsize(model_path) / (1024 * 1024)  # MB
        return f"✅ Model found! Size: {size:.1f} MB"
    else:
        return "❌ No trained model found. Run training first."
def generate_sample_code(prompt, max_tokens=100, temperature=0.8):
    """Generate code from *prompt* using the trained model.

    Args:
        prompt: text the model should complete.
        max_tokens: maximum number of tokens to generate.
        temperature: sampling temperature.

    Returns:
        str: the generated code, or an error message (the ❌ markers had
        been mojibake-corrupted in the original strings).
    """
    if not os.path.exists("out-nano-coder-free/ckpt.pt"):
        return "❌ No trained model found. Please run training first."
    try:
        # Imported lazily so the app can start without the sampling deps.
        from sample_nano_coder import load_model, load_vocab, generate_code
        model, checkpoint = load_model()
        stoi, itos = load_vocab()
        # Generate code; 200 is presumably a top-k cutoff — TODO confirm
        # against sample_nano_coder.generate_code's signature.
        completion = generate_code(model, stoi, itos, prompt, max_tokens, temperature, 200)
        return f"Generated code:\n\n{completion}"
    except Exception as e:
        return f"❌ Error generating code: {str(e)}"
# Create Gradio interface.
# Emoji in labels/markdown were mojibake in the extracted source ("π", "β¨",
# "βοΈ", …); restored to plausible originals — TODO confirm against the repo.
with gr.Blocks(title="Nano-Coder Free H200 Training") as demo:
    gr.Markdown("# 🚀 Nano-Coder Free H200 Training")
    gr.Markdown("Train a nanoGPT model for Python code generation using Hugging Face's free H200 GPU (4 minutes daily)")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎯 Training Control")
            train_button = gr.Button("🚀 Start Free H200 Training", variant="primary")
            status_text = gr.Textbox(label="Training Status", lines=10, interactive=False)
        with gr.Column():
            gr.Markdown("### 📊 Model Status")
            model_status_button = gr.Button("🔄 Check Model Status")
            model_status_text = gr.Textbox(label="Model Status", lines=2, interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎨 Code Generation")
            code_prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="def fibonacci(n):\n ",
                lines=3
            )
            with gr.Row():
                max_tokens = gr.Slider(50, 500, 100, label="Max Tokens")
                temperature = gr.Slider(0.1, 2.0, 0.8, label="Temperature")
            generate_button = gr.Button("✨ Generate Code")
            generated_code = gr.Textbox(label="Generated Code", lines=10, interactive=False)

    # Event handlers: wire the buttons to the module-level functions above.
    train_button.click(
        fn=run_training,
        outputs=status_text
    )
    model_status_button.click(
        fn=check_model_status,
        outputs=model_status_text
    )
    generate_button.click(
        fn=generate_sample_code,
        inputs=[code_prompt, max_tokens, temperature],
        outputs=generated_code
    )

    gr.Markdown("""
### 📋 Instructions
1. **Daily Limit**: You get 4 minutes of free H200 GPU time per day
2. **Training**: Click "Start Free H200 Training" to begin
3. **Model**: Check model status after training
4. **Generation**: Use the trained model to generate Python code

### ⚙️ Model Configuration (Free Tier)
- **Layers**: 6 (reduced from 12)
- **Heads**: 6 (reduced from 12)
- **Embedding**: 384 (reduced from 768)
- **Context**: 512 tokens
- **Parameters**: ~15M (vs 124M full model)

### 💡 Tips
- Training automatically stops at 3.5 minutes to be safe
- Model checkpoints are saved to HF Hub
- Use shorter prompts for better results
""")

if __name__ == "__main__":
    demo.launch()