# app.py: Gradio demo for the PKU-ML/G1-7B graph reasoning model
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Select GPU if available; with device_map="auto" below, accelerate handles
# the actual weight placement, so `device` is only reported for logging.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_path = "PKU-ML/G1-7B"

print("Loading model...")
# device_map="auto" already dispatches the model across available devices;
# chaining .to(device) onto an auto-dispatched model is redundant and can
# raise an error, so it is dropped here.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
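
# The template below asks the model to finish with a parseable
# $\boxed{...}$ answer line, so the final result can be extracted from the
# chain-of-thought output.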
INSTRUCTION_TEMPLATE = """
{instruction}
Solve the above problem efficiently and clearly. The last line of your response should be of the following format: 'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct' (without quotes) where ANSWER is just the final number or expression that solves the problem. Think step by step before answering.
""".strip()
def generate_response(prompt):
    model.eval()
    messages = [
        {"role": "user", "content": INSTRUCTION_TEMPLATE.format(instruction=prompt)}
    ]
    # Render the chat template as text, leaving the assistant turn open.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        do_sample=True,  # required for top_p/top_k/temperature to take effect
        top_p=0.95,
        top_k=30,
        temperature=0.6,
    )
    # Strip the prompt tokens so only the newly generated text is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
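
# Example usage (the prompt below is hypothetical, for illustration only):
#   generate_response("Is there a path from node 0 to node 3 in the graph "
#                     "with edges (0,1), (1,2), (2,3)?")
# should return step-by-step reasoning ending in a line of the form
# "Therefore, the final answer is: $\boxed{Yes}$. I hope it is correct".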
interface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Your Message", placeholder="Write your question..."),
        # gr.Slider(label="Max Length", minimum=50, maximum=200, step=10, value=100),
        # gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.65),
        # gr.Slider(label="Top-p (nucleus)", minimum=0.1, maximum=1.0, step=0.05, value=0.8),
    ],
    outputs=gr.Textbox(label="Response"),
    title="G1",
    description="Ask a graph reasoning question",
    theme="huggingface",
)

if __name__ == "__main__":
    interface.launch()