Spaces:

PKU-ML
/

G1

Runtime error

App Files Files Community

G1 / app.py

PKU-ML

Update app.py

d7d384c verified 4 months ago

raw

history blame

2.21 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import gradio as gr



	# Report which accelerator is visible. Actual weight placement is delegated
	# to `device_map="auto"` below; `device` is kept as a module-level name.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print(f"Using device: {device}")


	model_path = "PKU-ML/G1-7B"
	print("Loading model...")
	# Do NOT chain `.to(device)` onto this call: with `device_map="auto"` the
	# weights are already dispatched, and moving a dispatched model afterwards is
	# redundant at best and can raise a RuntimeError when layers were offloaded.
	model = AutoModelForCausalLM.from_pretrained(
	    model_path,
	    torch_dtype="auto",
	    device_map="auto",
	)
	tokenizer = AutoTokenizer.from_pretrained(model_path)


	# Prompt wrapper filled in with `str.format(instruction=...)` by
	# generate_response. `{instruction}` is the only placeholder; the doubled
	# braces around ANSWER are format escapes that render as literal `{ANSWER}`.
	# The trailing `.strip()` removes the leading/trailing whitespace introduced
	# by the triple-quoted layout.
	INSTRUCTION_TEMPLATE = """
	{instruction}

	Solve the above problem efficiently and clearly. The last line of your response should be of the following format: 'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct' (without quotes) where ANSWER is just the final number or expression that solves the problem. Think step by step before answering.
	""".strip()



	def generate_response(prompt: str) -> str:
	    """Generate the model's answer to a single user question.

	    The question is inserted into ``INSTRUCTION_TEMPLATE``, rendered with the
	    tokenizer's chat template, and completed by the module-level ``model``.

	    Args:
	        prompt: The user's question as typed into the UI.

	    Returns:
	        The decoded completion with the prompt tokens and special tokens
	        removed, or a short hint when ``prompt`` is empty or whitespace.
	    """
	    # Skip a (potentially 4096-token) generation when there is no question.
	    if not (prompt or "").strip():
	        return "Please enter a question."

	    model.eval()

	    messages = [
	        {"role": "user", "content": INSTRUCTION_TEMPLATE.format(instruction=prompt)}
	    ]
	    text = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )
	    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

	    output_ids = model.generate(
	        **model_inputs,
	        max_new_tokens=4096,
	        # temperature / top_p / top_k are only honoured when sampling is on,
	        # so enable it explicitly instead of relying on the checkpoint's
	        # generation config.
	        do_sample=True,
	        top_p=0.95,
	        top_k=30,
	        temperature=0.6,
	    )

	    # generate() returns prompt + completion; keep only the newly generated
	    # tokens of the single sequence in the batch.
	    prompt_length = model_inputs.input_ids.shape[1]
	    completion_ids = output_ids[0][prompt_length:]

	    return tokenizer.decode(completion_ids, skip_special_tokens=True)


	# Single-textbox Gradio UI: the submitted text is passed to
	# generate_response and the returned string is shown in the "Response" box.
	# The former `theme="huggingface"` argument was dropped: it is not one of
	# Gradio's built-in theme names, so current releases only emit a warning and
	# fall back to the default theme.
	interface = gr.Interface(
	    fn=generate_response,
	    inputs=[
	        gr.Textbox(label="Your Message", placeholder="Write your question..."),
	    ],
	    outputs=gr.Textbox(label="Response"),
	    title="G1",
	    description="Ask a graph reasoning question",
	)

	if __name__ == "__main__":
	    interface.launch()