import gradio as gr
from functools import lru_cache
import os
import spaces
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import torch

# Target device and model checkpoint for the whole app.
# NOTE: "cuda" is assumed to be available (this runs on a GPU Space).
device = "cuda"
MODEL_NAME = "kevinpro/R-PRM-7B-DPO"
print("Start download")
def load_model():
    """Fetch the causal-LM checkpoint and move it onto the target device.

    Returns the loaded model; relies on the module-level MODEL_NAME and
    device globals.
    """
    causal_lm = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype="bfloat16"
    )
    causal_lm = causal_lm.to(device)
    print(f"Model loaded in {device}")
    return causal_lm
# Load the model and tokenizer once at import time so every request reuses them.
model = load_model()
print("End download")
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@lru_cache(maxsize=100)
def translate(text: str) -> str:
    """Cached front-end for _translate: repeated identical inputs skip the GPU call."""
    return _translate(text)
# Only assign GPU if cache not used
@spaces.GPU
def _translate(text: str) -> str:
    """Run greedy generation on the input text and return the decoded output.

    The prompt is tokenized once and kept as a tensor on the target device;
    the original code round-tripped the ids through CPU/numpy/list and back
    into a tensor for no benefit.
    """
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
    generated = model.generate(
        input_ids=input_ids,
        # max_new_tokens counts only generated tokens, which is what the
        # original max_length=len(prompt)+2048 was emulating.
        max_new_tokens=2048,
        num_return_sequences=1,
    )
    full_output = tokenizer.decode(generated[0], skip_special_tokens=True).strip()
    print(full_output)
    return full_output
description = """
<div style="text-align: center;">
<h1 style="color: #0077be; font-size: 3em;">R-PRM, powered by NJUNLP</h1>
<h3 style="font-size: 3em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step-by-step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
</div>
"""
examples_inputs = [["test"]]
# Build the UI: one input box, a trigger button, one output box, plus
# pre-cached examples, then start the server.
with gr.Blocks() as demo:
    gr.Markdown(description)

    with gr.Row():
        source_box = gr.Textbox(label="Input Text", lines=6)
    with gr.Row():
        run_button = gr.Button("Translate text")
    with gr.Row():
        result_box = gr.Textbox(label="Output Text", lines=6)

    # Route button clicks through the cached wrapper.
    run_button.click(translate, inputs=[source_box], outputs=result_box)

    examples = gr.Examples(
        examples=examples_inputs,
        inputs=[source_box],
        fn=translate,
        outputs=result_box,
        cache_examples=True,
    )

print("Prepared")
demo.launch()