# Page residue from the Hugging Face Spaces listing (not part of the program):
# Space status at capture time: "Runtime error".
import gradio as gr | |
import torch | |
import transformers | |
from langchain.llms import CTransformers | |
from langchain import PromptTemplate, LLMChain | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
# model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True) | |
# tokenizer = AutoTokenizer.from_pretrained(model) | |
# access_token = os.getenv("Llama2") | |
# Llama-2 chat-style prompt: the system instruction sits between <<SYS>> tags
# and the user text is substituted for {text} before the closing [/INST].
_SUMMARY_TEMPLATE = """
[INST] <<SYS>>
You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
<</SYS>>
{text}[/INST]
"""

# Summarisation chain shared by every request; built lazily on first use so
# importing this module stays cheap.
_summary_chain = None


def _get_summary_chain():
    """Return the shared summarisation chain, building it on first use."""
    global _summary_chain
    if _summary_chain is None:
        # Constructing CTransformers is the expensive step, so it must not be
        # repeated for every button click.
        llm = CTransformers(
            model="TheBloke/Llama-2-7B-Chat-GGML",
            model_file="llama-2-7b-chat.ggmlv3.q2_K.bin",
            callbacks=[StreamingStdOutCallbackHandler()],
        )
        prompt = PromptTemplate(
            template=_SUMMARY_TEMPLATE,
            input_variables=["text"],
        )
        _summary_chain = LLMChain(prompt=prompt, llm=llm)
    return _summary_chain


def greet(text):
    """Summarise *text* with the Llama-2 chat model.

    Args:
        text: The passage to summarise, as entered in the UI textbox.

    Returns:
        The summary produced by the LLM chain, or an empty string when
        *text* is ``None``, empty, or whitespace only (the model is not
        invoked in that case).
    """
    # Nothing to summarise: skip model loading and inference entirely.
    if text is None or not text.strip():
        return ""
    return _get_summary_chain().run(text)
# model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True) | |
# tokenizer = AutoTokenizer.from_pretrained(model) | |
# model = "meta-llama/Llama-2-7b-hf" | |
# tokenizer = AutoTokenizer.from_pretrained(model, token=access_token) | |
# pipeline = transformers.pipeline( | |
# "text-generation", | |
# model=model, | |
# tokenizer=tokenizer, | |
# torch_dtype=torch.bfloat16, | |
# trust_remote_code=True, | |
# device_map="auto", | |
# max_length=512, | |
# max_new_tokens=256, | |
# do_sample=True, | |
# top_k=10, | |
# num_return_sequences=1, | |
# eos_token_id=tokenizer.eos_token_id, | |
# token=access_token | |
# ) | |
# llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1}) | |
# template = """Write a concise summary of the following: | |
# "{text}" | |
# CONCISE SUMMARY:""" | |
# prompt = PromptTemplate(template=template, input_variables=["text"]) | |
# llm_chain = LLMChain(prompt=prompt, llm=llm) | |
# return llm_chain.run(text) | |
# Build the UI: an input textbox, an output textbox, and Submit / Clear
# buttons. Submit sends the input text to greet() and shows its return value.
with gr.Blocks() as demo:
    input_box = gr.Textbox(label="Text")
    output_box = gr.Textbox(label="Summary")
    submit_button = gr.Button(value="Submit")
    # The clear button resets both textboxes.
    clear_button = gr.ClearButton(components=[input_box, output_box])
    submit_button.click(
        fn=greet,
        inputs=input_box,
        outputs=output_box,
        api_name="greet",
    )

# Start the Gradio server for the assembled interface.
demo.launch()