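# Gradio Space: summarize text with a quantized Llama-2-7B-Chat model,
# run on CPU via ctransformers (GGML weights) and orchestrated by LangChain.
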
import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# torch and transformers are only needed by the commented-out
# HuggingFacePipeline variant further down, so they are not imported here.
# import torch
# import transformers
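
# Earlier experiments, kept for reference (not used by the running app):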
# model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
# tokenizer = AutoTokenizer.from_pretrained(model)
# access_token = os.getenv("Llama2")
def greet(text):
    # Load the 2-bit quantized (q2_K) GGML build of Llama-2-7B-Chat;
    # ctransformers runs it on CPU and streams tokens to stdout.
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_file="llama-2-7b-chat.ggmlv3.q2_K.bin",
        callbacks=[StreamingStdOutCallbackHandler()],
    )
    # Llama-2 chat format: system instructions inside <<SYS>> ... <</SYS>>,
    # with the user turn wrapped in [INST] ... [/INST].
    template = """
[INST] <<SYS>>
You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
<</SYS>>
{text}[/INST]
"""
    prompt = PromptTemplate(template=template, input_variables=["text"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    summary = llm_chain.run(text)
    return summary
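
# Quick local check (hypothetical input; assumes the GGML weights download
# successfully on first run):
#   print(greet("LangChain is a framework for building LLM applications."))

# Alternative approach, kept for reference: run full-precision weights through
# a transformers text-generation pipeline wrapped in LangChain's
# HuggingFacePipeline. This needs far more memory than the GGML route and a
# Hugging Face access token for the gated meta-llama checkpoint.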
# from langchain.llms import HuggingFacePipeline  # extra import this path would need
#
# model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file='llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
# tokenizer = AutoTokenizer.from_pretrained(model)
# model = "meta-llama/Llama-2-7b-hf"
# tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
# pipeline = transformers.pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
#     max_length=512,
#     max_new_tokens=256,
#     do_sample=True,
#     top_k=10,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id,
#     token=access_token,
# )
# llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0, 'repetition_penalty': 1.1})
# template = """Write a concise summary of the following:
# "{text}"
# CONCISE SUMMARY:"""
# prompt = PromptTemplate(template=template, input_variables=["text"])
# llm_chain = LLMChain(prompt=prompt, llm=llm)
# return llm_chain.run(text)  # only valid inside greet()
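
# Gradio UI: a textbox for the input text and one for the summary; Submit
# calls greet(), and the ClearButton resets both fields.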
with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    clear = gr.ClearButton([text, summary])
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")

demo.launch()