import gradio as gr
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# torch and transformers are only needed by the commented-out
# HuggingFacePipeline variant kept further down for reference.
# import torch
# import transformers

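# Earlier experiments, kept for reference: loading a GGML checkpoint through
# ctransformers' AutoModelForCausalLM, and reading a Hugging Face access
# token from the environment for the gated meta-llama weights.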
# model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
# tokenizer = AutoTokenizer.from_pretrained(model)

# access_token = os.getenv("Llama2")

# Load the quantized Llama-2 chat model once at import time; recreating it
# inside greet() would reload the multi-gigabyte GGML file on every request.
# The stdout callback streams generated tokens to the server log.
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    model_file="llama-2-7b-chat.ggmlv3.q2_K.bin",
    callbacks=[StreamingStdOutCallbackHandler()],
)


def greet(text):
    
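    # Llama-2 chat checkpoints expect the user turn wrapped in
    # [INST] ... [/INST], with an optional <<SYS>> system block at the start.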
    template = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant that summarizes text. Write a concise summary of the following text.
<</SYS>>
{text} [/INST]"""
    
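    # LLMChain fills {text} into the template and calls the model; run()
    # returns the generated completion as a plain string.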
    prompt = PromptTemplate(template=template, input_variables=["text"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    summary = llm_chain.run(text)

    return summary
    
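    # Alternative implementation kept for reference: a full-precision
    # transformers pipeline wrapped in HuggingFacePipeline (requires the
    # gated meta-llama weights and a Hugging Face access token).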
    # model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
    # tokenizer = AutoTokenizer.from_pretrained(model)
    
    # model = "meta-llama/Llama-2-7b-hf"
    # tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)
    
    # pipeline = transformers.pipeline(
    # "text-generation",
    # model=model,
    # tokenizer=tokenizer,
    # torch_dtype=torch.bfloat16,
    # trust_remote_code=True,
    # device_map="auto",
    # max_length=512,
    # max_new_tokens=256,
    # do_sample=True,
    # top_k=10,
    # num_return_sequences=1,
    # eos_token_id=tokenizer.eos_token_id,
    # token=access_token
    # )

    # llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})

    # template = """Write a concise summary of the following:
    # "{text}"
    # CONCISE SUMMARY:"""

    # prompt = PromptTemplate(template=template, input_variables=["text"])
    # llm_chain = LLMChain(prompt=prompt, llm=llm)
    
    # return llm_chain.run(text)

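# Minimal Gradio UI: one textbox for the input text, one for the summary,
# and submit/clear buttons wired to greet().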
with gr.Blocks() as demo:

    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    clear = gr.ClearButton([text, summary])
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")
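    # api_name="greet" also exposes the handler as a named endpoint in the
    # app's auto-generated API docs, so it can be called programmatically.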

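# The default launch() is fine on a hosted Space; locally,
# demo.launch(share=True) would also create a temporary public URL.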
demo.launch()