"""Gradio demo that summarizes text with a quantized Llama-2 chat model.

The model is served through LangChain's ``CTransformers`` wrapper and the
UI exposes a single "Submit" action (API name ``greet``).
"""

from functools import lru_cache

import gradio as gr
import torch  # noqa: F401  (only referenced by the retired transformers experiment)
import transformers  # noqa: F401  (only referenced by the retired transformers experiment)
from langchain.llms import CTransformers
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# NOTE: an earlier experiment (removed from this file) loaded
# "meta-llama/Llama-2-7b-hf" through ``transformers.pipeline`` wrapped in
# ``HuggingFacePipeline`` (bfloat16, device_map="auto", max_new_tokens=256,
# top_k=10, temperature 0, repetition_penalty 1.1) with the plain prompt
# 'Write a concise summary of the following: "{text}" CONCISE SUMMARY:'.
# It needed an access token read from the "Llama2" environment variable.

MODEL_REPO = "TheBloke/Llama-2-7B-Chat-GGML"
MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"

# Llama-2 chat prompt: the system message has to be wrapped in
# <<SYS>> ... <</SYS>> inside the [INST] block. The previous template had
# bare "<>" markers, which are not the tags the chat model expects.
SUMMARY_TEMPLATE = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
<</SYS>>

{text} [/INST]"""


@lru_cache(maxsize=1)
def _get_summary_chain():
    """Build the summarization chain once and reuse it for every request.

    Constructing ``CTransformers`` is the expensive step, so doing it per
    click (as the code previously did) repeated that cost on every request.
    """
    llm = CTransformers(
        model=MODEL_REPO,
        model_file=MODEL_FILE,
        callbacks=[StreamingStdOutCallbackHandler()],
    )
    prompt = PromptTemplate(template=SUMMARY_TEMPLATE, input_variables=["text"])
    return LLMChain(prompt=prompt, llm=llm)


def greet(text: str) -> str:
    """Return a concise summary of *text* produced by the Llama-2 chat model.

    Args:
        text: The passage to summarize, as typed into the "Text" box.

    Returns:
        The model's summary, or an empty string when *text* is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""
    return _get_summary_chain().run(text)


with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    summary = gr.Textbox(label="Summary")
    greet_btn = gr.Button("Submit")
    # Resets both the input and the output boxes.
    clear = gr.ClearButton([text, summary])
    greet_btn.click(fn=greet, inputs=text, outputs=summary, api_name="greet")

# Launched at module level, as in the original script.
demo.launch()