llm-demo1-finalfinal

Runtime error

App Files Files Community

llm-demo1-finalfinal / app.py

storresbusquets

Update app.py

6b0b7f4 over 1 year ago

raw

history blame

2.39 kB

	import gradio as gr
	import torch
	import transformers
	from langchain.llms import CTransformers
	from langchain import PromptTemplate, LLMChain
	from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

	# model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
	# tokenizer = AutoTokenizer.from_pretrained(model)

	# access_token = os.getenv("Llama2")

	# Summarization chain shared by all requests. It is built on first use so the
	# model weights are loaded once per process instead of once per click.
	_SUMMARY_CHAIN = None


	def _get_summary_chain():
	    """Return the shared summarization LLMChain, building it on first call.

	    The chain wraps a CTransformers LLM (model "TheBloke/Llama-2-7B-Chat-GGML",
	    file "llama-2-7b-chat.ggmlv3.q2_K.bin") with a StreamingStdOutCallbackHandler
	    attached, and a Llama-2 chat prompt that asks for a concise summary.
	    """
	    global _SUMMARY_CHAIN
	    if _SUMMARY_CHAIN is None:
	        llm = CTransformers(
	            model="TheBloke/Llama-2-7B-Chat-GGML",
	            model_file="llama-2-7b-chat.ggmlv3.q2_K.bin",
	            callbacks=[StreamingStdOutCallbackHandler()],
	        )
	        # Written as explicit "\n"-joined literals so the prompt text does not
	        # pick up stray indentation from the surrounding source layout.
	        template = (
	            "\n"
	            "[INST] <<SYS>>\n"
	            "You are a helpful, respectful and honest assistant that performs "
	            "summaries of text. Write a concise summary of the following text.\n"
	            "<</SYS>>\n"
	            "{text}[/INST]\n"
	        )
	        prompt = PromptTemplate(template=template, input_variables=["text"])
	        _SUMMARY_CHAIN = LLMChain(prompt=prompt, llm=llm)
	    return _SUMMARY_CHAIN


	def greet(text):
	    """Summarize ``text`` with the Llama-2 chat model.

	    Args:
	        text: The text to summarize.

	    Returns:
	        The string produced by running the summarization chain on ``text``,
	        or an empty string when ``text`` is None, empty, or only whitespace
	        (in which case the model is neither loaded nor invoked).
	    """
	    # Nothing to summarize: skip the expensive model load / inference.
	    if text is None or not text.strip():
	        return ""

	    return _get_summary_chain().run(text)




	# model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file = 'llama-2-7b-chat.ggmlv3.q4_K_S.bin', hf=True)
	# tokenizer = AutoTokenizer.from_pretrained(model)

	# model = "meta-llama/Llama-2-7b-hf"
	# tokenizer = AutoTokenizer.from_pretrained(model, token=access_token)

	# pipeline = transformers.pipeline(
	# "text-generation",
	# model=model,
	# tokenizer=tokenizer,
	# torch_dtype=torch.bfloat16,
	# trust_remote_code=True,
	# device_map="auto",
	# max_length=512,
	# max_new_tokens=256,
	# do_sample=True,
	# top_k=10,
	# num_return_sequences=1,
	# eos_token_id=tokenizer.eos_token_id,
	# token=access_token
	# )

	# llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0,'repetition_penalty':1.1})

	# template = """Write a concise summary of the following:
	# "{text}"
	# CONCISE SUMMARY:"""

	# prompt = PromptTemplate(template=template, input_variables=["text"])
	# llm_chain = LLMChain(prompt=prompt, llm=llm)

	# return llm_chain.run(text)

	# Gradio UI: an input box, an output box, a submit button and a clear button.
	with gr.Blocks() as demo:
	    input_box = gr.Textbox(label="Text")
	    output_box = gr.Textbox(label="Summary")
	    submit_btn = gr.Button(value="Submit")
	    # Clears both text boxes when pressed.
	    clear_btn = gr.ClearButton([input_box, output_box])

	    # Pressing "Submit" passes the input box content to greet() and writes
	    # the returned value into the output box; also registered as API "greet".
	    submit_btn.click(
	        greet,
	        inputs=input_box,
	        outputs=output_box,
	        api_name="greet",
	    )

	# Start the app once the layout and event wiring are defined.
	demo.launch()