"""Factory helpers for building LLM clients: HuggingFace (hosted or local), OpenAI-compatible servers, and Groq."""

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.llms import HuggingFacePipeline, HuggingFaceTextGenInference
from langchain_groq import ChatGroq
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaForCausalLM,
    LlamaTokenizer,
    pipeline,
)

def get_llm_hf_online(inference_api_url=""):
    """Get an LLM served through the HuggingFace text-generation-inference API."""
    if not inference_api_url:
        # Default to the public HuggingFace Inference API endpoint for zephyr-7b-beta.
        inference_api_url = (
            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
        )

    llm = HuggingFaceTextGenInference(
        verbose=True,
        max_new_tokens=1024,
        top_p=0.95,
        temperature=0.1,
        inference_server_url=inference_api_url,
        timeout=10,
    )

    return llm

def get_llm_hf_local(model_path):
    """Get a local LLM loaded from a Llama-style HuggingFace checkpoint."""
    # Load the model across available devices and its matching tokenizer.
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Wrap the model in a transformers text-generation pipeline so LangChain can drive it.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=1024,
        model_kwargs={"temperature": 0.1},
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    return llm

def get_llm_openai_chat(model_name, inference_server_url):
    """Get a chat LLM served behind an OpenAI-compatible endpoint."""
    # The API key is a placeholder; self-hosted OpenAI-compatible servers typically ignore it.
    llm = ChatOpenAI(
        model=model_name,
        openai_api_key="EMPTY",
        openai_api_base=inference_server_url,
        max_tokens=1024,
        temperature=0,
    )

    return llm

def get_groq_chat(model_name="llama-3.1-70b-versatile"):
    """Get a chat LLM served by the Groq API (expects GROQ_API_KEY in the environment)."""
    llm = ChatGroq(temperature=0, model_name=model_name)
    return llm
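

# Illustrative usage sketch: the backend choice and prompt below are placeholders;
# any of the factories above can be swapped in. The Groq helper assumes
# GROQ_API_KEY is already set in the environment.
if __name__ == "__main__":
    llm = get_groq_chat()
    print(llm.invoke("Say hello in one short sentence.").content)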