"""Factory helpers for building LLM clients: HuggingFace (hosted or local), OpenAI-compatible servers, and Groq."""

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.llms import HuggingFacePipeline, HuggingFaceTextGenInference
from langchain_groq import ChatGroq
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaForCausalLM,
    LlamaTokenizer,
    pipeline,
)

def get_llm_hf_online(inference_api_url=""):
    """Get an LLM served through the HuggingFace text-generation-inference API."""
    if not inference_api_url:
        # Default to the public HuggingFace Inference API endpoint for zephyr-7b-beta.
        inference_api_url = (
            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
        )

    llm = HuggingFaceTextGenInference(
        verbose=True,
        max_new_tokens=1024,
        top_p=0.95,
        temperature=0.1,
        inference_server_url=inference_api_url,
        timeout=10,
    )

    return llm

def get_llm_hf_local(model_path):
    """Get a local LLM loaded from a Llama-style HuggingFace checkpoint."""
    # Load the model across available devices and its matching tokenizer.
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Wrap the model in a transformers text-generation pipeline so LangChain can drive it.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=1024,
        model_kwargs={"temperature": 0.1},
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    return llm

def get_llm_openai_chat(model_name, inference_server_url):
    """Get a chat LLM served behind an OpenAI-compatible endpoint."""
    # The API key is a placeholder; self-hosted OpenAI-compatible servers typically ignore it.
    llm = ChatOpenAI(
        model=model_name,
        openai_api_key="EMPTY",
        openai_api_base=inference_server_url,
        max_tokens=1024,
        temperature=0,
    )

    return llm

def get_groq_chat(model_name="llama-3.1-70b-versatile"):
    """Get a chat LLM served by the Groq API (expects GROQ_API_KEY in the environment)."""
    llm = ChatGroq(temperature=0, model_name=model_name)
    return llm
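

# Illustrative usage sketch: the backend choice and prompt below are placeholders;
# any of the factories above can be swapped in. The Groq helper assumes
# GROQ_API_KEY is already set in the environment.
if __name__ == "__main__":
    llm = get_groq_chat()
    print(llm.invoke("Say hello in one short sentence.").content)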