boryasbora committed
Commit e71849e · verified · 1 Parent(s): 9d19f68

Update app.py

Files changed (1)
  1. app.py +16 -10
app.py CHANGED
@@ -21,7 +21,12 @@ os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
 os.environ['LANGCHAIN_API_KEY'] = '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'
 
 
-
+@st.cache_resource
+def load_model():
+    model_name = "bigscience/bloom-1b7"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
+    return model, tokenizer
 def load_from_pickle(filename):
     with open(filename, "rb") as file:
         return pickle.load(file)
@@ -70,16 +75,17 @@ def get_chain(temperature):
     child_splitter = RecursiveCharacterTextSplitter(chunk_size=300,
                                                     chunk_overlap=50)
     retriever = load_retriever(docstore_path,chroma_path,embeddings,child_splitter,parent_splitter)
-    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-    # Replace the local OLMOLLM with the Hugging Face model
-    pipe = pipeline(
-        "text-generation",
-        model=model_name,
-        max_length=3000, # Allows for 2,093 input tokens + some generated tokens
-        max_new_tokens=500, # Generates up to 100 new tokens
-        temperature=temperature # Adjust temperature for response creativity
-    )
+    model, tokenizer = load_model()
 
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=2048,
+        temperature=temperature,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
 
     llm = HuggingFacePipeline(pipeline=pipe)
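
For context, here is a minimal sketch of how the pieces added in this commit fit together. The import paths and the build_llm wrapper are assumptions for illustration only; in app.py the pipeline is built inside get_chain, and HuggingFacePipeline may be imported from a different LangChain namespace.

    import streamlit as st
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    from langchain_community.llms import HuggingFacePipeline  # assumed import path

    @st.cache_resource  # cache across Streamlit reruns so the model loads only once
    def load_model():
        model_name = "bigscience/bloom-1b7"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # load_in_8bit=True requires the bitsandbytes package and a CUDA-capable GPU
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
        return model, tokenizer

    def build_llm(temperature):  # hypothetical helper; app.py does this inside get_chain
        model, tokenizer = load_model()
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=2048,
            temperature=temperature,
            top_p=0.95,
            repetition_penalty=1.15,
        )
        return HuggingFacePipeline(pipeline=pipe)

Caching the loader with st.cache_resource keeps the 8-bit BLOOM weights in memory across Streamlit reruns, so get_chain no longer reloads the model on every interaction.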