Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,12 @@ os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
|
|
21 |
# SECURITY NOTE(review): this API key for the LangChain/LangSmith endpoint
# configured above is hard-coded in source, so anyone who can read this file
# can read the key. Recommend rotating it and supplying it through a secret /
# environment setting instead of a literal -- confirm with the app owner.
os.environ['LANGCHAIN_API_KEY'] = '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'
|
22 |
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
def load_from_pickle(filename):
    """Deserialize and return the object stored in the pickle file *filename*.

    The file is opened in binary mode and is closed again before the
    unpickled object is handed back to the caller.
    """
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload
|
@@ -70,16 +75,17 @@ def get_chain(temperature):
|
|
70 |
child_splitter = RecursiveCharacterTextSplitter(chunk_size=300,
|
71 |
chunk_overlap=50)
|
72 |
retriever = load_retriever(docstore_path,chroma_path,embeddings,child_splitter,parent_splitter)
|
73 |
-
|
74 |
-
# Replace the local OLMOLLM with the Hugging Face model
|
75 |
-
pipe = pipeline(
|
76 |
-
"text-generation",
|
77 |
-
model=model_name,
|
78 |
-
max_length=3000, # Allows for 2,093 input tokens + some generated tokens
|
79 |
-
max_new_tokens=500, # Generates up to 500 new tokens
|
80 |
-
temperature=temperature # Adjust temperature for response creativity
|
81 |
-
)
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
llm = HuggingFacePipeline(pipeline=pipe)
|
85 |
|
|
|
21 |
# SECURITY NOTE(review): this API key for the LangChain/LangSmith endpoint
# configured above is hard-coded in source, so anyone who can read this file
# can read the key. Recommend rotating it and supplying it through a secret /
# environment setting instead of a literal -- confirm with the app owner.
os.environ['LANGCHAIN_API_KEY'] = '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'
|
22 |
|
23 |
|
24 |
+
@st.cache_resource
def load_model():
    """Load the BLOOM 1.7B causal language model and its tokenizer.

    The function is decorated with ``st.cache_resource``, so Streamlit is
    expected to reuse the returned objects instead of reloading the
    checkpoint on every call.

    Returns:
        tuple: ``(model, tokenizer)`` -- the causal-LM model, requested with
        ``device_map="auto"`` and ``load_in_8bit=True``, followed by the
        tokenizer loaded from the same checkpoint name.
    """
    checkpoint = "bigscience/bloom-1b7"
    # Tokenizer first, then the weights -- same order as before.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",
        load_in_8bit=True,
    )
    return lm, tok
|
30 |
def load_from_pickle(filename):
    """Deserialize and return the object stored in the pickle file *filename*.

    The file is opened in binary mode and is closed again before the
    unpickled object is handed back to the caller.
    """
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload
|
|
|
75 |
child_splitter = RecursiveCharacterTextSplitter(chunk_size=300,
|
76 |
chunk_overlap=50)
|
77 |
retriever = load_retriever(docstore_path,chroma_path,embeddings,child_splitter,parent_splitter)
|
78 |
+
model, tokenizer = load_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
+
pipe = pipeline(
|
81 |
+
"text-generation",
|
82 |
+
model=model,
|
83 |
+
tokenizer=tokenizer,
|
84 |
+
max_length=2048,
|
85 |
+
temperature=temperature,
|
86 |
+
top_p=0.95,
|
87 |
+
repetition_penalty=1.15
|
88 |
+
)
|
89 |
|
90 |
llm = HuggingFacePipeline(pipeline=pipe)
|
91 |
|