boryasbora committed
Commit 4c7e272 · verified
1 Parent(s): ef07eb2

Update app.py

Files changed (1):
  1. app.py +16 -27
app.py CHANGED
@@ -4,7 +4,6 @@ import pickle
 from langchain.prompts import ChatPromptTemplate
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
-from transformers import pipeline
 from langchain_community.llms import HuggingFacePipeline
 from langchain.retrievers import ParentDocumentRetriever
 from langchain.storage import InMemoryStore
@@ -14,18 +13,18 @@ from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptT
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnableLambda
 from datetime import date
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 # from setup import download_olmo_model, OLMO_MODEL
 
 # Ensure model is downloaded before proceeding
-@st.cache_resource
-def load_model():
-    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
-    return model, tokenizer
+# @st.cache_resource
+# def load_model():
+#     model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+#     tokenizer = AutoTokenizer.from_pretrained(model_name)
+#     model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
+#     return model, tokenizer
 
-model, tokenizer = load_model()
+# model, tokenizer = load_model()
 
 # # Define the path to your bash script
 # script_path = "./start.sh"
@@ -96,30 +95,20 @@ def get_chain(temperature):
     child_splitter = RecursiveCharacterTextSplitter(chunk_size=300,
                                                     chunk_overlap=50)
     retriever = load_retriever(docstore_path,chroma_path,embeddings,child_splitter,parent_splitter)
-
+    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
     # Replace the local OLMOLLM with the Hugging Face model
     pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_length=4000,
-        temperature=temperature,
-        top_p=0.95,
-        repetition_penalty=1.15
-    )
+        "text-generation",
+        model=model_name,
+        max_length=3000,  # Allows for 2,093 input tokens + some generated tokens
+        max_new_tokens=500,  # Generates up to 500 new tokens
+        temperature=temperature  # Adjust temperature for response creativity
+    )
 
-    llm = HuggingFacePipeline(pipeline=pipe)
+    llm = HuggingFacePipeline(pipeline=pipe)
 
-
 
 
-    # Initialize LangChain
-    # llm = HuggingFaceLLM(
-    #     model_id="EleutherAI/gpt-neo-1.3B", # or another suitable model
-    #     temperature=temperature,
-    #     max_tokens=256
-    # )
-
     today = date.today()
     # Response prompt
     response_prompt_template = """You are an assistant who helps Ocean Hack Week community to answer their questions. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.