Spaces:

abhivsh
/

Engg-SS_ChatBOT

Runtime error

App Files Community

abhivsh committed on Apr 15, 2024

Commit

e4eb7c7

verified ·

1 Parent(s): 6515db3

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -22

app.py CHANGED Viewed

@@ -14,12 +14,20 @@ from langchain_openai import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains import VectorDBQA
-from langchain.llms import OpenAI
 import gradio as gr
-import os
 import requests
 import sys
 sys.path.append('../..')
@@ -53,10 +61,6 @@ vectordb = initialize.initialize()
-from langchain import HuggingFacePipeline, PromptTemplate, LLMChain, RetrievalQA
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import torch
 quantization_config = {
     "load_in_4bit": True,
     "bnb_4bit_compute_dtype": torch.float16,
@@ -64,20 +68,12 @@ quantization_config = {
     "bnb_4bit_use_double_quant": True,
 }
-llm = HuggingFacePipeline(pipeline=pipeline)
-model_id = "mistralai/Mistral-7B-Instruct-v0.1"
-model_4bit = AutoModelForCausalLM.from_pretrained(
-    model_id, device="cuda", quantization_config=quantization_config
-)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipeline = pipeline(
     "text-generation",
     model=model_4bit,
     tokenizer=tokenizer,
     use_cache=True,
-    device=0,  # '0' is for GPU, 'cpu' for CPU
     max_length=500,
     do_sample=True,
     top_k=5,
@@ -86,19 +82,23 @@ pipeline = pipeline(
     pad_token_id=tokenizer.eos_token_id,
 )
-template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
-Answer the question below from the context below:
-{context}
-{question} [/INST]
-"""
-def chat_query(retrieverQA, text_query):
     retrieverQA = RetrievalQA.from_chain_type(llm=llm, chain_type="retrieval", retriever=vectordb.as_retriever(), verbose=True)
-    result = retrieverQA.run(text_query)
     return result

 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains import VectorDBQA
+from langchain_community.llms import OpenAI
+from langchain_core.prompts import PromptTemplate
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+from langchain.chains import LLMChain
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
 import gradio as gr
 import requests
+import os
 import sys
 sys.path.append('../..')
 quantization_config = {
     "load_in_4bit": True,
     "bnb_4bit_compute_dtype": torch.float16,
     "bnb_4bit_use_double_quant": True,
 }
 pipeline = pipeline(
     "text-generation",
     model=model_4bit,
     tokenizer=tokenizer,
     use_cache=True,
+    device='cpu',  # '0' is for GPU, 'cpu' for CPU
     max_length=500,
     do_sample=True,
     top_k=5,
     pad_token_id=tokenizer.eos_token_id,
 )
+llm = HuggingFacePipeline(pipeline=pipeline)
+model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+model_4bit = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
+# Answer the question below from the context below:
+# {context}
+# {question} [/INST]
+# """
+def chat_query(message, history):
     retrieverQA = RetrievalQA.from_chain_type(llm=llm, chain_type="retrieval", retriever=vectordb.as_retriever(), verbose=True)
+    result = retrieverQA.run()
     return result