Update BanglaRAG/bangla_rag_pipeline.py

BanglaRAG/bangla_rag_pipeline.py (+20 -98)
@@ -4,7 +4,6 @@ from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     pipeline,
-    GenerationConfig,
     BitsAndBytesConfig,
 )
 from langchain_core.prompts import PromptTemplate
@@ -14,25 +13,12 @@ from langchain_community.vectorstores import Chroma
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
-from rich import print as rprint
-from rich.panel import Panel
-from tqdm import tqdm
 import warnings
-import re
 
 warnings.filterwarnings("ignore")
 
 class BanglaRAGChain:
-    """
-    Bangla Retrieval-Augmented Generation (RAG) Chain for question answering.
-    This class uses a HuggingFace/local language model for text generation, a Chroma vector database for
-    document retrieval, and a custom prompt template to create a RAG chain that can generate
-    responses to user queries in Bengali.
-    """
-
     def __init__(self):
-        """Initializes the BanglaRAGChain with default parameters."""
         self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.chat_model_id = None
         self.embed_model_id = None
@@ -71,22 +57,6 @@ class BanglaRAGChain:
         chunk_overlap=150,
         hf_token=None,
     ):
-        """
-        Loads the required models and data for the RAG chain.
-        Args:
-            chat_model_id (str): The Hugging Face model ID for the chat model.
-            embed_model_id (str): The Hugging Face model ID for the embedding model.
-            text_path (str): Path to the text file to be indexed.
-            quantization (bool): Whether to quantize the model or not.
-            k (int): The number of documents to retrieve.
-            top_k (int): The top_k parameter for the generation configuration.
-            top_p (float): The top_p parameter for the generation configuration.
-            max_new_tokens (int): The maximum number of new tokens to generate.
-            temperature (float): The temperature parameter for the generation configuration.
-            chunk_size (int): The chunk size for text splitting.
-            chunk_overlap (int): The chunk overlap for text splitting.
-            hf_token (str): The Hugging Face token for authentication.
-        """
         self.chat_model_id = chat_model_id
         self.embed_model_id = embed_model_id
         self.k = k
@@ -103,26 +73,14 @@ class BanglaRAGChain:
         if self.hf_token is not None:
             os.environ["HF_TOKEN"] = str(self.hf_token)
 
-        rprint(Panel("[bold green]Loading chat models...", expand=False))
         self._load_models()
-
-        rprint(Panel("[bold green]Creating document...", expand=False))
         self._create_document()
-
-        rprint(Panel("[bold green]Updating Chroma database...", expand=False))
         self._update_chroma_db()
-
-        rprint(Panel("[bold green]Initializing retriever...", expand=False))
         self._get_retriever()
-
-        rprint(Panel("[bold green]Initializing LLM...", expand=False))
         self._get_llm()
-
-        rprint(Panel("[bold green]Creating chain...", expand=False))
         self._create_chain()
 
     def _load_models(self):
-        """Loads the chat model and tokenizer."""
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(self.chat_model_id)
             bnb_config = None
@@ -133,28 +91,23 @@
                     bnb_4bit_quant_type="nf4",
                     bnb_4bit_compute_dtype=torch.float16,
                 )
-                rprint(Panel("[bold green]Applying 4bit quantization...", expand=False))
                 self.chat_model = AutoModelForCausalLM.from_pretrained(
                     self.chat_model_id,
-
-
-                    quantization_config=bnb_config,
+                    load_in_8bit=True,
+                    torch_dtype=torch.bfloat16,
                     device_map="auto",
+                    quantization_config=bnb_config,
                 )
-                rprint(Panel("[bold green]Applied 4bit quantization successfully", expand=False))
             else:
                 self.chat_model = AutoModelForCausalLM.from_pretrained(
                     self.chat_model_id,
-                    torch_dtype=torch.
-                    low_cpu_mem_usage=True,
+                    torch_dtype=torch.bfloat16,
                     device_map="auto",
                 )
-                rprint(Panel("[bold green]Chat Model loaded successfully!", expand=False))
         except Exception as e:
-
+            raise RuntimeError(f"Error loading chat model: {e}")
 
     def _create_document(self):
-        """Splits the input text into chunks using RecursiveCharacterTextSplitter."""
         try:
             with open(self.text_path, "r", encoding="utf-8") as file:
                 self._text_content = file.read()
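In the quantized branch, the commit adds load_in_8bit=True alongside the 4-bit BitsAndBytesConfig. Newer transformers releases raise an error when load_in_8bit is passed together with quantization_config, so the two options should not be combined. A minimal sketch of the 4-bit path on its own (the model ID is a placeholder, not a real checkpoint):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "your-org/bangla-chat-model"  # placeholder model ID

# NF4 4-bit weights with fp16 compute, mirroring the BitsAndBytesConfig above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,  # 4-bit path; omit load_in_8bit entirely
    device_map="auto",
)
```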
@@ -163,44 +116,21 @@
                 chunk_size=self.chunk_size,
                 chunk_overlap=self.chunk_overlap,
             )
-            self._documents = list(
-                tqdm(
-                    character_splitter.split_text(self._text_content),
-                    desc="Chunking text",
-                )
-            )
-            print(f"Number of chunks: {len(self._documents)}")
-            if False:
-                for i, chunk in enumerate(self._documents):
-                    if i > 5:
-                        break
-                    print(f"Chunk {i}: {chunk}")
-            rprint(Panel("[bold green]Document created successfully!", expand=False))
+            self._documents = character_splitter.split_text(self._text_content)
         except Exception as e:
-
+            raise RuntimeError(f"Chunking failed: {e}")
 
     def _update_chroma_db(self):
-        """Updates the Chroma vector database with the text chunks."""
         try:
-
-
-
-                embeddings = HuggingFaceEmbeddings(
-                    model_name=self.embed_model_id, model_kwargs=model_kwargs
-                )
-                rprint(Panel(f"[bold green]Loaded embedding model successfully!", expand=False))
-            except Exception as e:
-                rprint(Panel(f"[red]embedding model loading failed: {e}", expand=False))
-
-            self._db = Chroma.from_texts(texts=self._documents, embedding=embeddings)
-            rprint(
-                Panel("[bold green]Chroma database updated successfully!", expand=False)
+            model_kwargs = {"device": self._device}
+            embeddings = HuggingFaceEmbeddings(
+                model_name=self.embed_model_id, model_kwargs=model_kwargs
             )
+            self._db = Chroma.from_texts(texts=self._documents, embedding=embeddings)
         except Exception as e:
-
+            raise RuntimeError(f"Vector DB initialization failed: {e}")
 
     def _create_chain(self):
-        """Creates the retrieval-augmented generation (RAG) chain."""
         template = """Below is an instruction in Bengali language that describes a task, paired with an input also in Bengali language that provides further context. Write a response in Bengali that appropriately completes the request.
 ### Instruction:
 {question}
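The chunking step shrinks to a plain split_text call, and embedding plus indexing now live in a single try block that raises on failure. The same indexing path outside the class, as a sketch — the embedding model ID and corpus file are placeholders:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

EMBED_MODEL_ID = "some-org/bengali-sbert"  # placeholder sentence-transformers model
with open("corpus_bn.txt", encoding="utf-8") as f:  # hypothetical Bengali corpus
    text = f.read()

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
documents = splitter.split_text(text)  # list[str], as _create_document now stores

embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL_ID,
    model_kwargs={"device": "cpu"},  # the class passes self._device here
)
db = Chroma.from_texts(texts=documents, embedding=embeddings)
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
```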
@@ -242,22 +172,18 @@
             ).assign(answer=rag_chain_from_docs)
 
             self._chain = rag_chain_with_source
-            rprint(Panel("[bold green]Chain created successfully!", expand=False))
         except Exception as e:
-
+            raise RuntimeError(f"Chain creation failed: {e}")
 
     def _get_retriever(self):
-        """Creates a retriever for the vector database."""
         try:
             self._retriever = self._db.as_retriever(
                 search_type="similarity", search_kwargs={"k": self.k}
             )
-            rprint(Panel("[bold green]Retriever created successfully!", expand=False))
         except Exception as e:
-
+            raise RuntimeError(f"Retriever creation failed: {e}")
 
     def _get_llm(self):
-        """Initializes the language model using the Hugging Face pipeline."""
         try:
             pipe = pipeline(
                 "text-generation",
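Only the tail of _create_chain appears in the diff, but `.assign(answer=rag_chain_from_docs)` is the standard LCEL "RAG with sources" pattern. A sketch of how such a chain is typically wired; `llm`, `retriever`, and `template` are assumed from the rest of the class, and the itemgetter routing is an assumption chosen to match the dict that `__call__` passes below:

```python
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

prompt = PromptTemplate.from_template(template)  # the Bengali instruction template

def format_docs(docs):
    # Stand-in for self._format_docs.
    return "\n\n".join(doc.page_content for doc in docs)

# Turn the retrieved documents into an answer.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=lambda x: format_docs(x["context"]))
    | prompt
    | llm
    | StrOutputParser()
)

# Run retrieval and generation while keeping the sources in the output.
rag_chain_with_source = RunnableParallel(
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
).assign(answer=rag_chain_from_docs)

result = rag_chain_with_source.invoke({"question": "বাংলা প্রশ্ন"})
# result -> {"context": [...documents...], "question": "...", "answer": "..."}
```

With a bare RunnablePassthrough() in place of the itemgetter wiring, the chain would instead be invoked with the raw query string.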
@@ -271,26 +197,22 @@
                 top_p=self.top_p,
                 top_k=self.top_k,
                 repetition_penalty=1.2,
-                torch_dtype=torch.
+                torch_dtype=torch.bfloat16,
             )
 
             self._llm = HuggingFacePipeline(pipeline=pipe)
-            rprint(Panel("[bold green]LLM initialized successfully!", expand=False))
         except Exception as e:
-
-            self._llm = None
+            raise RuntimeError(f"LLM initialization failed: {e}")
+            self._llm = None
 
     def __call__(self, query):
-        """Runs the RAG chain on a user query and returns the generated answer."""
         if not self._chain:
             raise ValueError("The chain has not been initialized.")
-
-
-        return result["answer"], result["context"]
+        result = self._chain.invoke({"question": query})
+        return result["answer"], result["context"]
 
     def _format_docs(self, docs):
-        """Formats retrieved documents into a string format."""
         context = ""
         for i, doc in enumerate(docs):
             context += f"\nDocument {i + 1}:\n{doc.page_content}\n\n"
-        return context
+        return context