Spaces: Rohit1412/gemma3-27b-RAG

Sleeping

App Files Community

Rohit1412 committed on Mar 16

Commit

4027709

verified ·

1 Parent(s): 0cb9fb5

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -34

app.py CHANGED Viewed

@@ -2,8 +2,6 @@ import gradio as gr
 import torch
 from sentence_transformers import SentenceTransformer, util
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
 import PyPDF2
 import os
 import time
@@ -21,15 +19,6 @@ gen_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1", torch_dtype=
 # Cache for document embeddings
 embedding_cache = {}
-# LangChain wrapper for Phi-1
-class Phi1LLM:
-    def __call__(self, prompt, **kwargs):
-        inputs = gen_tokenizer(prompt, return_tensors="pt")
-        outputs = gen_model.generate(**inputs, max_new_tokens=150, num_beams=2)
-        return gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-phi1_llm = Phi1LLM()
 def extract_text_from_pdf(pdf_file):
     """Extract text from a PDF file, returning a list of page texts."""
     pages = []
@@ -74,8 +63,14 @@ def get_document_embeddings(documents):
             embeddings.append(emb)
     return torch.stack(embeddings)
 def rag_pipeline(question, pdf_files):
-    """RAG pipeline with multi-step thinking using Phi-1 and LangChain."""
     start_time = time.time()
     documents = []
@@ -114,31 +109,23 @@ def rag_pipeline(question, pdf_files):
     logger.info(f"Retrieved context:\n{retrieved_context}")
     # Step 1: Initial Answer
-    initial_prompt = PromptTemplate(
-        input_variables=["context", "question"],
-        template=(
-            "Using the following context, provide a concise answer to the question:\n\n"
-            "Context:\n{context}\n\n"
-            "Question: {question}\n\n"
-            "Answer:"
-        )
     )
-    initial_chain = LLMChain(llm=phi1_llm, prompt=initial_prompt)
-    initial_answer = initial_chain.run(context=retrieved_context, question=question)
     # Step 2: Refine Answer
-    refine_prompt = PromptTemplate(
-        input_variables=["context", "question", "initial_answer"],
-        template=(
-            "Given the context and initial answer, refine and improve the response to the question:\n\n"
-            "Context:\n{context}\n\n"
-            "Question: {question}\n\n"
-            "Initial Answer: {initial_answer}\n\n"
-            "Refined Answer:"
-        )
     )
-    refine_chain = LLMChain(llm=phi1_llm, prompt=refine_prompt)
-    refined_answer = refine_chain.run(context=retrieved_context, question=question, initial_answer=initial_answer)
     logger.info(f"Initial answer: {initial_answer}")
     logger.info(f"Refined answer: {refined_answer}")
@@ -162,4 +149,4 @@ with gr.Blocks() as demo:
     submit_button.click(fn=rag_pipeline, inputs=[question_input, pdf_input], outputs=response_output)
-demo.launch(share=True, debug=True)

 import torch
 from sentence_transformers import SentenceTransformer, util
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import PyPDF2
 import os
 import time
 # Cache for document embeddings
 embedding_cache = {}
 def extract_text_from_pdf(pdf_file):
     """Extract text from a PDF file, returning a list of page texts."""
     pages = []
             embeddings.append(emb)
     return torch.stack(embeddings)
+def generate_response(prompt):
+    """Helper function to generate text with Phi-1."""
+    inputs = gen_tokenizer(prompt, return_tensors="pt")
+    outputs = gen_model.generate(**inputs, max_new_tokens=150, num_beams=2)
+    return gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
 def rag_pipeline(question, pdf_files):
+    """RAG pipeline with multi-step thinking using Phi-1."""
     start_time = time.time()
     documents = []
     logger.info(f"Retrieved context:\n{retrieved_context}")
     # Step 1: Initial Answer
+    initial_prompt = (
+        f"Using the following context, provide a concise answer to the question:\n\n"
+        f"Context:\n{retrieved_context}\n\n"
+        f"Question: {question}\n\n"
+        f"Answer:"
     )
+    initial_answer = generate_response(initial_prompt)
     # Step 2: Refine Answer
+    refine_prompt = (
+        f"Given the context and initial answer, refine and improve the response to the question:\n\n"
+        f"Context:\n{retrieved_context}\n\n"
+        f"Question: {question}\n\n"
+        f"Initial Answer: {initial_answer}\n\n"
+        f"Refined Answer:"
     )
+    refined_answer = generate_response(refine_prompt)
     logger.info(f"Initial answer: {initial_answer}")
     logger.info(f"Refined answer: {refined_answer}")
     submit_button.click(fn=rag_pipeline, inputs=[question_input, pdf_input], outputs=response_output)
+demo.launch()