from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import markdowm as md  # local module holding the UI text (description, guide, footer)
import base64
# Load environment variables
load_dotenv()
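# Expected environment variables (set in .env or as Space secrets):
#   LLAMA_INDEX_API - LlamaParse (LlamaCloud) API key
#   TOKEN           - Hugging Face token used by HuggingFaceInferenceAPI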
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]
embed_models = [
    "BAAI/bge-small-en-v1.5",
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder",
    "BAAI/bge-large-en",
]
# Global variables
selected_llm_model_name = llm_models[0]
selected_embed_model_name = embed_models[0]
vector_index = None
# Initialize the parser
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
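# Route every supported file extension through LlamaParse instead of the default readers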
file_extractor = {
    '.pdf': parser,
    '.docx': parser,
    '.doc': parser,
    '.txt': parser,
    '.csv': parser,
    '.xlsx': parser,
    '.pptx': parser,
    '.html': parser,
    '.jpg': parser,
    '.jpeg': parser,
    '.png': parser,
    '.webp': parser,
    '.svg': parser,
}
def load_files(file_path: str, embed_model_name: str):
    """Parse the uploaded file and build a vector index with the chosen embedding model."""
    try:
        global vector_index
        documents = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"Ready to give response on {filename}"
    except Exception as e:
        return f"An error occurred: {e}"
def set_llm_model(selected_model):
    """Remember the LLM chosen in the dropdown; respond() instantiates it per request."""
    global selected_llm_model_name
    selected_llm_model_name = selected_model
    return f"Model set to: {selected_model}"
def respond(message, history):
    """Answer a chat message by querying the vector index with the currently selected LLM."""
    try:
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            contextWindow=8192,
            maxTokens=1024,
            temperature=0.3,
            topP=0.9,
            frequencyPenalty=0.5,
            presencePenalty=0.5,
            token=os.getenv("TOKEN")
        )
        # Only answer once a document has been parsed and indexed
        if vector_index is not None:
            query_engine = vector_index.as_query_engine(llm=llm)
            bot_message = query_engine.query(message)
            print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
            return f"{selected_llm_model_name}:\n{str(bot_message)}"
        else:
            return "Please upload a file."
    except Exception as e:
        return f"An error occurred: {e}"
def encode_image(image_path):
    """Read an image file and return it as a base64 string for inline HTML embedding."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
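# Logos embedded inline in the footer HTML (md.footer)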
github_logo_encoded = encode_image("Images/github-logo.png")
linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
website_logo_encoded = encode_image("Images/ai-logo.png")
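# Gradio UI: an Intro tab plus the DocBot tab (upload/index controls and the chat panel)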
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
    gr.Markdown("# DocBot")
    with gr.Tabs():
        with gr.TabItem("Intro"):
            gr.Markdown(md.description)

        with gr.TabItem("DocBot"):
            with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
                guid = gr.Markdown(md.guide)
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
                    embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
                    with gr.Row():
                        btn = gr.Button("Submit", variant='primary')
                        clear = gr.ClearButton()
                    output = gr.Text(label='Vector Index')
                    llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
                    model_selected_output = gr.Text(label="Model selected")  # shows the currently selected LLM
                with gr.Column(scale=3):
                    gr.ChatInterface(
                        fn=respond,
                        chatbot=gr.Chatbot(height=500),
                        theme="soft",
                        textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
                    )
            gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

    # Event bindings
    llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown, outputs=model_selected_output)
    btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
    clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
if __name__ == "__main__":
    demo.launch(share=True)