File size: 5,075 Bytes
8ef6cb8
 
 
 
 
 
 
 
0818db7
6847a85
8ef6cb8
 
 
 
6847a85
3e21c23
 
6847a85
 
3e21c23
8ef6cb8
6847a85
90dc0e0
6847a85
90dc0e0
 
6847a85
 
3d8afc7
90dc0e0
 
6847a85
3e21c23
 
8ef6cb8
90a2d71
90dc0e0
 
 
 
 
 
 
 
 
 
 
 
 
90a2d71
8ef6cb8
6847a85
8ef6cb8
 
 
6847a85
8ef6cb8
3e21c23
8ef6cb8
3e21c23
8ef6cb8
3e21c23
 
6847a85
 
90dc0e0
 
3e21c23
8ef6cb8
 
3e21c23
6847a85
90dc0e0
 
 
 
 
 
6847a85
3e21c23
90dc0e0
 
 
 
 
 
3e21c23
90dc0e0
3e21c23
 
6847a85
 
 
 
 
 
 
8ef6cb8
6847a85
90dc0e0
6847a85
 
 
8ef6cb8
6847a85
c1c2b2d
 
6847a85
 
 
 
 
 
c1c2b2d
6847a85
 
3d8afc7
6847a85
 
 
 
90dc0e0
6847a85
 
 
3d8afc7
 
 
6847a85
 
8ef6cb8
 
3d8afc7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import markdowm as md
import base64

# Load environment variables
load_dotenv()

# Chat models offered in the UI (HuggingFace Inference API model ids).
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

# Embedding models offered in the UI.
embed_models = [
    "BAAI/bge-small-en-v1.5",
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder",
    "BAAI/bge-large-en"
]

# Global state shared between the Gradio callbacks below.
selected_llm_model_name = llm_models[0]      # default LLM
selected_embed_model_name = embed_models[0]  # default embedder
vector_index = None                          # set by load_files()

# Initialize the parser
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
# Route every supported extension through the same LlamaParse instance.
file_extractor = {ext: parser for ext in (
    '.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx',
    '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg',
)}

def load_files(file_path: str, embed_model_name: str) -> str:
    """Parse the uploaded document and (re)build the global vector index.

    Args:
        file_path: Path of the uploaded document to index.
        embed_model_name: HuggingFace embedding model id used to embed chunks.

    Returns:
        A status string for the UI: on success it names the indexed file,
        on failure it carries the exception text (best-effort, never raises).
    """
    try:
        global vector_index
        document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        # Bug fix: the original returned the literal "(unknown)" instead of
        # interpolating the computed file name into the status message.
        return f"Ready to give response on {filename}"
    except Exception as e:
        # Surface the error in the UI text box rather than crashing the app.
        return f"An error occurred: {e}"

def set_llm_model(selected_model):
    """Record *selected_model* as the active LLM and echo the choice to the UI."""
    global selected_llm_model_name
    selected_llm_model_name = selected_model
    return "Model set to: " + selected_model

def respond(message, history):
    """Answer *message* against the indexed document using the selected LLM.

    Returns the model-prefixed answer, a prompt to upload a file when no
    index exists yet, or the error text on failure (never raises).
    """
    try:
        # Construct the inference client before checking the index, exactly as
        # before, so configuration/connection errors surface either way.
        # NOTE(review): the camelCase kwargs (contextWindow, maxTokens, topP,
        # frequencyPenalty, presencePenalty) may not match this API's expected
        # snake_case parameters — verify they actually take effect.
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            contextWindow=8192,
            maxTokens=1024,
            temperature=0.3,
            topP=0.9,
            frequencyPenalty=0.5,
            presencePenalty=0.5,
            token=os.getenv("TOKEN")
        )
        if vector_index is None:
            return "Please upload a file."
        answer = str(vector_index.as_query_engine(llm=llm).query(message))
        # Trace each Q/A pair to stdout for debugging.
        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {answer}\n")
        return f"{selected_llm_model_name}:\n{answer}"
    except Exception as e:
        return f"An error occurred: {e}"

def encode_image(image_path):
    """Read the file at *image_path* and return its contents base64-encoded as text."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')

# Pre-encode the footer logos once at startup; the base64 strings are passed
# into md.footer.format(...) below (presumably embedded as data URIs — confirm
# against the markdowm module's footer template).
github_logo_encoded = encode_image("Images/github-logo.png")
linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
website_logo_encoded = encode_image("Images/ai-logo.png")

# Build the Gradio UI: an intro tab plus the DocBot tab (upload/config column
# on the left, chat interface on the right). Event bindings are attached at
# the bottom of the Blocks context.
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
    gr.Markdown("# DocBot")
    with gr.Tabs():
        with gr.TabItem("Intro"):
            # Static description text from the local markdowm module.
            gr.Markdown(md.description)

        with gr.TabItem("DocBot"):
            with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
                guid = gr.Markdown(md.guide)
            with gr.Row():
                with gr.Column(scale=1):
                    # Step 1-3 controls: document upload, embedding choice, LLM choice.
                    file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
                    embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
                    with gr.Row():
                        btn = gr.Button("Submit", variant='primary')
                        clear = gr.ClearButton()
                    # Status line showing the result of load_files().
                    output = gr.Text(label='Vector Index')
                    llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
                    model_selected_output = gr.Text(label="Model selected")  # FIXED OUTPUT COMPONENT
                with gr.Column(scale=3):
                    # Chat panel; respond() queries the global vector index.
                    gr.ChatInterface(
                        fn=respond,
                        chatbot=gr.Chatbot(height=500),
                        theme="soft",
                        textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
                    )
    # Footer with the pre-encoded logos substituted into the HTML template.
    gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

    # Event bindings
    # Selecting an LLM updates the global model name and echoes it to the UI.
    llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown, outputs=model_selected_output)
    # Submit parses the uploaded file and builds the vector index.
    btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
    # Clear resets the upload, embedding choice, and status text to None.
    clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])

# Launch the app with a public Gradio share link when run as a script.
if __name__ == "__main__":
    demo.launch(share=True)