File size: 20,228 Bytes
b5e7375
 
 
 
 
188764d
 
 
 
 
 
 
 
 
 
b5e7375
 
188764d
 
 
 
 
 
 
 
 
b5e7375
188764d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e7375
188764d
b5e7375
188764d
 
b5e7375
188764d
b5e7375
 
 
188764d
b5e7375
188764d
b5e7375
188764d
 
 
b5e7375
188764d
b5e7375
188764d
 
 
 
 
b5e7375
188764d
b5e7375
188764d
 
 
 
 
b5e7375
188764d
b5e7375
188764d
 
 
 
 
b5e7375
188764d
b5e7375
188764d
 
 
 
 
b5e7375
188764d
b5e7375
188764d
 
 
 
 
b5e7375
188764d
b5e7375
 
188764d
 
 
 
b5e7375
188764d
b5e7375
188764d
b5e7375
188764d
 
 
 
b5e7375
188764d
b5e7375
 
188764d
b5e7375
188764d
b5e7375
188764d
b5e7375
 
188764d
b5e7375
188764d
b5e7375
188764d
b5e7375
188764d
b5e7375
 
188764d
b5e7375
188764d
b5e7375
188764d
b5e7375
188764d
b5e7375
 
188764d
b5e7375
188764d
b5e7375
188764d
 
b5e7375
188764d
 
 
 
 
 
 
 
 
 
b5e7375
188764d
b5e7375
 
 
 
 
 
 
 
 
 
5d9ca4f
 
b5e7375
 
 
188764d
b5e7375
188764d
 
 
 
b5e7375
188764d
 
b5e7375
188764d
b5e7375
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import logging  # Import the logging module to enable tracking of events, errors, and debugging information during program execution
import warnings  # Import the warnings module to manage and control warning messages generated by the code or libraries
import os  # Import the os module to interact with the operating system, such as file system operations and environment variables
import sys  # Import the sys module to access system-specific parameters and functions, including command-line arguments and interpreter information
import gradio as gr  # Import the Gradio library to build interactive web interfaces for machine learning applications, enabling easy deployment and testing
from src.core.parameter import parameters  # Import the 'parameters' function from the core parameter module, which provides model parameter configurations based on the selected reasoning mode
from src.client.chat_handler import respond  # Import the 'respond' function from the chat handler module, responsible for generating AI assistant responses to user inputs
from config import model, meta_tags  # Import 'model' dictionary containing available model precision options and their details, and 'meta_tags' dictionary containing HTML meta tag information for web interface setup

# Define the main user interface function for the Gradio application
def ui():
    """
    Build and return the complete Gradio application for the AI assistant.

    The function performs three jobs:

    1. Silences logging, warnings, stdout and stderr so that third-party
       framework noise (uvicorn, fastapi, gradio, httpx, ...) never reaches
       the web interface.
    2. Constructs a sidebar containing every model configuration control:
       a precision dropdown, a reasoning toggle, the generation-parameter
       sliders (temperature, top_k, min_p, top_p, repetition penalty) and
       feature switches for image generation, audio generation and deep web
       search. The sliders are non-interactive; their values are driven
       entirely by the reasoning checkbox through the external
       ``parameters`` function, both at construction time and on every
       toggle via a ``change`` event.
    3. Wires the main ``gr.ChatInterface`` to the ``respond`` handler,
       passing every sidebar control as an additional input, and supplies a
       set of clickable example prompts.

    Returns:
        The fully constructed Gradio Blocks app object, ready to be
        launched standalone or embedded elsewhere.
    """
    # --- Output suppression -------------------------------------------------
    # Hide every warning category so nothing leaks into the UI.
    warnings.filterwarnings("ignore")
    warnings.simplefilter("ignore")
    # Only CRITICAL records may pass through the root logger.
    logging.basicConfig(level=logging.CRITICAL)
    # Logger names belonging to the web/async stack commonly used by
    # Gradio applications; each is silenced below CRITICAL.
    logger_keywords = [
        "uvicorn",    # ASGI web server used with FastAPI and Gradio
        "fastapi",    # Web framework backing the Gradio server
        "gradio",     # Gradio's own internal logger
        "httpx",      # HTTP client used for outbound requests
        "aiohttp",    # Async HTTP client/server library
        "asyncio",    # Python's async runtime
        "starlette",  # ASGI framework underlying FastAPI
        "anyio",      # Async concurrency layer
    ]
    # Walk every registered logger once; a single combined condition
    # replaces the original nested if/isinstance checks.
    for name, logger in logging.root.manager.loggerDict.items():
        if isinstance(logger, logging.Logger) and any(
            keyword in name for keyword in logger_keywords
        ):
            logger.setLevel(logging.CRITICAL)
    # Redirect both standard streams to the null device. One shared handle
    # is opened (the original opened two, leaking a file descriptor); it
    # intentionally stays open for the lifetime of the process because the
    # streams remain redirected until exit.
    devnull = open(os.devnull, "w")
    sys.stdout = devnull
    sys.stderr = devnull
    # --- Application layout -------------------------------------------------
    # Root container for all UI components; it resizes with the browser
    # window and injects custom meta tags into the HTML head.
    with gr.Blocks(
        fill_height=True,  # Fill the available vertical space
        fill_width=True,   # Fill the available horizontal space
        head=meta_tags     # Custom <head> content (SEO, viewport, ...)
    ) as app:
        # Sidebar with all model configuration controls; starts closed for
        # a clean initial appearance.
        with gr.Sidebar(open=False):
            # Model precision selector populated from the shared config.
            model_precision = gr.Dropdown(
                choices=list(model.keys()),
                label="Model Precision",
                info=(
                    "The smaller the value, the faster the response but less accurate. "
                    "Conversely, the larger the value, the response is slower but more accurate."
                ),
                value="Q8_K_XL"
            )
            # Toggle between thinking and non-thinking mode; its state
            # drives every generation-parameter slider below.
            reasoning = gr.Checkbox(
                label="Reasoning",
                info="Switching between thinking and non-thinking mode.",
                value=True
            )
            # Fetch the parameter set matching the default reasoning state
            # so each slider is created with a consistent value. Passing
            # value= at construction is more reliable than the original
            # post-construction mutation of the .value attributes.
            (
                default_temperature,
                default_top_k,
                default_min_p,
                default_top_p,
                default_repetition_penalty,
            ) = parameters(reasoning.value)
            # Randomness of generation; set programmatically, hence
            # interactive=False.
            temperature = gr.Slider(
                minimum=0.0,
                maximum=2.0,
                step=0.01,
                label="Temperature",
                value=default_temperature,
                interactive=False
            )
            # Number of highest-probability tokens considered per step.
            top_k = gr.Slider(
                minimum=0,
                maximum=100,
                step=1,
                label="Top K",
                value=default_top_k,
                interactive=False
            )
            # Minimum probability threshold for token selection.
            min_p = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.01,
                label="Min P",
                value=default_min_p,
                interactive=False
            )
            # Nucleus-sampling cumulative probability cutoff.
            top_p = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.01,
                label="Top P",
                value=default_top_p,
                interactive=False
            )
            # Penalty discouraging repeated tokens in the output.
            repetition_penalty = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                step=0.01,
                label="Repetition Penalty",
                value=default_repetition_penalty,
                interactive=False
            )

            def update_parameters(switching):
                """
                Return the slider values matching the new reasoning state.

                Called whenever the reasoning checkbox changes; delegates to
                the external ``parameters`` function, which yields the tuple
                (temperature, top_k, min_p, top_p, repetition_penalty).
                """
                return parameters(switching)

            # Keep all five sliders in sync with the reasoning toggle. The
            # event is UI-internal, so it is not exposed on the API.
            reasoning.change(
                fn=update_parameters,
                inputs=[reasoning],
                outputs=[temperature, top_k, min_p, top_p, repetition_penalty],
                api_name=False
            )
            # Feature switch: image generation via the /image chat command.
            image_generation = gr.Checkbox(
                label="Image Generation",
                info=(
                    "Type <i><b>/image</b></i> followed by the instructions to start generating an image."
                ),
                value=True
            )
            # Feature switch: audio generation via the /audio chat command.
            audio_generation = gr.Checkbox(
                label="Audio Generation",
                info=(
                    "Type <i><b>/audio</b></i> followed by the instructions to start generating audio."
                ),
                value=True
            )
            # Feature switch: deep web search via the /dp chat command.
            search_generation = gr.Checkbox(
                label="Deep Search",
                info=(
                    "Type <i><b>/dp</b></i> followed by the instructions to search the web."
                ),
                value=True
            )
        # Main chat surface. Every sidebar control is forwarded to the
        # respond handler on each message so the current configuration is
        # always honoured.
        gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                model_precision,
                temperature,
                top_k,
                min_p,
                top_p,
                repetition_penalty,
                reasoning,
                image_generation,
                audio_generation,
                search_generation
            ],
            # Clickable prompts demonstrating each feature/command.
            examples=[
                ["Please introduce yourself."],
                ["/audio Could you explain what Artificial Intelligence (AI) is?"],
                ["/audio What is Hugging Face?"],
                ["/dp Please search for the J.A.R.V.I.S. AI model on Hugging Face."],
                ["/dp What is the capital city of Indonesia?"],
                ["/image Create an image of a futuristic city."],
                ["/image Create a cartoon-style image of a man."],
                ["What day is it today, what's the date, and what time is it?"],
                ['/audio Say "I am J.A.R.V.I.S.".'],
                ["How can I run you in the terminal without having to download the model?"],
                ["Do you have an OpenAI-compatible API for your model?"],
                ["Please generate a highly complex code snippet on any topic."],
                ["Explain about quantum computers."]
            ],
            cache_examples=False,  # Always regenerate example responses
            chatbot=gr.Chatbot(
                label="J.A.R.V.I.S.",
                show_copy_button=True,  # Let users copy messages
                scale=1,
                allow_tags=["think"]  # Permit the reasoning <think> tag
            ),
            multimodal=False,  # Text-only chat, no file uploads
            api_name="api",  # Expose the chat endpoint programmatically
        )
    # Hand the fully built app back to the caller for launching/embedding.
    return app