nav13n committed on
Commit
45fb393
·
0 Parent(s):

first commit

Browse files
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third parties caching (e.g LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = false
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = false
29
+
30
+ # Allows user to use speech to text
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "Sage"
39
+
40
+ # Show the readme while the conversation is empty.
41
+ show_readme_as_default = false
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large size content are by default collapsed for a cleaner ui
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = ""
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "0.7.700"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "atoms": {
4
+ "buttons": {
5
+ "userButton": {
6
+ "menu": {
7
+ "settings": "Settings",
8
+ "settingsKey": "S",
9
+ "APIKeys": "API Keys",
10
+ "logout": "Logout"
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "molecules": {
16
+ "newChatButton": {
17
+ "newChat": "New Chat"
18
+ },
19
+ "tasklist": {
20
+ "TaskList": {
21
+ "title": "\ud83d\uddd2\ufe0f Task List",
22
+ "loading": "Loading...",
23
+ "error": "An error occurred"
24
+ }
25
+ },
26
+ "attachments": {
27
+ "cancelUpload": "Cancel upload",
28
+ "removeAttachment": "Remove attachment"
29
+ },
30
+ "newChatDialog": {
31
+ "createNewChat": "Create new chat?",
32
+ "clearChat": "This will clear the current messages and start a new chat.",
33
+ "cancel": "Cancel",
34
+ "confirm": "Confirm"
35
+ },
36
+ "settingsModal": {
37
+ "settings": "Settings",
38
+ "expandMessages": "Expand Messages",
39
+ "hideChainOfThought": "Hide Chain of Thought",
40
+ "darkMode": "Dark Mode"
41
+ },
42
+ "detailsButton": {
43
+ "using": "Using",
44
+ "running": "Running",
45
+ "took_one": "Took {{count}} step",
46
+ "took_other": "Took {{count}} steps"
47
+ },
48
+ "auth": {
49
+ "authLogin": {
50
+ "title": "Login to access the app.",
51
+ "form": {
52
+ "email": "Email address",
53
+ "password": "Password",
54
+ "noAccount": "Don't have an account?",
55
+ "alreadyHaveAccount": "Already have an account?",
56
+ "signup": "Sign Up",
57
+ "signin": "Sign In",
58
+ "or": "OR",
59
+ "continue": "Continue",
60
+ "forgotPassword": "Forgot password?",
61
+ "passwordMustContain": "Your password must contain:",
62
+ "emailRequired": "email is a required field",
63
+ "passwordRequired": "password is a required field"
64
+ },
65
+ "error": {
66
+ "default": "Unable to sign in.",
67
+ "signin": "Try signing in with a different account.",
68
+ "oauthsignin": "Try signing in with a different account.",
69
+ "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
70
+ "oauthcallbackerror": "Try signing in with a different account.",
71
+ "oauthcreateaccount": "Try signing in with a different account.",
72
+ "emailcreateaccount": "Try signing in with a different account.",
73
+ "callback": "Try signing in with a different account.",
74
+ "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
75
+ "emailsignin": "The e-mail could not be sent.",
76
+ "emailverify": "Please verify your email, a new email has been sent.",
77
+ "credentialssignin": "Sign in failed. Check the details you provided are correct.",
78
+ "sessionrequired": "Please sign in to access this page."
79
+ }
80
+ },
81
+ "authVerifyEmail": {
82
+ "almostThere": "You're almost there! We've sent an email to ",
83
+ "verifyEmailLink": "Please click on the link in that email to complete your signup.",
84
+ "didNotReceive": "Can't find the email?",
85
+ "resendEmail": "Resend email",
86
+ "goBack": "Go Back",
87
+ "emailSent": "Email sent successfully.",
88
+ "verifyEmail": "Verify your email address"
89
+ },
90
+ "providerButton": {
91
+ "continue": "Continue with {{provider}}",
92
+ "signup": "Sign up with {{provider}}"
93
+ },
94
+ "authResetPassword": {
95
+ "newPasswordRequired": "New password is a required field",
96
+ "passwordsMustMatch": "Passwords must match",
97
+ "confirmPasswordRequired": "Confirm password is a required field",
98
+ "newPassword": "New password",
99
+ "confirmPassword": "Confirm password",
100
+ "resetPassword": "Reset Password"
101
+ },
102
+ "authForgotPassword": {
103
+ "email": "Email address",
104
+ "emailRequired": "email is a required field",
105
+ "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
106
+ "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
107
+ "resendEmail": "Resend email",
108
+ "continue": "Continue",
109
+ "goBack": "Go Back"
110
+ }
111
+ }
112
+ },
113
+ "organisms": {
114
+ "chat": {
115
+ "history": {
116
+ "index": {
117
+ "showHistory": "Show history",
118
+ "lastInputs": "Last Inputs",
119
+ "noInputs": "Such empty...",
120
+ "loading": "Loading..."
121
+ }
122
+ },
123
+ "inputBox": {
124
+ "input": {
125
+ "placeholder": "Type your message here..."
126
+ },
127
+ "speechButton": {
128
+ "start": "Start recording",
129
+ "stop": "Stop recording"
130
+ },
131
+ "SubmitButton": {
132
+ "sendMessage": "Send message",
133
+ "stopTask": "Stop Task"
134
+ },
135
+ "UploadButton": {
136
+ "attachFiles": "Attach files"
137
+ },
138
+ "waterMark": {
139
+ "text": "Built with"
140
+ }
141
+ },
142
+ "Messages": {
143
+ "index": {
144
+ "running": "Running",
145
+ "executedSuccessfully": "executed successfully",
146
+ "failed": "failed",
147
+ "feedbackUpdated": "Feedback updated",
148
+ "updating": "Updating"
149
+ }
150
+ },
151
+ "dropScreen": {
152
+ "dropYourFilesHere": "Drop your files here"
153
+ },
154
+ "index": {
155
+ "failedToUpload": "Failed to upload",
156
+ "cancelledUploadOf": "Cancelled upload of",
157
+ "couldNotReachServer": "Could not reach the server",
158
+ "continuingChat": "Continuing previous chat"
159
+ },
160
+ "settings": {
161
+ "settingsPanel": "Settings panel",
162
+ "reset": "Reset",
163
+ "cancel": "Cancel",
164
+ "confirm": "Confirm"
165
+ }
166
+ },
167
+ "threadHistory": {
168
+ "sidebar": {
169
+ "filters": {
170
+ "FeedbackSelect": {
171
+ "feedbackAll": "Feedback: All",
172
+ "feedbackPositive": "Feedback: Positive",
173
+ "feedbackNegative": "Feedback: Negative"
174
+ },
175
+ "SearchBar": {
176
+ "search": "Search"
177
+ }
178
+ },
179
+ "DeleteThreadButton": {
180
+ "confirmMessage": "This will delete the thread as well as its messages and elements.",
181
+ "cancel": "Cancel",
182
+ "confirm": "Confirm",
183
+ "deletingChat": "Deleting chat",
184
+ "chatDeleted": "Chat deleted"
185
+ },
186
+ "index": {
187
+ "pastChats": "Past Chats"
188
+ },
189
+ "ThreadList": {
190
+ "empty": "Empty...",
191
+ "today": "Today",
192
+ "yesterday": "Yesterday",
193
+ "previous7days": "Previous 7 days",
194
+ "previous30days": "Previous 30 days"
195
+ },
196
+ "TriggerButton": {
197
+ "closeSidebar": "Close sidebar",
198
+ "openSidebar": "Open sidebar"
199
+ }
200
+ },
201
+ "Thread": {
202
+ "backToChat": "Go back to chat",
203
+ "chatCreatedOn": "This chat was created on"
204
+ }
205
+ },
206
+ "header": {
207
+ "chat": "Chat",
208
+ "readme": "Readme"
209
+ }
210
+ }
211
+ },
212
+ "hooks": {
213
+ "useLLMProviders": {
214
+ "failedToFetchProviders": "Failed to fetch providers:"
215
+ }
216
+ },
217
+ "pages": {
218
+ "Design": {},
219
+ "Env": {
220
+ "savedSuccessfully": "Saved successfully",
221
+ "requiredApiKeys": "Required API Keys",
222
+ "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
223
+ },
224
+ "Page": {
225
+ "notPartOfProject": "You are not part of this project."
226
+ },
227
+ "ResumeButton": {
228
+ "resumeChat": "Resume Chat"
229
+ }
230
+ }
231
+ }
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.github/workflows/check_file_size.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.github/workflows/sync_to_hf_space.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ workflow_dispatch:
6
+
7
+ jobs:
8
+ sync-to-hub:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v3
12
+ with:
13
+ fetch-depth: 0
14
+ lfs: true
15
+ - name: Push to hub
16
+ env:
17
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
18
+ run: git push -f https://nav13n:$HF_TOKEN@huggingface.co/spaces/nav13n/DocuSage main
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Run as an unprivileged user with UID 1000 instead of root.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes. Docker does not expand "~" in COPY paths,
# so the destination is spelled out with $HOME.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code last, owned by the runtime user.
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DocuSage
3
+ emoji: ⚡
4
+ colorFrom: indigo
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ short_description: DocuSage - AI-powered insights in seconds.
10
+ ---
11
+
12
+ # DocuSage
13
+
14
+ DocuSage is a powerful document question-answering application that allows users to upload documents and ask natural language questions about their content. With cutting-edge AI technology, DocuSage provides accurate, concise answers in seconds, saving time and boosting productivity. Unlock valuable insights and let DocuSage be your sage guide through the world of document knowledge.
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chainlit as cl
3
+ from sage import app
4
+ from langchain.schema.runnable import RunnableConfig
5
+
6
+
7
+ welcome_message = "Welcome! Ask anything about your stored documents and get AI-powered insights in seconds."
8
+
9
@cl.on_chat_start
async def start_chat():
    """Greet the user and store the runnable in the new chat session."""
    greeting = cl.Message(content=welcome_message)
    await greeting.send()
    # Keep the runnable in the session so the message handler can fetch it.
    cl.user_session.set("runnable", app)
13
+
14
+
15
@cl.on_message
async def main(message: cl.Message):
    """Answer one user message by streaming the session's runnable.

    The runnable stored under "runnable" in the user session is streamed with
    the message text as the "question". Every streamed output is a mapping of
    step name to step result; only the "generator_agent" result carries the
    answer that is forwarded to the UI.

    Args:
        message: The incoming Chainlit message; its ``content`` is the question.
    """
    runnable = cl.user_session.get("runnable")
    msg = cl.Message(content="")

    inputs = {"question": message.content}

    # Use the async stream so that slow steps do not block the event loop
    # that serves every other connected chat session.
    async for output in runnable.astream(inputs):
        for key, value in output.items():
            print(f"Finished running: {key}:")
            if key == "generator_agent":
                await msg.stream_token(value["answer"])

    await msg.send()
chain.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Langchain imports
2
+ from langchain_community.document_loaders import PyMuPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import Qdrant
5
+ from langchain.prompts import ChatPromptTemplate
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_openai import OpenAIEmbeddings
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain.schema.runnable import RunnablePassthrough
10
+ from dotenv import load_dotenv
11
+
12
+
13
+ load_dotenv()
14
+
15
+ ######################## Build RAG Chain #############################
16
+ ######################################################################
17
+
18
+ #### Load Documents
19
+ loader = PyMuPDFLoader(
20
+ "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf"
21
+ )
22
+
23
+ documents = loader.load()
24
+
25
+ #### Split Documents
26
+ text_splitter = RecursiveCharacterTextSplitter(
27
+ chunk_size = 800,
28
+ chunk_overlap = 100
29
+ )
30
+
31
+ documents = text_splitter.split_documents(documents)
32
+
33
+ embeddings = OpenAIEmbeddings(
34
+ model="text-embedding-3-small"
35
+ )
36
+
37
+ ### Create Vector Store
38
+ vector_store = Qdrant.from_documents(
39
+ documents,
40
+ embeddings,
41
+ location=":memory:",
42
+ collection_name="Meta 10k Filings",
43
+ )
44
+
45
+ ### Create Prompt Template
46
+ template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
47
+
48
+ Context:
49
+ {context}
50
+
51
+ Question:
52
+ {question}
53
+ """
54
+ prompt = ChatPromptTemplate.from_template(template)
55
+
56
def format_docs(docs):
    """Join the ``page_content`` of each document with a blank line between."""
    paragraph_break = "\n\n"
    return paragraph_break.join(doc.page_content for doc in docs)
58
+
59
+ ### Setup RAG Chain
60
+
61
+ retriever = vector_store.as_retriever(search_type="similarity_score_threshold",
62
+ search_kwargs={"score_threshold": 0.6, "k":8})
63
+ primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
64
+
65
+ rag_chain = (
66
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
67
+ | prompt
68
+ | primary_qa_llm
69
+ | StrOutputParser()
70
+
71
+ )
chainlit.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # DocuSage
2
+
3
+ DocuSage is a powerful document question-answering application that allows users to upload documents and ask natural language questions about their content. With cutting-edge AI technology, DocuSage provides accurate, concise answers in seconds, saving time and boosting productivity. Unlock valuable insights and let DocuSage be your sage guide through the world of document knowledge.
data/placeholder.md ADDED
File without changes
environment.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ name: sage
2
+
3
+ channels:
4
+ - conda-forge
5
+ - defaults
6
+
7
+ dependencies:
8
+ - python=3.11
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit
2
+ tiktoken
3
+ python-dotenv
4
+ langchain
5
+ langchain_core
6
+ langchain-community
7
+ langchainhub
8
+ faiss-cpu
9
+ langgraph
10
+ fastembed
11
+ flashrank
12
+ pandas
13
+ huggingface-hub
14
+ langchain-groq
15
+ pymupdf
sage.py ADDED
@@ -0,0 +1,684 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_community.vectorstores import FAISS
3
+
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_community.document_loaders import PyMuPDFLoader
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
8
+ import os
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+
12
+ embed_model = FastEmbedEmbeddings(model_name="snowflake/snowflake-arctic-embed-m")
13
+
14
+ from groq import Groq
15
+ from langchain_groq import ChatGroq
16
+
17
+
18
+ llm = ChatGroq(temperature=0,
19
+ model_name="Llama3-8b-8192",
20
+ api_key=os.getenv("GROQ_API_KEY"),)
21
+
22
+ loader = PyMuPDFLoader("https://home.synise.com/HRUtility/Documents/HRA/UmaP/Synise%20Handbook.pdf")
23
+ documents = loader.load()
24
+
25
+ text_splitter = RecursiveCharacterTextSplitter(
26
+ chunk_size=1500, chunk_overlap=200
27
+ )
28
+ doc_splits = text_splitter.split_documents(documents)
29
+
30
+ print(len(doc_splits),doc_splits[0])
31
+
32
+ vectorstore = FAISS.from_documents(documents=doc_splits,embedding=embed_model)
33
+
34
+ from langchain.retrievers import ContextualCompressionRetriever
35
+ from langchain.retrievers.document_compressors import FlashrankRerank
36
+
37
+ compressor = FlashrankRerank()
38
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 20})
39
+ compression_retriever = ContextualCompressionRetriever(
40
+ base_compressor=compressor, base_retriever=retriever
41
+ )
42
+
43
+ from operator import itemgetter
44
+ from langchain.prompts import PromptTemplate
45
+ from langchain.schema.runnable import RunnablePassthrough
46
+ from langchain_core.output_parsers import StrOutputParser
47
+
48
+
49
# Prompt for the answer-generation step. It expects two variables:
# {context} (the retrieved document text) and {question} (the user's query).
# The fallback answer is quoted consistently so the model sees one exact
# string to emit when the context is insufficient.
RAG_PROMPT_TEMPLATE = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Answer the question based only on the provided context. If you cannot answer the question with the provided context, please respond with 'I don't know' without any preamble, explanation, or additional text.

Context:
{context}

Question:
{question}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
61
+
62
def format_docs(docs):
    """Concatenate the documents' ``page_content``, separated by blank lines."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)
64
+
65
+
66
+ rag_prompt = PromptTemplate(
67
+ template=RAG_PROMPT_TEMPLATE, input_variables=["question", "context"]
68
+ )
69
+
70
+ response_chain = (rag_prompt
71
+ | llm
72
+ | StrOutputParser()
73
+
74
+ )
75
+
76
def _build_payslip(year, month, basic_salary, income_tax):
    """Assemble one dummy payslip record for the sample employee.

    Gross pay, total deductions and net pay are derived from the line items
    so the totals can never disagree with the itemised amounts.
    """
    allowances = [
        {"type": "Housing Allowance", "amount": 1500},
        {"type": "Travel Allowance", "amount": 800},
    ]
    deductions = [
        {"type": "Provident Fund", "amount": 650},
        {"type": "Health Insurance", "amount": 300},
    ]
    taxes = [{"type": "Income Tax", "amount": income_tax}]

    gross_salary = basic_salary + sum(item["amount"] for item in allowances)
    # "totalDeductions" covers the deductions and the taxes together.
    total_deductions = sum(item["amount"] for item in deductions + taxes)

    return {
        "employeeDetails": {
            "employeeId": "E2468",
            "firstName": "Sarah",
            "lastName": "Thompson",
            "designation": "Product Manager",
        },
        "paymentDetails": {
            "year": year,
            "month": month,
            "basicSalary": basic_salary,
            "allowances": allowances,
            "deductions": deductions,
            "taxes": taxes,
            "grossSalary": gross_salary,
            "totalDeductions": total_deductions,
            "netSalary": gross_salary - total_deductions,
        },
        "companyDetails": {
            "companyName": "Tech Solutions Ltd.",
            "address": "789 Maple Avenue, City",
        },
    }


def dummy_payroll_api_call(employee_id, month, year):
    """Return a canned payslip, standing in for a real payroll API.

    Args:
        employee_id: Accepted for interface compatibility but not used; the
            same sample employee is returned for every id.
        month: Three-letter month key such as "JAN", or the alias "CUR"
            (resolved to "MAY") or "PREV" (resolved to "APR"). Matching is
            case-insensitive.
        year: 2023 or 2024, the same number as a digit string, or the alias
            "CUR" (resolved to 2024) or "PREV" (resolved to 2023).

    Returns:
        A dict with "employeeDetails", "paymentDetails" and "companyDetails".

    Raises:
        KeyError: If no payslip exists for the resolved year and month.
    """
    data = {
        2023: {
            "MAY": _build_payslip(2023, "MAY", 5500, 1300),
        },
        2024: {
            "JAN": _build_payslip(2024, "JAN", 6500, 1300),
            "FEB": _build_payslip(2024, "FEB", 6500, 1300),
            "MAY": _build_payslip(2024, "MAY", 6500, 1500),
            "APR": _build_payslip(2024, "APR", 6500, 1500),
        },
    }

    # Normalise textual input so "may", " MAY " and "2024" resolve as well.
    if isinstance(year, str):
        year = year.strip().upper()
        year = {"CUR": 2024, "PREV": 2023}.get(year, year)
        if isinstance(year, str) and year.isdigit():
            year = int(year)
    if isinstance(month, str):
        month = month.strip().upper()
        month = {"CUR": "MAY", "PREV": "APR"}.get(month, month)

    return data[year][month]
322
+
323
+ print(dummy_payroll_api_call(1234, 'CUR', 2024))
324
+
325
+ import time
326
+ from langchain.prompts import PromptTemplate
327
+ from langchain_core.output_parsers import JsonOutputParser
328
+ from langchain_core.output_parsers import StrOutputParser
329
+
330
# Llama-3 chat-formatted system prompt for the routing step. The agent names
# spelled out here must match, character for character, the values that
# route_question() compares against ('policy_agent' / 'payroll_agent');
# otherwise the model is being instructed to emit a value the router rejects.
ROUTER_AGENT_PROMPT_TEMPLATE = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert at delegating user questions to one of the most appropriate agents 'policy_agent' or 'payroll_agent'.

Use the following criteria to determine the appropriate agents to answer the user query:

- If the query is regarding payslips, salary, tax deductions, basepay of a given month, use the 'payroll_agent'.
- If the question is closely related to general human resource queries, organisational policies, prompt engineering, or adversarial attacks, even if the keywords are not explicitly mentioned, use the 'policy_agent'.

Your output should be a JSON object with a single key 'agent' and a value of either 'policy_agent' or 'payroll_agent'. Do not include any preamble, explanation, or additional text.

User's Question: {question}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

router_prompt = PromptTemplate(
    template=ROUTER_AGENT_PROMPT_TEMPLATE, input_variables=["question"]
)

# prompt -> LLM -> JSON parser; the parsed output is expected to look like
# {"agent": "<agent name>"}.
router_chain = router_prompt | llm | JsonOutputParser()

# Quick manual checks: one payroll-style and one policy-style question.
print(router_chain.invoke({"question":"What is my salary on 6 2024 ?"}))

print(router_chain.invoke({"question":"What is leave policy ?"}))
356
+
357
def _field(json_type, description):
    """Return a leaf JSON-schema property of the given type."""
    return {"type": json_type, "description": description}


def _record(properties):
    """Return an object schema in which every listed property is required."""
    return {
        "type": "object",
        "properties": properties,
        "required": list(properties),
    }


def _line_items(noun):
    """Return an array schema of {type, amount} entries described as *noun*."""
    return {
        "type": "array",
        "items": _record(
            {
                "type": _field("string", f"Type of {noun}"),
                "amount": _field("number", f"Amount of the {noun}"),
            }
        ),
    }


# JSON-schema (draft-07) description of one monthly payslip record. It is
# handed to the LLM alongside the payroll data so it can interpret the fields.
payroll_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Monthly Payslip",
    "description": "A schema for a monthly payslip",
    **_record(
        {
            "employeeDetails": _record(
                {
                    "employeeId": _field("string", "Unique identifier for the employee"),
                    "firstName": _field("string", "First name of the employee"),
                    "lastName": _field("string", "Last name of the employee"),
                    "designation": _field("string", "Designation or job title of the employee"),
                }
            ),
            "paymentDetails": _record(
                {
                    "year": _field("integer", "Year of the pay period"),
                    "month": {
                        "type": "string",
                        "enum": ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"],
                        "description": "Month of the pay period",
                    },
                    "basicSalary": _field("number", "Basic salary of the employee"),
                    "allowances": _line_items("allowance"),
                    "deductions": _line_items("deduction"),
                    "taxes": _line_items("tax"),
                    "grossSalary": _field("number", "Gross salary (basic salary + allowances)"),
                    "totalDeductions": _field("number", "Total deductions (including taxes)"),
                    "netSalary": _field("number", "Net salary (gross salary - total deductions)"),
                }
            ),
            "companyDetails": _record(
                {
                    "companyName": _field("string", "Name of the company"),
                    "address": _field("string", "Address of the company"),
                }
            ),
        }
    ),
}

print(str(payroll_schema))
486
+
487
+ import time
488
+ from langchain.prompts import PromptTemplate
489
+ from langchain_core.output_parsers import JsonOutputParser
490
+ from langchain_core.output_parsers import StrOutputParser
491
+
492
# Llama-3 chat-formatted prompt that asks the model to pull the pay period
# (month and year) out of a free-text payroll question and reply with JSON.
FILTER_EXTTRACTION_PROMPT = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Extract the month and year from a given user question about payroll. Use the following schema instructions to guide your extraction.

Instructions:
1. Your output should be a JSON object with only two keys, 'month' and 'year'.
2. 'month' key shall have value ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
3. 'year' shall be a number between 2020 and 2024.
4. If the user is suggesting current year or month, respond with "CUR" for 'month' and 'year' keys accordingly
5. If the user is suggesting previous year or month, respond with "PREV" for 'month' and 'year' keys accordingly


Do not include any preamble, explanation, or additional text.

User Question: {question}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

filter_extraction_prompt = PromptTemplate(
    input_variables=["question"],
    template=FILTER_EXTTRACTION_PROMPT,
)

# NOTE: the spelling "fiter_extraction_chain" is kept as-is because other code
# in this file refers to the chain by this exact name.
fiter_extraction_chain = filter_extraction_prompt | llm | JsonOutputParser()

# Quick manual check of the extraction step.
print(
    fiter_extraction_chain.invoke(
        {"question":"What is my salary on 6 2024 ?"}
    )
)
515
+
516
+ import time
517
+ from langchain.prompts import PromptTemplate
518
+ from langchain_core.output_parsers import JsonOutputParser
519
+ from langchain_core.output_parsers import StrOutputParser
520
+
521
# Llama-3 chat-formatted prompt for answering a payroll question from a JSON
# payslip. The payroll data is included exactly once (the previous template
# repeated the "PAYROLL DATA" section, sending the same payload twice), and
# the quotes around the fallback answer are balanced.
PAYROLL_QA_PROMPT = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Answer the user query given the provided payroll data in json form. Use the provided schema to understand the payroll data structure. If you cannot answer the question with the provided information, please respond with "I don't know" without any preamble, explanation, or additional text

SCHEMA:
{schema}

PAYROLL DATA:
{data}

User Question: {question}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

payroll_qa_prompt = PromptTemplate(
    template=PAYROLL_QA_PROMPT, input_variables=["question", "data", "schema"]
)

# prompt -> LLM -> plain string answer.
payroll_qa_chain = payroll_qa_prompt | llm | StrOutputParser()
542
+
543
# Manual end-to-end walk through the payroll path:
# extract the pay period, fetch the payslip, then ask the QA chain.
_payroll_demo_question = "What is my salary on jan 2024 ?"

result = fiter_extraction_chain.invoke({"question": _payroll_demo_question})

api_result = dummy_payroll_api_call(1234, result["month"], result["year"])

payroll_qa_chain.invoke(
    {
        "question": _payroll_demo_question,
        "data": api_result,
        "schema": payroll_schema,
    }
)
552
+
553
+ from typing_extensions import TypedDict
554
+ from typing import List
555
+
556
+ ### State
557
+
558
class AgentState(TypedDict):
    """Shared state dictionary handed from node to node in the agent graph."""

    # The user's question driving the current run.
    question: str
    # The generated answer.
    answer: str
    # Context gathered for answering the question.
    # NOTE(review): the node functions in this file store Document objects
    # here (and the demo calls pass None) rather than plain strings — confirm
    # whether this annotation should be widened.
    documents: List[str]
562
+
563
+ import logging as log
564
+
565
def route_question(state):
    """
    Route the question to payroll_agent or policy_agent to retrieve relevant data.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        str: Name of the next node to call, either "payroll_agent" or
            "policy_agent".

    Raises:
        ValueError: If the router output is not a dict carrying one of the
            two known agent names under the "agent" key.
    """
    question = state["question"]
    result = router_chain.invoke({"question": question})

    # The router output comes from an LLM, so it may be malformed or name an
    # agent that does not exist. Fail loudly here instead of returning None,
    # which would only surface later as an obscure error in the graph.
    agent = result.get("agent") if isinstance(result, dict) else None
    if agent not in ("payroll_agent", "policy_agent"):
        raise ValueError(
            "Router returned an unknown agent: {!r}".format(result)
        )

    log.debug('Routing to %s....', agent)
    return agent
587
+
588
# Smoke-test the router with a payroll-style question.
state = AgentState(
    question="What is my salary on jan 2024 ?",
    answer="",
    documents=None,
)
route_question(state)
590
+
591
+ from langchain.schema import Document
592
def retrieve_policy(state):
    """
    Look up policy documents for the question held in the graph state.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        dict: State update with "documents" (whatever the compression
            retriever returns for the question) and the original "question".
    """
    log.debug("Retreiving policy documents.......")
    user_question = state["question"]
    return {
        "documents": compression_retriever.invoke(user_question),
        "question": user_question,
    }
606
+
607
# Smoke-test policy retrieval with a policy-style question.
state = AgentState(
    question="What is leave policy?",
    answer="",
    documents=None,
)
retrieve_policy(state)
609
+
610
def generate_answer(state):
    """
    Produce the final answer from the question and the gathered documents.

    Args:
        state (dict): The current graph state; must contain "question" and
            "documents".

    Returns:
        dict: State update carrying "documents" and "question" unchanged plus
            "answer", the output of the response chain.
    """
    log.debug("Generating answer.......")
    user_question = state["question"]
    context_docs = state["documents"]

    # The documents are passed to the response chain as its "context" input.
    generated = response_chain.invoke(
        {"context": context_docs, "question": user_question}
    )

    return dict(documents=context_docs, question=user_question, answer=generated)
628
+
629
# Smoke-test answer generation with a hand-written context document.
_leave_policy_doc = Document(
    page_content="According to leave policy, there are two types of leaves 1: PL 2: CL"
)
state = AgentState(
    question="What is leave policy?",
    answer="",
    documents=[_leave_policy_doc],
)
generate_answer(state)
631
+
632
def query_payroll(state):
    """
    Query the payroll API to retrieve payroll data for the user's question.

    The month and year are extracted from the question by the filter
    extraction chain and then used to look up the matching payslip. The
    payslip is wrapped, together with the payroll schema, in a single
    Document so the answer generator can consume it like retrieved text.

    Args:
        state (dict): The current graph state; must contain "question".

    Returns:
        dict: State update with "documents" (a one-element list holding the
            payroll context Document) and the original "question".
    """
    question = state["question"]
    payroll_query_filters = fiter_extraction_chain.invoke({"question": question})

    # Use the filters extracted for THIS question. The previous code read a
    # module-level `result` left over from an earlier demo call, so every
    # question was answered with that stale month/year (or failed with a
    # NameError when the global did not exist).
    # NOTE: the employee id is still the hard-coded demo value 1234.
    payroll_api_query_results = dummy_payroll_api_call(
        1234, payroll_query_filters["month"], payroll_query_filters["year"]
    )

    context = 'PAYROLL DATA SCHEMA: \n {payroll_schema} \n PAYROLL DATA: {payroll_api_query_results}'.format(
        payroll_schema=payroll_schema, payroll_api_query_results=payroll_api_query_results)

    documents = [Document(page_content=context)]
    return {"documents": documents, "question": question}
654
+
655
# Smoke-test the payroll node with a payroll-style question.
state = AgentState(
    question="Tell me salary for Jan 2024?",
    answer="",
    documents=None,
)
query_payroll(state)
657
+
658
+ from langgraph.graph import END, StateGraph
659
# Build the agent graph: a conditional entry point picks either the payroll
# or the policy node, and both feed the answer generator before the run ends.
workflow = StateGraph(AgentState)

# Register the nodes.
for _node_name, _node_fn in (
    ("payroll_agent", query_payroll),
    ("policy_agent", retrieve_policy),
    ("generator_agent", generate_answer),
):
    workflow.add_node(_node_name, _node_fn)

# route_question returns the name of the node to start with; the mapping
# translates that return value into the node to run.
workflow.set_conditional_entry_point(
    route_question,
    {
        "payroll_agent": "payroll_agent",
        "policy_agent": "policy_agent",
    },
)

# Both data-gathering nodes hand over to the generator, which ends the run.
for _source_node in ("payroll_agent", "policy_agent"):
    workflow.add_edge(_source_node, "generator_agent")
workflow.add_edge("generator_agent", END)

app = workflow.compile()
678
+
679
+
680
+ from langchain_core.runnables import chain
681
@chain
def sage_chain(question):
    """Run the compiled agent graph for *question* and return its result."""
    return app.invoke({"question": question})
tests/__init__.py ADDED
File without changes
tests/test_chain.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from chain import rag_chain
3
+
4
class TestRAGChain(unittest.TestCase):
    """Test that invokes the imported rag_chain with one fixed question."""

    def test_rag_chain(self):
        """Invoke the chain and compare its answer with a fixed string."""
        question = "Who are Meta's 'Directors' (i.e., members of the Board of Directors)?"
        director_list = "Peggy Alford, Marc L. Andreessen, Andrew W. Houston, Nancy Killefer, Robert M. Kimmitt, Sheryl K. Sandberg, Tracey T. Travis, Tony Xu"

        answer = rag_chain.invoke(question)

        # NOTE(review): this passes when the answer DIFFERS from the list of
        # directors — confirm that an inequality check is really intended.
        self.assertNotEqual(answer, director_list)


if __name__ == '__main__':
    unittest.main()