Spaces:
Running
Running
first commit to deploy
Browse files- .flake8 +8 -0
- .gitignore +14 -0
- .pre-commit-config.yaml +53 -0
- README.md +1 -1
- app.py +74 -0
- data/indices/auto_merging/d5c92a9b2f/default__vector_store.json +0 -0
- data/indices/auto_merging/d5c92a9b2f/docstore.json +0 -0
- data/indices/auto_merging/d5c92a9b2f/graph_store.json +1 -0
- data/indices/auto_merging/d5c92a9b2f/image__vector_store.json +1 -0
- data/indices/auto_merging/d5c92a9b2f/index_store.json +1 -0
- data/indices/auto_merging/d5c92a9b2f/meta.json +9 -0
- data/indices/basic/default__vector_store.json +0 -0
- data/indices/basic/docstore.json +0 -0
- data/indices/basic/graph_store.json +1 -0
- data/indices/basic/image__vector_store.json +1 -0
- data/indices/basic/index_store.json +1 -0
- data/indices/sentence_window/61a981e27b/default__vector_store.json +0 -0
- data/indices/sentence_window/61a981e27b/docstore.json +0 -0
- data/indices/sentence_window/61a981e27b/graph_store.json +1 -0
- data/indices/sentence_window/61a981e27b/image__vector_store.json +1 -0
- data/indices/sentence_window/61a981e27b/index_store.json +1 -0
- data/indices/sentence_window/61a981e27b/meta.json +5 -0
- pyproject.toml +51 -0
- requirements.txt +266 -0
- src/mythesis_chatbot/rag_setup.py +264 -0
- src/mythesis_chatbot/utils.py +23 -0
.flake8
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[flake8]
|
2 |
+
ignore = E203, W503
|
3 |
+
# matches black's default
|
4 |
+
max-line-length = 88
|
5 |
+
docstring-convention = numpy
|
6 |
+
per-file-ignores =
|
7 |
+
__init__.py:F401
|
8 |
+
exclude = .git, __pycache__, .venv
|
.gitignore
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# poetry
|
2 |
+
# It is generally recommended to include poetry.lock in version control.
|
3 |
+
# This is especially recommended for binary packages to ensure reproducibility,
|
4 |
+
# and is more commonly ignored for libraries.
|
5 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
6 |
+
#poetry.lock
|
7 |
+
|
8 |
+
# Environments
|
9 |
+
.env
|
10 |
+
.venv
|
11 |
+
|
12 |
+
__pycache__/
|
13 |
+
|
14 |
+
*.sqlite
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
default_language_version:
|
2 |
+
python: python3.10
|
3 |
+
|
4 |
+
repos:
|
5 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
6 |
+
rev: v5.0.0
|
7 |
+
hooks:
|
8 |
+
- id: check-json
|
9 |
+
description: This hook checks json files for parseable syntax.
|
10 |
+
- id: check-yaml
|
11 |
+
description: This hook checks yaml files for parseable syntax.
|
12 |
+
- id: check-toml
|
13 |
+
description: This hook checks toml files for parseable syntax.
|
14 |
+
- id: check-ast
|
15 |
+
description: Simply check whether files parse as valid python.
|
16 |
+
- id: check-merge-conflict
|
17 |
+
description: Check for files that contain merge conflict strings.
|
18 |
+
- id: debug-statements
|
19 |
+
description: Check for debugger imports and py37+ `breakpoint()` calls in python source.
|
20 |
+
- id: end-of-file-fixer
|
21 |
+
description: Ensures that a file is either empty, or ends with one newline.
|
22 |
+
- id: name-tests-test
|
23 |
+
description: This verifies that test files are named correctly - test* format (prefix and not suffix)
|
24 |
+
args: ["--pytest-test-first"]
|
25 |
+
- id: trailing-whitespace
|
26 |
+
args: ["--markdown-linebreak-ext=md"]
|
27 |
+
description: This hook trims trailing whitespace.
|
28 |
+
|
29 |
+
- repo: https://github.com/asottile/pyupgrade
|
30 |
+
rev: v3.19.1
|
31 |
+
hooks:
|
32 |
+
- id: pyupgrade
|
33 |
+
args: [--py310-plus]
|
34 |
+
|
35 |
+
# Pre-commit is installed by poetry alongside black, isort, flake8
|
36 |
+
- repo: local
|
37 |
+
hooks:
|
38 |
+
- id: black
|
39 |
+
name: black
|
40 |
+
entry: poetry run black --config pyproject.toml
|
41 |
+
language: system
|
42 |
+
require_serial: true
|
43 |
+
types: [python]
|
44 |
+
- id: isort
|
45 |
+
name: isort
|
46 |
+
entry: poetry run isort --settings-path pyproject.toml
|
47 |
+
language: system
|
48 |
+
types: [python]
|
49 |
+
- id: flake8
|
50 |
+
name: flake8
|
51 |
+
entry: poetry run flake8 --config .flake8
|
52 |
+
language: system
|
53 |
+
types: [python]
|
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
short_description: RAG chatbot trained on my master thesis.
|
11 |
---
|
12 |
|
13 |
-
|
|
|
10 |
short_description: RAG chatbot trained on my master thesis.
|
11 |
---
|
12 |
|
13 |
+
This is a private demo using my own OpenAI API key. Please use responsibly.
|
app.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from mythesis_chatbot.rag_setup import (
|
4 |
+
SupportedRags,
|
5 |
+
automerging_retrieval_setup,
|
6 |
+
basic_rag_setup,
|
7 |
+
sentence_window_retrieval_setup,
|
8 |
+
)
|
9 |
+
|
10 |
+
# Source document and index directory handed to every setup call below.
input_file = "./data/Master_Thesis.pdf"
save_dir = "./data/indices/"

# Keyword arguments that all three engines are configured with.
_shared_engine_kwargs = {
    "input_file": input_file,
    "save_dir": save_dir,
    "llm_openai_model": "gpt-4o-mini",
    "embed_model": "BAAI/bge-small-en-v1.5",
    "similarity_top_k": 6,
    "rerank_model": "cross-encoder/ms-marco-MiniLM-L-2-v2",
    "rerank_top_n": 2,
}

# One engine per RAG mode, created once at import time and reused by chat_bot.
automerging_engine = automerging_retrieval_setup(
    chunk_sizes=[2048, 512, 128],
    **_shared_engine_kwargs,
)

sentence_window_engine = sentence_window_retrieval_setup(
    sentence_window_size=3,
    **_shared_engine_kwargs,
)

basic_engine = basic_rag_setup(**_shared_engine_kwargs)
|
44 |
+
|
45 |
+
|
46 |
+
def chat_bot(query: str, rag_mode: SupportedRags) -> str:
    """Answer ``query`` with the engine selected by ``rag_mode``.

    Parameters
    ----------
    query : str
        Question passed unchanged to the selected engine's ``query`` method.
    rag_mode : SupportedRags
        One of ``"basic"``, ``"auto-merging retrieval"`` or
        ``"sentence window retrieval"``.

    Returns
    -------
    str
        The ``response`` attribute of the selected engine's query result.

    Raises
    ------
    ValueError
        If ``rag_mode`` is not one of the modes listed above.
    """
    if rag_mode == "basic":
        return basic_engine.query(query).response
    if rag_mode == "auto-merging retrieval":
        return automerging_engine.query(query).response
    if rag_mode == "sentence window retrieval":
        return sentence_window_engine.query(query).response
    # Fail loudly instead of falling through and returning None, which would
    # contradict the declared ``str`` return type.
    raise ValueError(f"Unsupported RAG mode: {rag_mode!r}")
|
53 |
+
|
54 |
+
|
55 |
+
# Placeholder text for the query textbox. Adjacent string literals are
# concatenated as-is, so the first one has to end with a space.
default_message = (
    "Ask a question about a topic that is discussed in my master thesis. "
    "E.g., what is epistemic uncertainty?"
)
|
59 |
+
|
60 |
+
# Input widgets: a free-text query box and a selector for the RAG mode.
_query_box = gr.Textbox(placeholder=default_message)
_rag_mode_selector = gr.Dropdown(
    choices=["basic", "sentence window retrieval", "auto-merging retrieval"],
    label="RAG mode",
    value="basic",
)

# Interface that passes both inputs to chat_bot and shows its text result.
gradio_app = gr.Interface(
    fn=chat_bot,
    inputs=[_query_box, _rag_mode_selector],
    outputs=["text"],
)

if __name__ == "__main__":
    gradio_app.launch()
|
data/indices/auto_merging/d5c92a9b2f/default__vector_store.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/auto_merging/d5c92a9b2f/docstore.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/auto_merging/d5c92a9b2f/graph_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"graph_dict": {}}
|
data/indices/auto_merging/d5c92a9b2f/image__vector_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
|
data/indices/auto_merging/d5c92a9b2f/index_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"index_store/data": {"01d4fc3b-cde7-4254-a776-0340613d6542": {"__type__": "vector_store", "__data__": "{\"index_id\": \"01d4fc3b-cde7-4254-a776-0340613d6542\", \"summary\": null, \"nodes_dict\": {\"127eed05-f4c0-492c-aebb-fe8ea02ca1e5\": \"127eed05-f4c0-492c-aebb-fe8ea02ca1e5\", \"2935ee7a-e93e-49cf-95c5-78bc8dff3e9a\": \"2935ee7a-e93e-49cf-95c5-78bc8dff3e9a\", \"4a4dbbb4-76fe-4fdd-a535-6b639d06fed1\": \"4a4dbbb4-76fe-4fdd-a535-6b639d06fed1\", \"4149f79f-033e-4947-89f4-d0f132e26664\": \"4149f79f-033e-4947-89f4-d0f132e26664\", \"70d2a7d7-8312-47b7-8e75-cdac5ba2e41c\": \"70d2a7d7-8312-47b7-8e75-cdac5ba2e41c\", \"6ba0ab8c-7d35-464d-b1a7-3e51a3305397\": \"6ba0ab8c-7d35-464d-b1a7-3e51a3305397\", \"f48df62c-d67c-4992-a458-b8b82f24a21e\": \"f48df62c-d67c-4992-a458-b8b82f24a21e\", \"d9b7d187-5420-4a21-ba94-b677e41b31c5\": \"d9b7d187-5420-4a21-ba94-b677e41b31c5\", \"d18aaf46-78ba-4ac4-b2d8-04877df01f04\": \"d18aaf46-78ba-4ac4-b2d8-04877df01f04\", \"823db52f-3c7c-49f2-b06d-6c592db7ef42\": \"823db52f-3c7c-49f2-b06d-6c592db7ef42\", \"774f5af1-b593-47eb-9d6a-354c538e08f3\": \"774f5af1-b593-47eb-9d6a-354c538e08f3\", \"b0a8bf13-67ba-44ed-b2f8-ba822e7faadb\": \"b0a8bf13-67ba-44ed-b2f8-ba822e7faadb\", \"e2dfa4a4-b378-4b62-84f7-bef1ec7ceb0f\": \"e2dfa4a4-b378-4b62-84f7-bef1ec7ceb0f\", \"63f8f248-245f-4955-bedd-a2866caaed7a\": \"63f8f248-245f-4955-bedd-a2866caaed7a\", \"fe153069-6018-4026-9d8a-b4685d64ed38\": \"fe153069-6018-4026-9d8a-b4685d64ed38\", \"9c5c5819-4547-48ff-8f13-f22d11438a3a\": \"9c5c5819-4547-48ff-8f13-f22d11438a3a\", \"c6fb2fef-6474-47b8-9092-b026ab781d2b\": \"c6fb2fef-6474-47b8-9092-b026ab781d2b\", \"ecf9695e-b641-4b46-a467-0bd43708cbe0\": \"ecf9695e-b641-4b46-a467-0bd43708cbe0\", \"d724a087-a3d0-4009-bd38-b806365de8c7\": \"d724a087-a3d0-4009-bd38-b806365de8c7\", \"63e59f20-3737-45d8-9a92-d32c74bb7166\": \"63e59f20-3737-45d8-9a92-d32c74bb7166\", \"066b93a5-3342-4cc4-8571-98cadb2a3164\": \"066b93a5-3342-4cc4-8571-98cadb2a3164\", 
\"58ad19b7-324c-4be6-a9b8-3fb64690c7bb\": \"58ad19b7-324c-4be6-a9b8-3fb64690c7bb\", \"9922a1b5-0c0b-4c9f-bfe5-b353886f674a\": \"9922a1b5-0c0b-4c9f-bfe5-b353886f674a\", \"1a2a3619-bfd3-4b5d-8688-08beca93b771\": \"1a2a3619-bfd3-4b5d-8688-08beca93b771\", \"fd961f40-ff40-4c4d-9fc2-ae381ad4b060\": \"fd961f40-ff40-4c4d-9fc2-ae381ad4b060\", \"e99348f4-24f8-4709-b5b5-311c923a356a\": \"e99348f4-24f8-4709-b5b5-311c923a356a\", \"fa140f79-d1f4-444a-b07c-c3c2b00bcb6a\": \"fa140f79-d1f4-444a-b07c-c3c2b00bcb6a\", \"9071c131-fcde-4881-9954-dfdd8b1909a3\": \"9071c131-fcde-4881-9954-dfdd8b1909a3\", \"4dec264a-3556-4369-a51f-fa460f2e3e67\": \"4dec264a-3556-4369-a51f-fa460f2e3e67\", \"88b2e2ea-51e1-407b-a3db-6120ddb7a953\": \"88b2e2ea-51e1-407b-a3db-6120ddb7a953\", \"612e4522-6b23-4b77-a1a6-cc15d93cbfad\": \"612e4522-6b23-4b77-a1a6-cc15d93cbfad\", \"b1a24e8a-e7a6-490c-92fb-bf690a24a9cc\": \"b1a24e8a-e7a6-490c-92fb-bf690a24a9cc\", \"b4e16def-e018-4af7-9216-35a9f688b90e\": \"b4e16def-e018-4af7-9216-35a9f688b90e\", \"ba7f5982-2eee-425b-813b-c17de4d0bf65\": \"ba7f5982-2eee-425b-813b-c17de4d0bf65\", \"8159166f-ff8a-4af8-9c75-6af7349664af\": \"8159166f-ff8a-4af8-9c75-6af7349664af\", \"507c5eea-78f1-422c-a6d3-a187e4291c1f\": \"507c5eea-78f1-422c-a6d3-a187e4291c1f\", \"f78e2e16-decf-4534-bbd2-c0aab0f37021\": \"f78e2e16-decf-4534-bbd2-c0aab0f37021\", \"9fdd0b73-5596-42cb-9361-ac375d6f0415\": \"9fdd0b73-5596-42cb-9361-ac375d6f0415\", \"b62c885a-a745-427f-b3a1-5fb1988990bc\": \"b62c885a-a745-427f-b3a1-5fb1988990bc\", \"c313afad-778d-4960-b5ae-858e82effd8d\": \"c313afad-778d-4960-b5ae-858e82effd8d\", \"a6693f1e-bd6a-46b9-adc4-056ea375dab0\": \"a6693f1e-bd6a-46b9-adc4-056ea375dab0\", \"6b4320f6-7f58-47bd-82b4-42567bcb1bc8\": \"6b4320f6-7f58-47bd-82b4-42567bcb1bc8\", \"0a2e62df-7f45-47ee-9a98-86d449d32648\": \"0a2e62df-7f45-47ee-9a98-86d449d32648\", \"676f69bd-4650-4c90-a9fd-253354c2fd06\": \"676f69bd-4650-4c90-a9fd-253354c2fd06\", \"ea0f952a-51a3-4033-8131-4a6f189c4cba\": 
\"ea0f952a-51a3-4033-8131-4a6f189c4cba\", \"c9728fd6-54ea-4466-b7e4-71bf94f2db36\": \"c9728fd6-54ea-4466-b7e4-71bf94f2db36\", \"33a22272-74c1-4053-899b-f52ee8721489\": \"33a22272-74c1-4053-899b-f52ee8721489\", \"4e54f3b3-3cec-4266-9bfd-b7ba95ce2939\": \"4e54f3b3-3cec-4266-9bfd-b7ba95ce2939\", \"b11f795c-9011-4923-9346-e9949df69467\": \"b11f795c-9011-4923-9346-e9949df69467\", \"55eec99b-78ea-4843-af4a-46d61411510b\": \"55eec99b-78ea-4843-af4a-46d61411510b\", \"1b591c75-28e9-440c-8e65-5eaf1b979fc5\": \"1b591c75-28e9-440c-8e65-5eaf1b979fc5\", \"fd91c70a-adcb-4a30-9a73-73a17707624d\": \"fd91c70a-adcb-4a30-9a73-73a17707624d\", \"cc12d5e5-c058-4c6e-97f4-232c2f91af7a\": \"cc12d5e5-c058-4c6e-97f4-232c2f91af7a\", \"5713b650-3c69-4369-b4a1-25e79a3f4b4b\": \"5713b650-3c69-4369-b4a1-25e79a3f4b4b\", \"6216ecc3-4580-4861-905a-51f0c233a993\": \"6216ecc3-4580-4861-905a-51f0c233a993\", \"202ed815-c9f3-4180-976f-2b3d0fc99b3a\": \"202ed815-c9f3-4180-976f-2b3d0fc99b3a\", \"fc45659e-9e72-42e5-b1f5-e964f6474119\": \"fc45659e-9e72-42e5-b1f5-e964f6474119\", \"57d310ce-6e84-4805-b3ee-4137f01377e4\": \"57d310ce-6e84-4805-b3ee-4137f01377e4\", \"da804d94-7c21-4a4b-91f5-00ba625e411f\": \"da804d94-7c21-4a4b-91f5-00ba625e411f\", \"ae3e487e-3473-4c6c-a5bc-d6cec3cb8565\": \"ae3e487e-3473-4c6c-a5bc-d6cec3cb8565\", \"086f66d3-b416-41bd-af49-179420725658\": \"086f66d3-b416-41bd-af49-179420725658\", \"45da310f-48f1-4072-930a-ed4abccd4067\": \"45da310f-48f1-4072-930a-ed4abccd4067\", \"ebae6ed2-50e2-4e10-8c43-8d892eece566\": \"ebae6ed2-50e2-4e10-8c43-8d892eece566\", \"23c1c50a-828e-403c-ae44-4fcba39d6197\": \"23c1c50a-828e-403c-ae44-4fcba39d6197\", \"8d13191d-7fa0-401f-909f-e761626681de\": \"8d13191d-7fa0-401f-909f-e761626681de\", \"5f40ffb2-adf0-45f0-8837-0edd1db895a5\": \"5f40ffb2-adf0-45f0-8837-0edd1db895a5\", \"1d25b9e7-defd-4782-b095-1562832e7d21\": \"1d25b9e7-defd-4782-b095-1562832e7d21\", \"9013bbb3-cb21-4429-99cc-e8c7426f10fb\": \"9013bbb3-cb21-4429-99cc-e8c7426f10fb\", 
\"978359a5-1e65-4640-8a9f-343a2a59698d\": \"978359a5-1e65-4640-8a9f-343a2a59698d\", \"30c71308-f900-4bae-8ec6-d86d4dd13419\": \"30c71308-f900-4bae-8ec6-d86d4dd13419\", \"9819cc86-9766-441a-aa8c-40e186e24fea\": \"9819cc86-9766-441a-aa8c-40e186e24fea\", \"5c6de902-b23a-4c7e-855f-cd5eb652add2\": \"5c6de902-b23a-4c7e-855f-cd5eb652add2\", \"bb921f3b-6a8e-48ca-b4b5-e599a6d6c406\": \"bb921f3b-6a8e-48ca-b4b5-e599a6d6c406\", \"d4c91e89-461c-4294-ace0-23d5ce6747c0\": \"d4c91e89-461c-4294-ace0-23d5ce6747c0\", \"8f9fd36f-b761-4f40-a66c-6c4f75d4207b\": \"8f9fd36f-b761-4f40-a66c-6c4f75d4207b\", \"15c3b984-ff09-4f68-9d55-fe705c4f4ae4\": \"15c3b984-ff09-4f68-9d55-fe705c4f4ae4\", \"0526c5ab-8527-42b1-b1d8-1cfe710874f4\": \"0526c5ab-8527-42b1-b1d8-1cfe710874f4\", \"d4aa2f5a-33a8-453a-a6ad-c4bacf576fab\": \"d4aa2f5a-33a8-453a-a6ad-c4bacf576fab\", \"0ccd7498-9105-4204-bafd-6edc9cf75849\": \"0ccd7498-9105-4204-bafd-6edc9cf75849\", \"bd6b344e-d289-4006-a983-d9ffff88650f\": \"bd6b344e-d289-4006-a983-d9ffff88650f\", \"1fb579b0-32b1-4482-93af-f8335ef3d5e9\": \"1fb579b0-32b1-4482-93af-f8335ef3d5e9\", \"ab58e4bc-757c-4b83-8279-13bf36e223f4\": \"ab58e4bc-757c-4b83-8279-13bf36e223f4\", \"d6db2978-cf10-4fd6-949e-fc950bbf2598\": \"d6db2978-cf10-4fd6-949e-fc950bbf2598\", \"c2191d8c-3f02-4542-a078-164beec931a8\": \"c2191d8c-3f02-4542-a078-164beec931a8\", \"ca4bf718-63a5-4fb5-b024-7c448c880f95\": \"ca4bf718-63a5-4fb5-b024-7c448c880f95\", \"faf19732-15b6-4344-8678-a4b2c6a85dd1\": \"faf19732-15b6-4344-8678-a4b2c6a85dd1\", \"dddf8bf2-5179-4791-a310-7710be80004b\": \"dddf8bf2-5179-4791-a310-7710be80004b\", \"113bc7b3-5f99-45ca-b093-0b867520f051\": \"113bc7b3-5f99-45ca-b093-0b867520f051\", \"c6d6677b-f019-4c31-a8d0-503541971456\": \"c6d6677b-f019-4c31-a8d0-503541971456\", \"6b031f5f-0e40-4434-8ce5-d1e0ba08f7a0\": \"6b031f5f-0e40-4434-8ce5-d1e0ba08f7a0\", \"66cd63ad-1f7e-415f-99fb-100448af6aa2\": \"66cd63ad-1f7e-415f-99fb-100448af6aa2\", \"78390fc6-05b5-471f-9ed8-a2f24c6d53a8\": 
\"78390fc6-05b5-471f-9ed8-a2f24c6d53a8\", \"10d7b5e0-3e25-416f-bc00-2a01933c6b9b\": \"10d7b5e0-3e25-416f-bc00-2a01933c6b9b\", \"521b4f69-fc53-4ee1-aa3b-9a64d4689aab\": \"521b4f69-fc53-4ee1-aa3b-9a64d4689aab\", \"a63ebc03-cc47-4b22-bcbe-b26c328d8ad4\": \"a63ebc03-cc47-4b22-bcbe-b26c328d8ad4\", \"05cd4e21-ea82-46c9-9edb-0ce5101680e8\": \"05cd4e21-ea82-46c9-9edb-0ce5101680e8\", \"9ab01bf2-af0f-4b17-9232-46eca13fd2cc\": \"9ab01bf2-af0f-4b17-9232-46eca13fd2cc\", \"b8885c9f-be74-4eac-b33e-6388fca0496f\": \"b8885c9f-be74-4eac-b33e-6388fca0496f\", \"f6111520-529e-4fde-8438-3bdb22626f29\": \"f6111520-529e-4fde-8438-3bdb22626f29\", \"305361e0-447c-4b9a-bd75-cb60ce8e508a\": \"305361e0-447c-4b9a-bd75-cb60ce8e508a\", \"6d55016e-f6c2-4b28-9228-eae4e5982a3d\": \"6d55016e-f6c2-4b28-9228-eae4e5982a3d\", \"da53c1af-ff4e-4e59-a3b4-8ba2e6b62195\": \"da53c1af-ff4e-4e59-a3b4-8ba2e6b62195\", \"7cc0cbb7-3405-4440-bd3a-fe94b1d814f1\": \"7cc0cbb7-3405-4440-bd3a-fe94b1d814f1\", \"aa03814b-1151-47bf-b2c3-d5f728e0ed36\": \"aa03814b-1151-47bf-b2c3-d5f728e0ed36\", \"8fe63cef-848b-4b24-a35e-d09771946411\": \"8fe63cef-848b-4b24-a35e-d09771946411\", \"cfc29151-e033-4efa-8330-099a55348c4e\": \"cfc29151-e033-4efa-8330-099a55348c4e\", \"967d28de-3d2f-4905-8de3-660bf3740fca\": \"967d28de-3d2f-4905-8de3-660bf3740fca\", \"49bb4996-036c-48d4-9e30-5295bc7ac314\": \"49bb4996-036c-48d4-9e30-5295bc7ac314\", \"8fe6d34d-9c21-4656-86d3-732a702b4ec2\": \"8fe6d34d-9c21-4656-86d3-732a702b4ec2\", \"e09fef37-7df1-48a1-bfae-7d3cf85d77e9\": \"e09fef37-7df1-48a1-bfae-7d3cf85d77e9\", \"1efa753a-6d54-4d40-8bdf-0f52e6fea8b5\": \"1efa753a-6d54-4d40-8bdf-0f52e6fea8b5\", \"9d9f1478-1daf-40ec-8cd4-87f4fa12250a\": \"9d9f1478-1daf-40ec-8cd4-87f4fa12250a\", \"d77ea73b-146d-4d3a-b6b5-3f7d5bdca293\": \"d77ea73b-146d-4d3a-b6b5-3f7d5bdca293\", \"dfb85eda-0acd-4182-a136-c8692ca0a786\": \"dfb85eda-0acd-4182-a136-c8692ca0a786\", \"afd5e88c-7489-4f00-a9ad-44ecd97dbfb2\": \"afd5e88c-7489-4f00-a9ad-44ecd97dbfb2\", 
\"cb588028-5fd3-4f58-8384-ff9ebde0ae11\": \"cb588028-5fd3-4f58-8384-ff9ebde0ae11\", \"195ea30a-2725-4638-8edb-425d24fc565d\": \"195ea30a-2725-4638-8edb-425d24fc565d\", \"63e42cac-6151-43e7-85db-f41c374bb94b\": \"63e42cac-6151-43e7-85db-f41c374bb94b\", \"aac01576-0a03-407b-91cd-e222aecfd721\": \"aac01576-0a03-407b-91cd-e222aecfd721\", \"ab56bea2-c39a-444a-9434-dff308adbbea\": \"ab56bea2-c39a-444a-9434-dff308adbbea\", \"637776b7-4a00-4514-ae55-e2dfe5c640f6\": \"637776b7-4a00-4514-ae55-e2dfe5c640f6\", \"bbdf5750-fabd-4e11-b784-19a0b62b520b\": \"bbdf5750-fabd-4e11-b784-19a0b62b520b\", \"b10f58ab-250a-4f49-8497-16be51d652ee\": \"b10f58ab-250a-4f49-8497-16be51d652ee\", \"a0e4e6e7-b68e-4e73-b5e0-8d5d38b1bf70\": \"a0e4e6e7-b68e-4e73-b5e0-8d5d38b1bf70\", \"50d36569-0c7c-4cae-a1c0-6bb1c8990fc0\": \"50d36569-0c7c-4cae-a1c0-6bb1c8990fc0\", \"d8b051cc-f398-40c7-9326-433d79f1051d\": \"d8b051cc-f398-40c7-9326-433d79f1051d\", \"6859747b-0a8a-48c4-980f-6600405219ab\": \"6859747b-0a8a-48c4-980f-6600405219ab\", \"b682c9ff-86e2-42e3-a4e9-c188dd7cfb8f\": \"b682c9ff-86e2-42e3-a4e9-c188dd7cfb8f\", \"c4c3dd73-196b-4290-82dd-4046b9bb0ad8\": \"c4c3dd73-196b-4290-82dd-4046b9bb0ad8\", \"80817e53-55d3-43d3-83d1-a719a414926a\": \"80817e53-55d3-43d3-83d1-a719a414926a\", \"8e6e18c6-4421-4f3d-9290-ba27dfcaf95d\": \"8e6e18c6-4421-4f3d-9290-ba27dfcaf95d\", \"d71c30f3-4e73-4117-8927-b336191f9d1c\": \"d71c30f3-4e73-4117-8927-b336191f9d1c\", \"694102af-3409-4464-bb8d-19b1f381f253\": \"694102af-3409-4464-bb8d-19b1f381f253\", \"7a70e0d5-0a6d-4ae6-a4c2-e0290bc97d7d\": \"7a70e0d5-0a6d-4ae6-a4c2-e0290bc97d7d\", \"7a93bcb4-c2e9-4717-9c8b-2f99c1b5c97f\": \"7a93bcb4-c2e9-4717-9c8b-2f99c1b5c97f\", \"0faa8361-dc17-4fba-a3c1-eae0d76cc290\": \"0faa8361-dc17-4fba-a3c1-eae0d76cc290\", \"97b47867-e66b-4e98-bc15-fbde3135ba50\": \"97b47867-e66b-4e98-bc15-fbde3135ba50\", \"df6864be-1e32-4467-aeb8-81f7f38176e1\": \"df6864be-1e32-4467-aeb8-81f7f38176e1\", \"332fe951-d55a-44b7-b5a2-bfbeb1c57151\": 
\"332fe951-d55a-44b7-b5a2-bfbeb1c57151\", \"c0212a98-a9a5-4e81-abc4-7bcef93be942\": \"c0212a98-a9a5-4e81-abc4-7bcef93be942\", \"b42544bb-3912-482e-8047-ad5386607134\": \"b42544bb-3912-482e-8047-ad5386607134\", \"6feacb6d-c009-40f0-9392-dc2d6bd0cb7b\": \"6feacb6d-c009-40f0-9392-dc2d6bd0cb7b\", \"981cfbe8-bed1-4d15-97e9-dddfc96c1123\": \"981cfbe8-bed1-4d15-97e9-dddfc96c1123\", \"51b4e8a9-f9dd-4153-9ca8-36e2bb679353\": \"51b4e8a9-f9dd-4153-9ca8-36e2bb679353\", \"5b10e014-d531-451f-8425-864e25fa5558\": \"5b10e014-d531-451f-8425-864e25fa5558\", \"e44bddbb-d644-4ac6-8a1c-726ebf9627f4\": \"e44bddbb-d644-4ac6-8a1c-726ebf9627f4\", \"03f8aa5e-45cf-4f50-bd7e-d4f06ebefc23\": \"03f8aa5e-45cf-4f50-bd7e-d4f06ebefc23\", \"b7723603-864a-4164-9084-336d25f2890d\": \"b7723603-864a-4164-9084-336d25f2890d\", \"b2342ae1-ec3e-4d96-aaa4-1dfc15e8279b\": \"b2342ae1-ec3e-4d96-aaa4-1dfc15e8279b\", \"1c7a3a4e-db10-480e-9851-4b91640abc16\": \"1c7a3a4e-db10-480e-9851-4b91640abc16\", \"ef773976-8799-4129-979e-4d74984bd69d\": \"ef773976-8799-4129-979e-4d74984bd69d\", \"f85638bc-678e-4101-b240-962c6c1c8137\": \"f85638bc-678e-4101-b240-962c6c1c8137\", \"aba94add-6b16-4f14-9b0a-e5ec1ea041a0\": \"aba94add-6b16-4f14-9b0a-e5ec1ea041a0\", \"07e6b100-c219-485e-b418-d1d938ca0102\": \"07e6b100-c219-485e-b418-d1d938ca0102\", \"fced2909-7e86-41c3-924f-d06b644b76a7\": \"fced2909-7e86-41c3-924f-d06b644b76a7\", \"284789e6-68c0-4bf2-982c-288eaa266705\": \"284789e6-68c0-4bf2-982c-288eaa266705\", \"64b45aed-c073-4f5a-b33f-939104b0f21f\": \"64b45aed-c073-4f5a-b33f-939104b0f21f\", \"244967f0-6448-4a4f-9e2b-f96e26f682dd\": \"244967f0-6448-4a4f-9e2b-f96e26f682dd\", \"62e784ee-a4cf-474d-aabf-0044130aa013\": \"62e784ee-a4cf-474d-aabf-0044130aa013\", \"3f460913-76d7-44b2-820e-d4b1160f4b20\": \"3f460913-76d7-44b2-820e-d4b1160f4b20\", \"cb16543a-5ec6-4947-bb55-b9784e6e7c7c\": \"cb16543a-5ec6-4947-bb55-b9784e6e7c7c\", \"5235a666-1e07-4abc-8742-8e1bcf0e790d\": \"5235a666-1e07-4abc-8742-8e1bcf0e790d\", 
\"496a456f-2ec3-48cf-940f-602619bd2c6c\": \"496a456f-2ec3-48cf-940f-602619bd2c6c\", \"cb50208d-24e7-4954-972f-900604a9ab61\": \"cb50208d-24e7-4954-972f-900604a9ab61\", \"aaec41e4-2cd2-44d8-8c78-5ca1268a0780\": \"aaec41e4-2cd2-44d8-8c78-5ca1268a0780\", \"bae30186-8436-4dd3-ab19-b6c86c3d67c1\": \"bae30186-8436-4dd3-ab19-b6c86c3d67c1\", \"1fc0e546-5e5b-43fd-8efe-1aa0db4fad69\": \"1fc0e546-5e5b-43fd-8efe-1aa0db4fad69\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
|
data/indices/auto_merging/d5c92a9b2f/meta.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"doc_source": "Master_Thesis.pdf",
|
3 |
+
"embed_model": "BAAI/bge-small-en-v1.5",
|
4 |
+
"chunk_sizes": [
|
5 |
+
2048,
|
6 |
+
512,
|
7 |
+
128
|
8 |
+
]
|
9 |
+
}
|
data/indices/basic/default__vector_store.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/basic/docstore.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/basic/graph_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"graph_dict": {}}
|
data/indices/basic/image__vector_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
|
data/indices/basic/index_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"index_store/data": {"0765479e-c27c-4c88-b41f-dc98ed86f3a5": {"__type__": "vector_store", "__data__": "{\"index_id\": \"0765479e-c27c-4c88-b41f-dc98ed86f3a5\", \"summary\": null, \"nodes_dict\": {\"a31b64b9-d4c7-4ec6-8ca0-10d085d48205\": \"a31b64b9-d4c7-4ec6-8ca0-10d085d48205\", \"b17212a2-233f-492c-b53b-6a03c84d9f4f\": \"b17212a2-233f-492c-b53b-6a03c84d9f4f\", \"21f04a43-0875-4b86-94e4-93b11843969f\": \"21f04a43-0875-4b86-94e4-93b11843969f\", \"35cb1ff3-4a4f-4616-a7c1-af26f0b9d1e9\": \"35cb1ff3-4a4f-4616-a7c1-af26f0b9d1e9\", \"db73e86c-4c24-4992-b2c4-96c3cb23e27e\": \"db73e86c-4c24-4992-b2c4-96c3cb23e27e\", \"66023780-9b49-46a9-9204-8a4786f537e9\": \"66023780-9b49-46a9-9204-8a4786f537e9\", \"3ed814ba-a0a4-4328-802d-91780bc5964e\": \"3ed814ba-a0a4-4328-802d-91780bc5964e\", \"2ce891bb-b95c-457c-859e-7a8980eb98c2\": \"2ce891bb-b95c-457c-859e-7a8980eb98c2\", \"04a0e9e2-4399-4f8e-9bb7-4a92ca7f3dfa\": \"04a0e9e2-4399-4f8e-9bb7-4a92ca7f3dfa\", \"39a9562c-9040-4276-b6a3-0686a44bcf50\": \"39a9562c-9040-4276-b6a3-0686a44bcf50\", \"fe41da69-d1ee-4ce6-8888-34e9c9a759b5\": \"fe41da69-d1ee-4ce6-8888-34e9c9a759b5\", \"5e20f7b6-ac06-4a68-913b-419a2b585f5d\": \"5e20f7b6-ac06-4a68-913b-419a2b585f5d\", \"c0f8c088-1ff8-4839-a325-2522f014510a\": \"c0f8c088-1ff8-4839-a325-2522f014510a\", \"e29d9346-c846-4203-8c35-b8bb1e8fa481\": \"e29d9346-c846-4203-8c35-b8bb1e8fa481\", \"004767a1-85b2-466e-b117-b264a4667205\": \"004767a1-85b2-466e-b117-b264a4667205\", \"bd3c9a8c-f95b-478e-a4a2-01ed7f774d64\": \"bd3c9a8c-f95b-478e-a4a2-01ed7f774d64\", \"5084cc68-c3d7-4136-83c1-b8c48955719a\": \"5084cc68-c3d7-4136-83c1-b8c48955719a\", \"a1381f9d-d625-43ab-8869-28f2ec055ddd\": \"a1381f9d-d625-43ab-8869-28f2ec055ddd\", \"7e4d7d91-e79e-49e5-8c71-8a48fc38cb46\": \"7e4d7d91-e79e-49e5-8c71-8a48fc38cb46\", \"379dd8d5-bf98-4615-93a9-cba410df45ec\": \"379dd8d5-bf98-4615-93a9-cba410df45ec\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
|
data/indices/sentence_window/61a981e27b/default__vector_store.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/sentence_window/61a981e27b/docstore.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/indices/sentence_window/61a981e27b/graph_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"graph_dict": {}}
|
data/indices/sentence_window/61a981e27b/image__vector_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
|
data/indices/sentence_window/61a981e27b/index_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"index_store/data": {"dcb26062-97ca-4980-98ba-3ac367ae9e38": {"__type__": "vector_store", "__data__": "{\"index_id\": \"dcb26062-97ca-4980-98ba-3ac367ae9e38\", \"summary\": null, \"nodes_dict\": {\"9ab5067f-1cba-4f14-b483-f13e4943eb6f\": \"9ab5067f-1cba-4f14-b483-f13e4943eb6f\", \"4b7406f1-4038-428d-a41b-ed15736a3ad2\": \"4b7406f1-4038-428d-a41b-ed15736a3ad2\", \"19f2c43a-c9ef-4afc-9607-38a5a898b98b\": \"19f2c43a-c9ef-4afc-9607-38a5a898b98b\", \"0fd054f0-bd92-4e92-8037-4fcc7393ef0c\": \"0fd054f0-bd92-4e92-8037-4fcc7393ef0c\", \"d898ce41-24a0-4315-9a8e-056fc948525e\": \"d898ce41-24a0-4315-9a8e-056fc948525e\", \"cd43f204-6cd3-414d-82b8-bcbde2cceace\": \"cd43f204-6cd3-414d-82b8-bcbde2cceace\", \"dcc52b5f-0e96-4574-a39a-ab928669c990\": \"dcc52b5f-0e96-4574-a39a-ab928669c990\", \"5df6f5f8-cc1d-408c-b9a1-85e01ec94403\": \"5df6f5f8-cc1d-408c-b9a1-85e01ec94403\", \"2521c213-0049-4cc6-875f-a868a9ca718e\": \"2521c213-0049-4cc6-875f-a868a9ca718e\", \"12ee176d-2954-4255-ab0a-65a6b867ea3a\": \"12ee176d-2954-4255-ab0a-65a6b867ea3a\", \"3e0d57bd-2b67-4083-b6b4-566dd1f91eab\": \"3e0d57bd-2b67-4083-b6b4-566dd1f91eab\", \"aa04a66d-6118-40d8-8290-3cf7d233ebad\": \"aa04a66d-6118-40d8-8290-3cf7d233ebad\", \"0a5e52be-600c-465e-aad7-10b04c9a7c8a\": \"0a5e52be-600c-465e-aad7-10b04c9a7c8a\", \"489b851b-8e96-4f53-a3bb-32fadb4ccbf0\": \"489b851b-8e96-4f53-a3bb-32fadb4ccbf0\", \"75c6b57b-009a-4b1d-9099-f188ecd51a0a\": \"75c6b57b-009a-4b1d-9099-f188ecd51a0a\", \"6ac79648-9763-4226-ad60-ac9f6ce20778\": \"6ac79648-9763-4226-ad60-ac9f6ce20778\", \"45842347-a5ac-43ac-b405-f77ace8a56a7\": \"45842347-a5ac-43ac-b405-f77ace8a56a7\", \"26cefadf-7040-43ea-9003-a1a4ad189372\": \"26cefadf-7040-43ea-9003-a1a4ad189372\", \"b6cbbc8a-2737-4272-a9d4-2f608931f1df\": \"b6cbbc8a-2737-4272-a9d4-2f608931f1df\", \"b5e2bdd2-6e23-4ccc-9bbd-4f08b831e046\": \"b5e2bdd2-6e23-4ccc-9bbd-4f08b831e046\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
|
data/indices/sentence_window/61a981e27b/meta.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"doc_source": "Master_Thesis.pdf",
|
3 |
+
"embed_model": "BAAI/bge-small-en-v1.5",
|
4 |
+
"sentence_window_size": 3
|
5 |
+
}
|
pyproject.toml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "mythesis-chatbot"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "RAG chatbot trained on my master thesis."
|
5 |
+
authors = [
|
6 |
+
{name = "Léonard Pasi",email = "leonardpasi@gmail.com"}
|
7 |
+
]
|
8 |
+
readme = "README.md"
|
9 |
+
# Must be a PEP 440 version specifier; the caret form is Poetry-only syntax.
requires-python = ">=3.10,<4.0"
|
10 |
+
dependencies = [
|
11 |
+
"trulens (>=1.4.7,<2.0.0)",
|
12 |
+
"llama-index (>=0.12.28,<0.13.0)",
|
13 |
+
"pandas (>=2.2.3,<3.0.0)",
|
14 |
+
"llama-index-embeddings-huggingface (>=0.5.2,<0.6.0)",
|
15 |
+
"trulens-providers-openai (>=1.4.8,<2.0.0)",
|
16 |
+
"trulens-apps-llamaindex (>=1.4.8,<2.0.0)",
|
17 |
+
"gradio (==5.24.0)",
|
18 |
+
]
|
19 |
+
|
20 |
+
[tool.poetry]
|
21 |
+
packages = [{include = "mythesis_chatbot", from = "src"}]
|
22 |
+
|
23 |
+
[tool.poetry.group.dev.dependencies]
|
24 |
+
pre-commit = "^4.2.0"
|
25 |
+
flake8 = "^7.2.0"
|
26 |
+
black = "^25.1.0"
|
27 |
+
isort = "^6.0.1"
|
28 |
+
|
29 |
+
[tool.black]
|
30 |
+
line-length = 88
|
31 |
+
exclude = '''
|
32 |
+
/(
|
33 |
+
\.git
|
34 |
+
| \.mypy_cache
|
35 |
+
| \.tox
|
36 |
+
| \.venv
|
37 |
+
| _build
|
38 |
+
| buck-out
|
39 |
+
| build
|
40 |
+
| dist
|
41 |
+
)/
|
42 |
+
'''
|
43 |
+
|
44 |
+
[tool.isort]
|
45 |
+
profile="black"
|
46 |
+
known_first_party=["mythesis_chatbot"]
|
47 |
+
# Entries are top-level import names (llama-index is imported as llama_index).
known_third_party=["llama_index", "trulens"]
|
48 |
+
|
49 |
+
[build-system]
|
50 |
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
51 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohappyeyeballs==2.6.1 ; python_version >= "3.10" and python_version < "4.0"
|
2 |
+
aiohttp==3.11.16 ; python_version >= "3.10" and python_version < "4.0"
|
3 |
+
aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "4.0"
|
4 |
+
alembic==1.15.2 ; python_version >= "3.10" and python_version < "4.0"
|
5 |
+
altair==5.5.0 ; python_version >= "3.10" and python_version < "4.0"
|
6 |
+
annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
7 |
+
anyio==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
8 |
+
appnope==0.1.4 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Darwin"
|
9 |
+
argon2-cffi-bindings==21.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
10 |
+
argon2-cffi==23.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
11 |
+
arrow==1.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
12 |
+
asttokens==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
13 |
+
async-lru==2.0.5 ; python_version >= "3.10" and python_version < "4.0"
|
14 |
+
async-timeout==4.0.3 ; python_version == "3.10"
|
15 |
+
attrs==25.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
16 |
+
babel==2.17.0 ; python_version >= "3.10" and python_version < "4.0"
|
17 |
+
banks==2.1.1 ; python_version >= "3.10" and python_version < "4.0"
|
18 |
+
beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version < "4.0"
|
19 |
+
bleach==6.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
20 |
+
blinker==1.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
21 |
+
build==1.2.2.post1 ; python_version >= "3.10" and python_version < "4.0"
|
22 |
+
cachecontrol==0.14.2 ; python_version >= "3.10" and python_version < "4.0"
|
23 |
+
cachetools==5.5.2 ; python_version >= "3.10" and python_version < "4.0"
|
24 |
+
certifi==2025.1.31 ; python_version >= "3.10" and python_version < "4.0"
|
25 |
+
cffi==1.17.1 ; python_version >= "3.10" and python_version < "4.0"
|
26 |
+
charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "4.0"
|
27 |
+
cleo==2.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
28 |
+
click==8.1.8 ; python_version >= "3.10" and python_version < "4.0"
|
29 |
+
colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0"
|
30 |
+
comm==0.2.2 ; python_version >= "3.10" and python_version < "4.0"
|
31 |
+
crashtest==0.4.1 ; python_version >= "3.10" and python_version < "4.0"
|
32 |
+
cryptography==44.0.2 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "linux"
|
33 |
+
dataclasses-json==0.6.7 ; python_version >= "3.10" and python_version < "4.0"
|
34 |
+
debugpy==1.8.13 ; python_version >= "3.10" and python_version < "4.0"
|
35 |
+
decorator==5.2.1 ; python_version >= "3.10" and python_version < "4.0"
|
36 |
+
defusedxml==0.7.1 ; python_version >= "3.10" and python_version < "4.0"
|
37 |
+
deprecated==1.2.18 ; python_version >= "3.10" and python_version < "4.0"
|
38 |
+
dill==0.3.9 ; python_version >= "3.10" and python_version < "4.0"
|
39 |
+
dirtyjson==1.0.8 ; python_version >= "3.10" and python_version < "4.0"
|
40 |
+
distlib==0.3.9 ; python_version >= "3.10" and python_version < "4.0"
|
41 |
+
distro==1.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
42 |
+
dulwich==0.21.7 ; python_version >= "3.10" and python_version < "4.0"
|
43 |
+
exceptiongroup==1.2.2 ; python_version == "3.10"
|
44 |
+
executing==2.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
45 |
+
fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version < "4.0"
|
46 |
+
filelock==3.18.0 ; python_version >= "3.10" and python_version < "4.0"
|
47 |
+
filetype==1.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
48 |
+
fqdn==1.5.1 ; python_version >= "3.10" and python_version < "4.0"
|
49 |
+
frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
|
50 |
+
fsspec==2025.3.2 ; python_version >= "3.10" and python_version < "4.0"
|
51 |
+
gitdb==4.0.12 ; python_version >= "3.10" and python_version < "4.0"
|
52 |
+
gitpython==3.1.44 ; python_version >= "3.10" and python_version < "4.0"
|
53 |
+
greenlet==3.1.1 ; python_version >= "3.10" and python_version <= "3.13" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
|
54 |
+
griffe==1.7.2 ; python_version >= "3.10" and python_version < "4.0"
|
55 |
+
h11==0.14.0 ; python_version >= "3.10" and python_version < "4.0"
|
56 |
+
httpcore==1.0.7 ; python_version >= "3.10" and python_version < "4.0"
|
57 |
+
httpx-sse==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
58 |
+
httpx==0.28.1 ; python_version >= "3.10" and python_version < "4.0"
|
59 |
+
huggingface-hub==0.30.1 ; python_version >= "3.10" and python_version < "4.0"
|
60 |
+
idna==3.10 ; python_version >= "3.10" and python_version < "4.0"
|
61 |
+
importlib-metadata==8.6.1 ; python_version >= "3.10" and python_version < "4.0"
|
62 |
+
importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "4.0"
|
63 |
+
installer==0.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
64 |
+
ipykernel==6.29.5 ; python_version >= "3.10" and python_version < "4.0"
|
65 |
+
ipython-pygments-lexers==1.1.1 ; python_version >= "3.11" and python_version < "4.0"
|
66 |
+
ipython==8.34.0 ; python_version == "3.10"
|
67 |
+
ipython==9.0.2 ; python_version >= "3.11" and python_version < "4.0"
|
68 |
+
ipywidgets==8.1.5 ; python_version >= "3.10" and python_version < "4.0"
|
69 |
+
isoduration==20.11.0 ; python_version >= "3.10" and python_version < "4.0"
|
70 |
+
jaraco-classes==3.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
71 |
+
jedi==0.19.2 ; python_version >= "3.10" and python_version < "4.0"
|
72 |
+
jeepney==0.9.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "linux"
|
73 |
+
jinja2==3.1.6 ; python_version >= "3.10" and python_version < "4.0"
|
74 |
+
jiter==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
75 |
+
joblib==1.4.2 ; python_version >= "3.10" and python_version < "4.0"
|
76 |
+
json5==0.11.0 ; python_version >= "3.10" and python_version < "4.0"
|
77 |
+
jsonpatch==1.33 ; python_version >= "3.10" and python_version < "4.0"
|
78 |
+
jsonpointer==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
79 |
+
jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "4.0"
|
80 |
+
jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "4.0"
|
81 |
+
jupyter-client==8.6.3 ; python_version >= "3.10" and python_version < "4.0"
|
82 |
+
jupyter-console==6.6.3 ; python_version >= "3.10" and python_version < "4.0"
|
83 |
+
jupyter-core==5.7.2 ; python_version >= "3.10" and python_version < "4.0"
|
84 |
+
jupyter-events==0.12.0 ; python_version >= "3.10" and python_version < "4.0"
|
85 |
+
jupyter-lsp==2.2.5 ; python_version >= "3.10" and python_version < "4.0"
|
86 |
+
jupyter-server-terminals==0.5.3 ; python_version >= "3.10" and python_version < "4.0"
|
87 |
+
jupyter-server==2.15.0 ; python_version >= "3.10" and python_version < "4.0"
|
88 |
+
jupyter==1.1.1 ; python_version >= "3.10" and python_version < "4.0"
|
89 |
+
jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
90 |
+
jupyterlab-server==2.27.3 ; python_version >= "3.10" and python_version < "4.0"
|
91 |
+
jupyterlab-widgets==3.0.13 ; python_version >= "3.10" and python_version < "4.0"
|
92 |
+
jupyterlab==4.3.6 ; python_version >= "3.10" and python_version < "4.0"
|
93 |
+
keyring==24.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
94 |
+
langchain-community==0.3.21 ; python_version >= "3.10" and python_version < "4.0"
|
95 |
+
langchain-core==0.3.51 ; python_version >= "3.10" and python_version < "4.0"
|
96 |
+
langchain-text-splitters==0.3.8 ; python_version >= "3.10" and python_version < "4.0"
|
97 |
+
langchain==0.3.23 ; python_version >= "3.10" and python_version < "4.0"
|
98 |
+
langsmith==0.3.24 ; python_version >= "3.10" and python_version < "4.0"
|
99 |
+
llama-cloud-services==0.6.9 ; python_version >= "3.10" and python_version < "4.0"
|
100 |
+
llama-cloud==0.1.17 ; python_version >= "3.10" and python_version < "4.0"
|
101 |
+
llama-index-agent-openai==0.4.6 ; python_version >= "3.10" and python_version < "4.0"
|
102 |
+
llama-index-cli==0.4.1 ; python_version >= "3.10" and python_version < "4.0"
|
103 |
+
llama-index-core==0.12.28 ; python_version >= "3.10" and python_version < "4.0"
|
104 |
+
llama-index-embeddings-huggingface==0.5.2 ; python_version >= "3.10" and python_version < "4.0"
|
105 |
+
llama-index-embeddings-openai==0.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
106 |
+
llama-index-indices-managed-llama-cloud==0.6.10 ; python_version >= "3.10" and python_version < "4.0"
|
107 |
+
llama-index-llms-openai==0.3.29 ; python_version >= "3.10" and python_version < "4.0"
|
108 |
+
llama-index-multi-modal-llms-openai==0.4.3 ; python_version >= "3.10" and python_version < "4.0"
|
109 |
+
llama-index-program-openai==0.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
110 |
+
llama-index-question-gen-openai==0.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
111 |
+
llama-index-readers-file==0.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
112 |
+
llama-index-readers-llama-parse==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
113 |
+
llama-index==0.12.28 ; python_version >= "3.10" and python_version < "4.0"
|
114 |
+
llama-parse==0.6.4.post1 ; python_version >= "3.10" and python_version < "4.0"
|
115 |
+
mako==1.3.9 ; python_version >= "3.10" and python_version < "4.0"
|
116 |
+
markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
117 |
+
markupsafe==3.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
118 |
+
marshmallow==3.26.1 ; python_version >= "3.10" and python_version < "4.0"
|
119 |
+
matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version < "4.0"
|
120 |
+
mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0"
|
121 |
+
mistune==3.1.3 ; python_version >= "3.10" and python_version < "4.0"
|
122 |
+
more-itertools==10.6.0 ; python_version >= "3.10" and python_version < "4.0"
|
123 |
+
mpmath==1.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
124 |
+
msgpack==1.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
125 |
+
multidict==6.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
126 |
+
munch==2.5.0 ; python_version >= "3.10" and python_version < "4.0"
|
127 |
+
mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
128 |
+
narwhals==1.33.0 ; python_version >= "3.10" and python_version < "4.0"
|
129 |
+
nbclient==0.10.2 ; python_version >= "3.10" and python_version < "4.0"
|
130 |
+
nbconvert==7.16.6 ; python_version >= "3.10" and python_version < "4.0"
|
131 |
+
nbformat==5.10.4 ; python_version >= "3.10" and python_version < "4.0"
|
132 |
+
nest-asyncio==1.6.0 ; python_version >= "3.10" and python_version < "4.0"
|
133 |
+
networkx==3.4.2 ; python_version >= "3.10" and python_version < "4.0"
|
134 |
+
nltk==3.9.1 ; python_version >= "3.10" and python_version < "4.0"
|
135 |
+
notebook-shim==0.2.4 ; python_version >= "3.10" and python_version < "4.0"
|
136 |
+
notebook==7.3.3 ; python_version >= "3.10" and python_version < "4.0"
|
137 |
+
numpy==2.2.4 ; python_version >= "3.10" and python_version < "4.0"
|
138 |
+
nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
139 |
+
nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
140 |
+
nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
141 |
+
nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
142 |
+
nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
143 |
+
nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
144 |
+
nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
145 |
+
nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
146 |
+
nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
147 |
+
nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
148 |
+
nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
149 |
+
nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
150 |
+
nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
151 |
+
openai==1.70.0 ; python_version >= "3.10" and python_version < "4.0"
|
152 |
+
opentelemetry-api==1.31.1 ; python_version >= "3.10" and python_version < "4.0"
|
153 |
+
opentelemetry-proto==1.31.1 ; python_version >= "3.10" and python_version < "4.0"
|
154 |
+
opentelemetry-sdk==1.31.1 ; python_version >= "3.10" and python_version < "4.0"
|
155 |
+
opentelemetry-semantic-conventions==0.52b1 ; python_version >= "3.10" and python_version < "4.0"
|
156 |
+
orjson==3.10.16 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy"
|
157 |
+
overrides==7.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
158 |
+
packaging==24.2 ; python_version >= "3.10" and python_version < "4.0"
|
159 |
+
pandas==2.2.3 ; python_version >= "3.10" and python_version < "4.0"
|
160 |
+
pandocfilters==1.5.1 ; python_version >= "3.10" and python_version < "4.0"
|
161 |
+
parso==0.8.4 ; python_version >= "3.10" and python_version < "4.0"
|
162 |
+
pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
163 |
+
pillow==11.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
164 |
+
pkginfo==1.12.1.2 ; python_version >= "3.10" and python_version < "4.0"
|
165 |
+
platformdirs==4.3.7 ; python_version >= "3.10" and python_version < "4.0"
|
166 |
+
plotly==5.24.1 ; python_version >= "3.10" and python_version < "4.0"
|
167 |
+
poetry-core==1.9.1 ; python_version >= "3.10" and python_version < "4.0"
|
168 |
+
poetry-plugin-export==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
|
169 |
+
poetry==1.8.5 ; python_version >= "3.10" and python_version < "4.0"
|
170 |
+
prometheus-client==0.21.1 ; python_version >= "3.10" and python_version < "4.0"
|
171 |
+
prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version < "4.0"
|
172 |
+
propcache==0.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
173 |
+
protobuf==5.29.4 ; python_version >= "3.10" and python_version < "4.0"
|
174 |
+
psutil==5.9.8 ; python_version >= "3.10" and python_version < "4.0"
|
175 |
+
ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
176 |
+
pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "4.0"
|
177 |
+
pyarrow==19.0.1 ; python_version >= "3.10" and python_version < "4.0"
|
178 |
+
pycparser==2.22 ; python_version >= "3.10" and python_version < "4.0"
|
179 |
+
pydantic-core==2.33.0 ; python_version >= "3.10" and python_version < "4.0"
|
180 |
+
pydantic-settings==2.8.1 ; python_version >= "3.10" and python_version < "4.0"
|
181 |
+
pydantic==2.11.1 ; python_version >= "3.10" and python_version < "4.0"
|
182 |
+
pydeck==0.9.1 ; python_version >= "3.10" and python_version < "4.0"
|
183 |
+
pygments==2.19.1 ; python_version >= "3.10" and python_version < "4.0"
|
184 |
+
pypdf==5.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
185 |
+
pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
186 |
+
python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0"
|
187 |
+
python-decouple==3.8 ; python_version >= "3.10" and python_version < "4.0"
|
188 |
+
python-dotenv==1.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
189 |
+
python-json-logger==3.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
190 |
+
pytz==2025.2 ; python_version >= "3.10" and python_version < "4.0"
|
191 |
+
pywin32-ctypes==0.2.3 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "win32"
|
192 |
+
pywin32==310 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy" and sys_platform == "win32"
|
193 |
+
pywinpty==2.0.15 ; python_version >= "3.10" and python_version < "4.0" and os_name == "nt"
|
194 |
+
pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
195 |
+
pyzmq==26.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
196 |
+
rapidfuzz==3.12.2 ; python_version >= "3.10" and python_version < "4.0"
|
197 |
+
referencing==0.36.2 ; python_version >= "3.10" and python_version < "4.0"
|
198 |
+
regex==2024.11.6 ; python_version >= "3.10" and python_version < "4.0"
|
199 |
+
requests-toolbelt==1.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
200 |
+
requests==2.32.3 ; python_version >= "3.10" and python_version < "4.0"
|
201 |
+
rfc3339-validator==0.1.4 ; python_version >= "3.10" and python_version < "4.0"
|
202 |
+
rfc3986-validator==0.1.1 ; python_version >= "3.10" and python_version < "4.0"
|
203 |
+
rich==13.9.4 ; python_version >= "3.10" and python_version < "4.0"
|
204 |
+
rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "4.0"
|
205 |
+
safetensors==0.5.3 ; python_version >= "3.10" and python_version < "4.0"
|
206 |
+
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "4.0"
|
207 |
+
scipy==1.15.2 ; python_version >= "3.10" and python_version < "4.0"
|
208 |
+
secretstorage==3.3.3 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "linux"
|
209 |
+
send2trash==1.8.3 ; python_version >= "3.10" and python_version < "4.0"
|
210 |
+
sentence-transformers==4.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
211 |
+
setuptools==78.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
212 |
+
shellingham==1.5.4 ; python_version >= "3.10" and python_version < "4.0"
|
213 |
+
six==1.17.0 ; python_version >= "3.10" and python_version < "4.0"
|
214 |
+
smmap==5.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
215 |
+
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
216 |
+
soupsieve==2.6 ; python_version >= "3.10" and python_version < "4.0"
|
217 |
+
sqlalchemy==2.0.40 ; python_version >= "3.10" and python_version < "4.0"
|
218 |
+
stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0"
|
219 |
+
streamlit-aggrid==1.1.2 ; python_version >= "3.10" and python_version < "4.0"
|
220 |
+
streamlit==1.44.1 ; python_version >= "3.10" and python_version < "4.0"
|
221 |
+
striprtf==0.0.26 ; python_version >= "3.10" and python_version < "4.0"
|
222 |
+
sympy==1.13.1 ; python_version >= "3.10" and python_version < "4.0"
|
223 |
+
tenacity==9.1.2 ; python_version >= "3.10" and python_version < "4.0"
|
224 |
+
terminado==0.18.1 ; python_version >= "3.10" and python_version < "4.0"
|
225 |
+
threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version < "4.0"
|
226 |
+
tiktoken==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
227 |
+
tinycss2==1.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
228 |
+
tokenizers==0.21.1 ; python_version >= "3.10" and python_version < "4.0"
|
229 |
+
toml==0.10.2 ; python_version >= "3.10" and python_version < "4.0"
|
230 |
+
tomli==2.2.1 ; python_version == "3.10"
|
231 |
+
tomlkit==0.13.2 ; python_version >= "3.10" and python_version < "4.0"
|
232 |
+
torch==2.6.0 ; python_version >= "3.10" and python_version < "4.0"
|
233 |
+
tornado==6.4.2 ; python_version >= "3.10" and python_version < "4.0"
|
234 |
+
tqdm==4.67.1 ; python_version >= "3.10" and python_version < "4.0"
|
235 |
+
traitlets==5.14.3 ; python_version >= "3.10" and python_version < "4.0"
|
236 |
+
transformers==4.50.3 ; python_version >= "3.10" and python_version < "4.0"
|
237 |
+
triton==3.2.0 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" and platform_system == "Linux"
|
238 |
+
trove-classifiers==2025.3.19.19 ; python_version >= "3.10" and python_version < "4.0"
|
239 |
+
trulens-apps-langchain==1.4.8 ; python_version >= "3.10" and python_version < "4.0"
|
240 |
+
trulens-apps-llamaindex==1.4.8 ; python_version >= "3.10" and python_version < "4.0"
|
241 |
+
trulens-core==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
242 |
+
trulens-dashboard==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
243 |
+
trulens-eval==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
244 |
+
trulens-feedback==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
245 |
+
trulens-otel-semconv==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
246 |
+
trulens-providers-openai==1.4.8 ; python_version >= "3.10" and python_version < "4.0"
|
247 |
+
trulens==1.4.7 ; python_version >= "3.10" and python_version < "4.0"
|
248 |
+
types-python-dateutil==2.9.0.20241206 ; python_version >= "3.10" and python_version < "4.0"
|
249 |
+
typing-extensions==4.13.0 ; python_version >= "3.10" and python_version < "4.0"
|
250 |
+
typing-inspect==0.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
251 |
+
typing-inspection==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
252 |
+
tzdata==2025.2 ; python_version >= "3.10" and python_version < "4.0"
|
253 |
+
uri-template==1.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
254 |
+
urllib3==2.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
255 |
+
virtualenv==20.30.0 ; python_version >= "3.10" and python_version < "4.0"
|
256 |
+
watchdog==6.0.0 ; python_version >= "3.10" and python_version < "4.0" and platform_system != "Darwin"
|
257 |
+
wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4.0"
|
258 |
+
webcolors==24.11.1 ; python_version >= "3.10" and python_version < "4.0"
|
259 |
+
webencodings==0.5.1 ; python_version >= "3.10" and python_version < "4.0"
|
260 |
+
websocket-client==1.8.0 ; python_version >= "3.10" and python_version < "4.0"
|
261 |
+
widgetsnbextension==4.0.13 ; python_version >= "3.10" and python_version < "4.0"
|
262 |
+
wrapt==1.17.2 ; python_version >= "3.10" and python_version < "4.0"
|
263 |
+
xattr==1.1.4 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "darwin"
|
264 |
+
yarl==1.18.3 ; python_version >= "3.10" and python_version < "4.0"
|
265 |
+
zipp==3.21.0 ; python_version >= "3.10" and python_version < "4.0"
|
266 |
+
zstandard==0.23.0 ; python_version >= "3.10" and python_version < "4.0"
|
src/mythesis_chatbot/rag_setup.py
ADDED
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from typing import Literal
|
4 |
+
|
5 |
+
import openai
|
6 |
+
from llama_index.core import (
|
7 |
+
Document,
|
8 |
+
Settings,
|
9 |
+
SimpleDirectoryReader,
|
10 |
+
StorageContext,
|
11 |
+
VectorStoreIndex,
|
12 |
+
load_index_from_storage,
|
13 |
+
)
|
14 |
+
from llama_index.core.node_parser import (
|
15 |
+
HierarchicalNodeParser,
|
16 |
+
SentenceWindowNodeParser,
|
17 |
+
get_leaf_nodes,
|
18 |
+
)
|
19 |
+
from llama_index.core.postprocessor import (
|
20 |
+
MetadataReplacementPostProcessor,
|
21 |
+
SentenceTransformerRerank,
|
22 |
+
)
|
23 |
+
from llama_index.core.query_engine import RetrieverQueryEngine
|
24 |
+
from llama_index.core.retrievers import AutoMergingRetriever
|
25 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
26 |
+
from llama_index.llms.openai import OpenAI
|
27 |
+
|
28 |
+
from mythesis_chatbot.utils import get_config_hash, get_openai_api_key
|
29 |
+
|
30 |
+
# Closed set of RAG strategy names. Not referenced by the setup functions in this
# module; presumably consumed by the application layer (confirm against app.py).
SupportedRags = Literal["basic", "sentence window retrieval", "auto-merging retrieval"]
|
31 |
+
# OpenAI model names accepted by the `llm_openai_model` parameter of the *_setup
# functions in this module.
SupportedOpenAIllms = Literal["gpt-4o-mini", "gpt-3.5-turbo"]
|
32 |
+
# Embedding model names accepted by the `embed_model` parameter of the *_setup
# functions; the value is passed to HuggingFaceEmbedding as `model_name`.
SupportedEmbedModels = Literal["BAAI/bge-small-en-v1.5"]
|
33 |
+
# Reranker model names accepted by the `rerank_model` parameter of the *_setup
# functions; the value is passed to SentenceTransformerRerank as `model`.
SupportedRerankModels = Literal["cross-encoder/ms-marco-MiniLM-L-2-v2"]
|
34 |
+
|
35 |
+
|
36 |
+
def load_data(input_file: str) -> Document:
    """Read ``input_file`` and collapse it into a single ``Document``.

    Parameters
    ----------
    input_file : str
        Path of the file handed to ``SimpleDirectoryReader``.

    Returns
    -------
    Document
        One document whose text is the text of every loaded piece, joined
        with a blank line between consecutive pieces.
    """
    # The reader returns a list of Document objects (one per page, according
    # to the original author's note); they are merged into one text blob.
    pages = SimpleDirectoryReader(input_files=[input_file]).load_data()
    merged_text = "\n\n".join(page.text for page in pages)
    return Document(text=merged_text)
|
44 |
+
|
45 |
+
|
46 |
+
def build_sentence_window_index(
    input_file: str,
    save_dir: str,
    index_config: dict[str, str | int],
):
    """Build, or reload from disk, the sentence-window vector index.

    The index lives in ``<save_dir>/sentence_window/<hash of index_config>``.
    When that directory already exists the persisted index is loaded;
    otherwise the index is built from ``input_file``, persisted there, and
    ``index_config`` is written alongside it as ``meta.json``.

    Parameters
    ----------
    input_file : str
        Path of the source document; only read when the index must be built.
    save_dir : str
        Root directory under which indices are stored.
    index_config : dict
        Must provide ``"embed_model"`` and ``"sentence_window_size"``. The
        whole dict is hashed to name the index directory.

    Returns
    -------
    The vector index, either freshly built or loaded from storage.

    Notes
    -----
    Mutates the global ``Settings``: ``embed_model`` on every call and
    ``node_parser`` when a new index is built.
    """
    index_dir = os.path.join(
        save_dir, "sentence_window", get_config_hash(index_config)
    )

    Settings.embed_model = HuggingFaceEmbedding(model_name=index_config["embed_model"])

    # Fast path: an index for this exact configuration was persisted earlier.
    if os.path.exists(index_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=index_dir)
        )

    source_doc = load_data(input_file)

    # A node is a chunk of text. Each node produced by the sentence-window
    # parser also carries its context (the closest chunks of text) as metadata
    # under the "window" key.
    window_parser = SentenceWindowNodeParser.from_defaults(
        window_size=index_config["sentence_window_size"],
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    Settings.node_parser = window_parser

    sentence_index = VectorStoreIndex.from_documents([source_doc])
    sentence_index.storage_context.persist(persist_dir=index_dir)

    # Keep the human-readable configuration next to the hashed directory name.
    meta_path = os.path.join(index_dir, "meta.json")
    with open(meta_path, "w") as meta_file:
        json.dump(index_config, meta_file, indent=2)

    return sentence_index
|
82 |
+
|
83 |
+
|
84 |
+
def build_automerging_index(
    input_file: str,
    save_dir: str,
    index_config: dict[str, str | list[int]],
):
    """Build, or reload from disk, the hierarchical (auto-merging) index.

    The index lives in ``<save_dir>/auto_merging/<hash of index_config>``.
    When that directory already exists the persisted index is loaded;
    otherwise the document is parsed into a node hierarchy, every node is
    added to the docstore, only the leaf nodes are indexed, and the result is
    persisted together with a ``meta.json`` copy of ``index_config``.

    Parameters
    ----------
    input_file : str
        Path of the source document; only read when the index must be built.
    save_dir : str
        Root directory under which indices are stored.
    index_config : dict
        Must provide ``"embed_model"`` and ``"chunk_sizes"``. The whole dict
        is hashed to name the index directory.

    Returns
    -------
    The vector index, either freshly built or loaded from storage.

    Notes
    -----
    Mutates the global ``Settings``: ``embed_model`` on every call and
    ``node_parser`` when a new index is built.
    """
    index_dir = os.path.join(save_dir, "auto_merging", get_config_hash(index_config))

    Settings.embed_model = HuggingFaceEmbedding(model_name=index_config["embed_model"])

    # Fast path: an index for this exact configuration was persisted earlier.
    if os.path.exists(index_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=index_dir),
        )

    source_doc = load_data(input_file)
    hierarchical_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=index_config["chunk_sizes"]
    )
    all_nodes = hierarchical_parser.get_nodes_from_documents([source_doc])
    leaves = get_leaf_nodes(all_nodes)

    Settings.node_parser = hierarchical_parser

    # The docstore receives the full hierarchy, while the vector index below
    # is built from the leaf nodes only.
    hierarchy_context = StorageContext.from_defaults()
    hierarchy_context.docstore.add_documents(all_nodes)

    automerging_index = VectorStoreIndex(leaves, storage_context=hierarchy_context)
    automerging_index.storage_context.persist(persist_dir=index_dir)

    # Keep the human-readable configuration next to the hashed directory name.
    meta_path = os.path.join(index_dir, "meta.json")
    with open(meta_path, "w") as meta_file:
        json.dump(index_config, meta_file, indent=2)

    return automerging_index
|
122 |
+
|
123 |
+
|
124 |
+
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k: int = 6,
    rerank_top_n: int = 2,
    rerank_model: str = "cross-encoder/ms-marco-MiniLM-L-2-v2",
):
    """Create a query engine over a sentence-window index.

    Parameters
    ----------
    sentence_index
        Index exposing ``as_query_engine`` (as returned by
        ``build_sentence_window_index``).
    similarity_top_k : int
        Forwarded to ``as_query_engine``.
    rerank_top_n : int
        ``top_n`` given to the reranker.
    rerank_model : str
        Model name given to the reranker.

    Returns
    -------
    The query engine returned by ``sentence_index.as_query_engine``.
    """
    postprocessors = [
        # Swaps each node's content for the value stored under its "window"
        # metadata key.
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        # Per the original author's note: reranking prunes irrelevant nodes
        # from the context, which can speed up the LLM query without
        # sacrificing accuracy.
        SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model),
    ]

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=postprocessors
    )
|
141 |
+
|
142 |
+
|
143 |
+
def get_automerging_query_engine(
    automerging_index,
    similarity_top_k: int = 12,
    rerank_top_n: int = 6,
    rerank_model: str = "cross-encoder/ms-marco-MiniLM-L-2-v2",
):
    """Create a query engine that retrieves through ``AutoMergingRetriever``.

    Parameters
    ----------
    automerging_index
        Index exposing ``as_retriever`` and ``storage_context`` (as returned
        by ``build_automerging_index``).
    similarity_top_k : int
        Forwarded to the base retriever.
    rerank_top_n : int
        ``top_n`` given to the reranker.
    rerank_model : str
        Model name given to the reranker.

    Returns
    -------
    RetrieverQueryEngine
        Engine combining the auto-merging retriever with the reranker.
    """
    leaf_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    # The retriever is given the index's storage context in addition to the
    # base retriever; verbose output is enabled.
    merging_retriever = AutoMergingRetriever(
        leaf_retriever, automerging_index.storage_context, verbose=True
    )
    reranker = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)

    return RetrieverQueryEngine.from_args(
        merging_retriever, node_postprocessors=[reranker]
    )
|
158 |
+
|
159 |
+
|
160 |
+
def sentence_window_retrieval_setup(
    input_file: str,
    save_dir: str,
    llm_openai_model: SupportedOpenAIllms = "gpt-4o-mini",
    temperature: float = 0.1,
    embed_model: SupportedEmbedModels = "BAAI/bge-small-en-v1.5",
    sentence_window_size: int = 3,
    similarity_top_k: int = 6,
    rerank_model: SupportedRerankModels = "cross-encoder/ms-marco-MiniLM-L-2-v2",
    rerank_top_n: int = 2,
):
    """Set up the complete sentence-window retrieval pipeline.

    Parameters
    ----------
    input_file : str
        Path of the document to index.
    save_dir : str
        Root directory where indices are persisted and reloaded from.
    llm_openai_model : str
        OpenAI model installed as the global ``Settings.llm``.
    temperature : float
        Temperature passed to the OpenAI LLM.
    embed_model : str
        Embedding model name; part of the index identity.
    sentence_window_size : int
        Window size for the node parser; part of the index identity.
    similarity_top_k : int
        Forwarded to the query engine.
    rerank_model : str
        Model name for the reranker.
    rerank_top_n : int
        ``top_n`` for the reranker.

    Returns
    -------
    The query engine built by ``get_sentence_window_query_engine``.

    Notes
    -----
    Sets ``openai.api_key`` and the global ``Settings.llm`` as side effects.
    """
    openai.api_key = get_openai_api_key()

    # These three values uniquely identify the persisted index (they are
    # hashed into its directory name by the index builder).
    index_config = {
        "doc_source": os.path.basename(input_file),
        "embed_model": embed_model,
        "sentence_window_size": sentence_window_size,
    }

    # Step 1: obtain the index (built on first use, reloaded afterwards).
    sentence_index = build_sentence_window_index(input_file, save_dir, index_config)

    Settings.llm = OpenAI(model=llm_openai_model, temperature=temperature)

    # Step 2: wrap the index in a query engine.
    return get_sentence_window_query_engine(
        sentence_index,
        similarity_top_k=similarity_top_k,
        rerank_model=rerank_model,
        rerank_top_n=rerank_top_n,
    )
|
195 |
+
|
196 |
+
|
197 |
+
def automerging_retrieval_setup(
    input_file: str,
    save_dir: str,
    llm_openai_model: SupportedOpenAIllms = "gpt-4o-mini",
    temperature: float = 0.1,
    embed_model: SupportedEmbedModels = "BAAI/bge-small-en-v1.5",
    chunk_sizes: list[int] | None = None,
    similarity_top_k: int = 6,
    rerank_model: SupportedRerankModels = "cross-encoder/ms-marco-MiniLM-L-2-v2",
    rerank_top_n: int = 2,
):
    """Set up the complete auto-merging retrieval pipeline.

    Parameters
    ----------
    input_file : str
        Path of the document to index.
    save_dir : str
        Root directory where indices are persisted and reloaded from.
    llm_openai_model : str
        OpenAI model installed as the global ``Settings.llm``.
    temperature : float
        Temperature passed to the OpenAI LLM.
    embed_model : str
        Embedding model name; part of the index identity.
    chunk_sizes : list of int, optional
        Chunk sizes for the hierarchical node parser; part of the index
        identity. Defaults to ``[2048, 512, 128]`` when omitted.
    similarity_top_k : int
        Forwarded to the base retriever.
    rerank_model : str
        Model name for the reranker.
    rerank_top_n : int
        ``top_n`` for the reranker.

    Returns
    -------
    The query engine built by ``get_automerging_query_engine``.

    Notes
    -----
    Sets ``openai.api_key`` and the global ``Settings.llm`` as side effects.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    # The values (and therefore the config hash / index directory) are the
    # same as the previous literal default.
    if chunk_sizes is None:
        chunk_sizes = [2048, 512, 128]

    openai.api_key = get_openai_api_key()

    # This allows to uniquely identify the index
    config = {
        "doc_source": os.path.basename(input_file),
        "embed_model": embed_model,
        "chunk_sizes": chunk_sizes,
    }

    # 1. Build index
    index = build_automerging_index(input_file, save_dir, config)

    Settings.llm = OpenAI(model=llm_openai_model, temperature=temperature)

    # 2. Get engine
    # The hierarchical index must be queried through the auto-merging engine.
    # The sentence-window engine replaces node text with the "window" metadata
    # entry, which only the sentence-window parser is configured to write, and
    # it never uses AutoMergingRetriever.
    automerging_engine = get_automerging_query_engine(
        index,
        similarity_top_k=similarity_top_k,
        rerank_model=rerank_model,
        rerank_top_n=rerank_top_n,
    )

    return automerging_engine
|
231 |
+
|
232 |
+
|
233 |
+
def basic_rag_setup(
    input_file: str,
    save_dir: str,
    llm_openai_model: SupportedOpenAIllms = "gpt-4o-mini",
    temperature: float = 0.1,
    embed_model: SupportedEmbedModels = "BAAI/bge-small-en-v1.5",
    similarity_top_k: int = 6,
    rerank_model: SupportedRerankModels = "cross-encoder/ms-marco-MiniLM-L-2-v2",
    rerank_top_n: int = 2,
):
    """Create a vector-index query engine with a reranking post-processor.

    The index is persisted under ``<save_dir>/basic``. If that directory
    already exists the index is loaded from it and ``input_file`` is not
    read; otherwise the index is built from ``input_file`` and persisted
    there. Sets ``openai.api_key`` and the global ``Settings.embed_model``
    as side effects.

    Parameters
    ----------
    input_file : str
        Path of the document to index when no persisted index exists.
    save_dir : str
        Parent directory of the ``basic`` index folder.
    llm_openai_model : SupportedOpenAIllms
        Name of the OpenAI model given to the query engine.
    temperature : float
        Temperature passed to the OpenAI LLM.
    embed_model : SupportedEmbedModels
        Model name passed to ``HuggingFaceEmbedding``.
    similarity_top_k : int
        Passed to ``as_query_engine``.
    rerank_model : SupportedRerankModels
        Model passed to ``SentenceTransformerRerank``.
    rerank_top_n : int
        ``top_n`` passed to ``SentenceTransformerRerank``.

    Returns
    -------
    object
        The query engine returned by ``index.as_query_engine``.
    """
    openai.api_key = get_openai_api_key()
    Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model)

    persist_dir = os.path.join(save_dir, "basic")
    if os.path.exists(persist_dir):
        # Reuse the index persisted by an earlier run.
        storage = StorageContext.from_defaults(persist_dir=persist_dir)
        vector_index = load_index_from_storage(storage)
    else:
        # First run: index the document and persist it for next time.
        vector_index = VectorStoreIndex.from_documents([load_data(input_file)])
        vector_index.storage_context.persist(persist_dir=persist_dir)

    reranker = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)
    answer_llm = OpenAI(model=llm_openai_model, temperature=temperature)

    return vector_index.as_query_engine(
        llm=answer_llm,
        similarity_top_k=similarity_top_k,
        node_postprocessors=[reranker],
    )
|
src/mythesis_chatbot/utils.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
|
6 |
+
def get_config_hash(config: dict) -> str:
    """Return a short, deterministic fingerprint of a configuration dict.

    Parameters
    ----------
    config : dict
        JSON-serializable configuration.

    Returns
    -------
    str
        The first 10 hexadecimal characters of the SHA-256 digest of the
        configuration serialized as JSON with sorted keys.
    """
    # Sorting keys makes the serialization independent of insertion order.
    payload = json.dumps(config, sort_keys=True).encode()
    full_digest = hashlib.sha256(payload).hexdigest()
    return full_digest[:10]
|
11 |
+
|
12 |
+
|
13 |
+
def get_openai_api_key():
    """Return the OpenAI API key stored in ``OPENAI_API_KEY``.

    Returns
    -------
    str
        The non-empty value of the ``OPENAI_API_KEY`` environment variable.

    Raises
    ------
    ValueError
        If the variable is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError(
            "OpenAI API key not found. Please follow the instruction in the readme file."
        )
    return key
|