Spaces:
Running
Running
Update requirements.txt
Browse files - requirements.txt +40 -1
requirements.txt
CHANGED
@@ -1 +1,40 @@
|
|
1 |
-
huggingface_hub==0.25.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
huggingface_hub==0.25.2
|
2 |
+
# ===============================
|
3 |
+
# 📦 Embedding + Vector Search
|
4 |
+
# ===============================
|
5 |
+
chromadb==0.4.24
|
6 |
+
sentence-transformers==2.4.0 # Compatible with huggingface-hub 0.30.1
|
7 |
+
torch>=1.13.1, <2.0.0 # Stable with sentence-transformers
|
8 |
+
|
9 |
+
# ===============================
|
10 |
+
# 🤖 LLM-Based QA
|
11 |
+
# ===============================
|
12 |
+
transformers==4.37.2 # Works well with huggingface-hub 0.30.1
|
13 |
+
accelerate>=0.26.1
|
14 |
+
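huggingface-hub==0.30.1 # Compatible with transformers 4.37.2. NOTE(review): conflicts with the huggingface_hub==0.25.2 pin on line 1 (same package after name normalization) — keep only one pin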
|
15 |
+
|
16 |
+
# ===============================
|
17 |
+
# 📄 PDF Parsing
|
18 |
+
# ===============================
|
19 |
+
pymupdf>=1.25.5 # PyMuPDF for full-page text extraction
|
20 |
+
pdfminer.six>=20221105 # Optional: structured layout extraction
|
21 |
+
|
22 |
+
# ===============================
|
23 |
+
# 🖼️ OCR + Image Handling
|
24 |
+
# ===============================
|
25 |
+
pytesseract>=0.3.10 # Requires separate install of Tesseract binary
|
26 |
+
Pillow>=9.4.0
|
27 |
+
|
28 |
+
# ===============================
|
29 |
+
# 🌐 UI Interface
|
30 |
+
# ===============================
|
31 |
+
gradio>=4.29.0 # Gradio 4+ for modern UI
|
32 |
+
requests>=2.31.0
|
33 |
+
|
34 |
+
# ===============================
|
35 |
+
# 🛠 Utilities and Fixes
|
36 |
+
# ===============================
|
37 |
+
beautifulsoup4>=4.12.2 # Parsing for HTML-in-PDFs (e.g., diagrams/tables)
|
38 |
+
pydantic<2.0.0 # Chromadb is not yet compatible with pydantic 2.x
|
39 |
+
numpy==1.26.4 # Ensures compatibility with chromadb and transformers
|
40 |
+
tqdm>=4.66.0 # Progress bar (used in embedding scripts)
|