FauziIsyrinApridal committed
Commit ea1ba01 · 1 Parent(s): 69f9e8f

Initial commit without binary files

.env.example ADDED
@@ -0,0 +1,3 @@
+ REPLICATE_API_TOKEN=
+ SUPABASE_URL=
+ SUPABASE_KEY=
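
These variables are loaded at startup with python-dotenv and consumed through `app/config.py` below. A minimal sketch of reading and validating them, assuming a populated `.env` file in the working directory:

```python
# Sketch: load and sanity-check the variables declared in .env.example.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env into the process environment

required = ("REPLICATE_API_TOKEN", "SUPABASE_URL", "SUPABASE_KEY")
missing = [name for name in required if not os.getenv(name)]
if missing:
    # Fail fast instead of erroring later deep inside the app.
    raise RuntimeError(f"Missing environment variables: {missing}")
```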
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
+ /venv
+ .env
+ /app/__pycache__
+ /tests/__pycache__
+ /app/scrapping/*.py
+ /vector_store_data
README.md CHANGED
@@ -1,14 +1,60 @@
- ---
- title: Pnp Chatbot V1
- emoji: 👀
- colorFrom: yellow
- colorTo: purple
- sdk: streamlit
- sdk_version: 1.45.0
- app_file: app.py
- pinned: false
- license: apache-2.0
- short_description: Chatbot for Politeknik Negeri Padang
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Pnp-Bot
+
+ Pnp-Bot is a web-based chatbot application that answers questions about Politeknik Negeri Padang using the Sahabat-AI LLM with Retrieval-Augmented Generation (RAG).
+
+ Deployed:
+
+ - Chatbot App :
+ - Admin Dashboard : <span style="color:yellow;">Limited Access</span>, contact me for more details.
+
+ ### Chatbot app
+
+ #### Technologies
+
+ - Python
+ - LangChain
+ - Streamlit
+ - Sahabat-AI (LLM)
+
+ #### Create a virtual environment
+
+ ```bash
+ python -m venv venv
+ ```
+
+ #### Activate the virtual environment
+
+ On Windows:
+
+ ```bash
+ .\venv\Scripts\activate
+ ```
+
+ On Linux:
+
+ ```bash
+ source venv/bin/activate
+ ```
+
+ #### Install dependencies
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ #### Set up the environment file
+
+ Create a `.env` file from the `.env.example` template:
+
+ ```bash
+ cp .env.example .env
+ ```
+
+ Fill the variables with your own credentials from https://replicate.com/ and https://supabase.com/
+
+ #### Running
+
+ ```bash
+ streamlit run app.py
+ ```
app.py ADDED
@@ -0,0 +1,85 @@
+ import streamlit as st
+ import os
+ from dotenv import load_dotenv
+ from langsmith import traceable
+
+ from app.chat import initialize_session_state, display_chat_history
+ from app.data_loader import get_data, load_docs
+ from app.document_processor import process_documents, save_vector_store, load_vector_store
+ from app.prompts import sahabat_prompt
+ from langchain_community.llms import Replicate
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.document_transformers import LongContextReorder
+
+ load_dotenv()
+
+ VECTOR_STORE_PATH = "vector_store_data"
+ DATA_DIR = "data"
+
+ @traceable(name="Create RAG Conversational Chain")
+ def create_conversational_chain(vector_store):
+     llm = Replicate(
+         model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
+         model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000}
+     )
+
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         return_messages=True,
+         output_key='answer'
+     )
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm,
+         retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
+         combine_docs_chain_kwargs={"prompt": sahabat_prompt},
+         return_source_documents=True,
+         memory=memory
+     )
+
+     return chain
+
+ def reorder_embedding(docs):
+     reordering = LongContextReorder()
+     return reordering.transform_documents(docs)
+
+ def get_latest_data_timestamp(folder):
+     latest_time = 0
+     for root, _, files in os.walk(folder):
+         for file in files:
+             path = os.path.join(root, file)
+             file_time = os.path.getmtime(path)
+             latest_time = max(latest_time, file_time)
+     return latest_time
+
+ def vector_store_is_outdated():
+     if not os.path.exists(VECTOR_STORE_PATH):
+         return True
+     vector_store_time = os.path.getmtime(VECTOR_STORE_PATH)
+     data_time = get_latest_data_timestamp(DATA_DIR)
+     return data_time > vector_store_time
+
+ @traceable(name="Main Chatbot RAG App")
+ def main():
+     initialize_session_state()
+     get_data()
+
+     if len(st.session_state['history']) == 0:
+         if vector_store_is_outdated():
+             docs = load_docs()
+             reordered_docs = reorder_embedding(docs)
+             vector_store = process_documents(reordered_docs)
+             save_vector_store(vector_store)
+         else:
+             vector_store = load_vector_store()
+
+         st.session_state['vector_store'] = vector_store
+
+     if st.session_state['vector_store'] is not None:
+         chain = create_conversational_chain(st.session_state['vector_store'])
+         display_chat_history(chain)
+
+ if __name__ == "__main__":
+     main()
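
A minimal sketch of exercising the chain defined above without the Streamlit UI (for instance at the bottom of a scratch script that includes `create_conversational_chain`), assuming `vector_store_data/` already exists and `REPLICATE_API_TOKEN` is set; the question string is illustrative:

```python
# Sketch: run one question through the RAG chain without the Streamlit UI.
from app.document_processor import load_vector_store

vector_store = load_vector_store()
if vector_store is not None:
    chain = create_conversational_chain(vector_store)  # defined above in app.py
    result = chain({"question": "Apa itu Politeknik Negeri Padang?",
                    "chat_history": []})
    print(result["answer"])
    print(f"{len(result['source_documents'])} source chunks retrieved")
```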
app/__init__.py ADDED
@@ -0,0 +1 @@
+ # app/__init__.py
app/chat.py ADDED
@@ -0,0 +1,101 @@
+ import streamlit as st
+ from streamlit_chat import message
+ from streamlit_mic_recorder import speech_to_text
+ import base64
+ import gtts
+ from io import BytesIO
+
+ def initialize_session_state():
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Halo! Saya bisa membantu anda menjawab pertanyaan seputar Politeknik Negeri Padang!"]
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hai! 👋"]
+     if 'data_len' not in st.session_state:
+         st.session_state['data_len'] = 0
+     if 'vector_store' not in st.session_state:
+         st.session_state['vector_store'] = None
+     if 'should_speak' not in st.session_state:
+         st.session_state['should_speak'] = True
+     if 'input_text' not in st.session_state:
+         st.session_state['input_text'] = ""
+
+ def text_to_speech(text):
+     tts = gtts.gTTS(text, lang="id")
+     audio_bytes = BytesIO()
+     tts.write_to_fp(audio_bytes)
+     audio_bytes.seek(0)
+     audio_base64 = base64.b64encode(audio_bytes.read()).decode()
+     audio_player = f"""
+     <audio autoplay>
+         <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
+     </audio>
+     """
+     return audio_player
+
+ def conversation_chat(query, chain, history):
+     result = chain({"question": query, "chat_history": history})
+     history.append((query, result["answer"]))
+     return result["answer"]
+
+ def display_chat_history(chain):
+     reply_container = st.container()
+
+     # Chat input section (at the bottom, always)
+     col1, col2, col3 = st.columns([7, 1, 1])
+
+     with col2:
+         # Toggle Text-to-Speech (TTS) using an icon instead of a visible checkbox
+         should_speak = st.session_state.get('should_speak', True)
+
+         # Handle the manual icon toggle (a button instead of a checkbox)
+         icon_label = "🔊" if should_speak else "🔇"
+         if st.button(icon_label, key="toggle_tts", help="Aktifkan/Nonaktifkan Text-to-Speech", use_container_width=True):
+             st.session_state['should_speak'] = not should_speak
+
+     with col3:
+         # Mic input
+         stt_text = speech_to_text(
+             start_prompt="🎤",
+             stop_prompt="🛑 Stop",
+             language='id',
+             just_once=True,
+             key='stt_input',
+             use_container_width=True,
+         )
+
+     with col1:
+         # Use chat_input so it is pinned to the bottom and integrates better
+         user_input_obj = st.chat_input(
+             "Masukkan pertanyaan atau Tekan tombol mic untuk berbicara!",
+             key="chat_input_field"
+         )
+
+     # If speech-to-text produced a result, store it as the input and rerun
+     if stt_text:
+         st.session_state.input_text = stt_text
+         st.rerun()
+
+     user_input = user_input_obj or st.session_state.get("input_text", "")
+
+     if user_input:
+         with st.spinner('Sedang membuat jawaban...'):
+             output = conversation_chat(user_input, chain, st.session_state['history'])
+
+         st.session_state['past'].append(user_input)
+         st.session_state['generated'].append(output)
+         st.session_state.input_text = ""  # Clear the input after sending
+
+         if st.session_state['should_speak'] and output:
+             st.markdown(text_to_speech(output), unsafe_allow_html=True)
+
+     # Render the chat history
+     if st.session_state['generated']:
+         with reply_container:
+             for i in range(len(st.session_state['generated'])):
+                 message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="no-avatar")
+                 message(st.session_state["generated"][i], key=str(i), avatar_style="no-avatar")
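
`text_to_speech` returns an HTML `<audio>` snippet with the MP3 inlined as base64, which `display_chat_history` injects via `st.markdown(..., unsafe_allow_html=True)`. A minimal sketch of calling it standalone, assuming network access for gTTS:

```python
# Sketch: generate the autoplay audio snippet outside Streamlit.
# gTTS performs a network call; lang="id" selects Indonesian.
from app.chat import text_to_speech

html_snippet = text_to_speech("Halo dari Pnp-Bot!")
assert html_snippet.strip().startswith("<audio")  # base64 MP3 embedded inside
```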
app/config.py ADDED
@@ -0,0 +1,9 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ class Config:
+     SUPABASE_URL = os.getenv('SUPABASE_URL')
+     SUPABASE_KEY = os.getenv('SUPABASE_KEY')
+     REPLICATE_API_TOKEN = os.getenv('REPLICATE_API_TOKEN')
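
A quick sketch of checking that `Config` picked up the `.env` values, reporting presence rather than printing the secrets themselves:

```python
# Sketch: report whether each credential is set, never its value.
from app.config import Config

for name in ("SUPABASE_URL", "SUPABASE_KEY", "REPLICATE_API_TOKEN"):
    print(f"{name}: {'set' if getattr(Config, name) else 'MISSING'}")
```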
app/data_loader.py ADDED
@@ -0,0 +1,108 @@
+ import os
+ from app.db import supabase
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
+
+ def list_all_files(bucket_name, limit_per_page=1000):
+     all_files = []
+     offset = 0
+
+     while True:
+         try:
+             files = supabase.storage.from_(bucket_name).list("", {
+                 "limit": limit_per_page,
+                 "offset": offset
+             })
+             if not files:
+                 break
+             all_files.extend(files)
+             offset += limit_per_page
+         except Exception as e:
+             print(f"Error fetching files with offset {offset}: {e}")
+             break
+
+     return all_files
+
+
+ def get_data():
+     BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+     data_dir = os.path.join(BASE_DIR, 'data')
+
+     if not os.path.exists(data_dir):
+         os.makedirs(data_dir)
+
+     try:
+         local_files = [f for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f))]
+     except Exception as e:
+         print(f"Error accessing local files: {e}")
+         return
+
+     try:
+         remote_files_info = list_all_files("pnp-bot-storage")
+     except Exception as e:
+         print(f"Error fetching files from Supabase: {e}")
+         return
+
+     remote_files = [f["name"] for f in remote_files_info]
+
+     # Synchronization: delete local files that no longer exist in storage
+     file_to_delete = list(set(local_files) - set(remote_files))
+     file_to_download = list(set(remote_files) - set(local_files))
+
+     for filename in file_to_delete:
+         try:
+             os.remove(os.path.join(data_dir, filename))
+             print(f"Removed: {filename}")
+         except Exception as e:
+             print(f"Error removing {filename}: {e}")
+
+     for filename in file_to_download:
+         try:
+             file_path = os.path.join(data_dir, filename)
+             res = supabase.storage.from_("pnp-bot-storage").download(filename)
+             with open(file_path, "wb") as f:
+                 f.write(res)
+             print(f"Downloaded: {filename}")
+         except Exception as e:
+             print(f"Error downloading {filename}: {e}")
+
+
+ def load_docs():
+     BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+     data_dir = os.path.join(BASE_DIR, 'data')
+
+     if not os.path.exists(data_dir):
+         print(f"Directory not found: {data_dir}")
+         os.makedirs(data_dir)
+         print(f"Created directory: {data_dir}")
+         return []
+
+     documents = []
+
+     try:
+         files = os.listdir(data_dir)
+     except PermissionError:
+         print(f"Permission denied: {data_dir}")
+         return []
+
+     for file in files:
+         file_path = os.path.join(data_dir, file)
+         if file.endswith(".pdf"):
+             try:
+                 loader = PyPDFLoader(file_path)
+                 documents.extend(loader.load())
+             except Exception as e:
+                 print(f"Error loading PDF file {file}: {e}")
+         elif file.endswith('.docx') or file.endswith('.doc'):
+             try:
+                 loader = Docx2txtLoader(file_path)
+                 documents.extend(loader.load())
+             except Exception as e:
+                 print(f"Error loading DOCX/DOC file {file}: {e}")
+         elif file.endswith('.txt'):
+             try:
+                 loader = TextLoader(file_path, encoding='utf-8', autodetect_encoding=True)
+                 documents.extend(loader.load())
+             except Exception as e:
+                 print(f"Error loading TXT file {file}: {e}")
+
+     return documents
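
A minimal sketch of the sync-then-load flow, assuming the `pnp-bot-storage` bucket exists and the Supabase credentials are configured:

```python
# Sketch: mirror the bucket into app/data/, then parse the files into
# LangChain Document objects for indexing.
from app.data_loader import get_data, load_docs

get_data()          # download new files, delete ones removed from storage
docs = load_docs()  # PDF, DOCX/DOC, and TXT loaders
print(f"Loaded {len(docs)} document pages")
```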
app/db.py ADDED
@@ -0,0 +1,4 @@
+ from supabase import create_client
+ from app.config import Config
+
+ supabase = create_client(Config.SUPABASE_URL, Config.SUPABASE_KEY)
app/document_processor.py ADDED
@@ -0,0 +1,44 @@
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ import os
+
+ VECTOR_STORE_PATH = "vector_store_data"
+
+ def save_vector_store(vector_store):
+     """Save the vector store to disk."""
+     vector_store.save_local(VECTOR_STORE_PATH)
+     print(f"Vector store saved to {VECTOR_STORE_PATH}")
+
+ def load_vector_store():
+     """Load the vector store from disk, or return None if it does not exist."""
+     if os.path.exists(VECTOR_STORE_PATH):
+         embeddings = HuggingFaceEmbeddings(
+             model_name="LazarusNLP/all-indo-e5-small-v4",
+             model_kwargs={"device": "cpu"},
+             encode_kwargs={"normalize_embeddings": True}
+         )
+         vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
+         print(f"Vector store loaded from {VECTOR_STORE_PATH}")
+         return vector_store
+     else:
+         print("Vector store file not found.")
+         return None
+
+
+ def process_documents(docs):
+     embeddings = HuggingFaceEmbeddings(
+         model_name="LazarusNLP/all-indo-e5-small-v4",
+         model_kwargs={"device": "cpu"},
+         encode_kwargs={"normalize_embeddings": True}
+     )
+
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1500,
+         chunk_overlap=300
+     )
+     text_chunks = text_splitter.split_documents(docs)
+     vector_store = FAISS.from_documents(text_chunks, embeddings)
+
+     return vector_store
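
A minimal round-trip sketch: build the FAISS index, persist it, reload it, and run an illustrative similarity query (the query string and `k` are arbitrary choices here):

```python
# Sketch: index documents, save to vector_store_data/, reload, and search.
from app.data_loader import load_docs
from app.document_processor import process_documents, save_vector_store, load_vector_store

docs = load_docs()
vector_store = process_documents(docs)  # 1500-char chunks, 300 overlap
save_vector_store(vector_store)

reloaded = load_vector_store()
hits = reloaded.similarity_search("pendaftaran mahasiswa baru", k=3)
for doc in hits:
    print(doc.metadata.get("source"), doc.page_content[:80])
```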
app/prompts.py ADDED
@@ -0,0 +1,24 @@
+ from langchain.prompts import PromptTemplate
+
+ prompt = """<|begin_of_text|>
+ <|start_header_id|>system<|end_header_id|>
+ Kamu adalah asisten dari Politeknik Negeri Padang.
+ Tugasmu adalah menjawab pertanyaan berdasarkan konteks dokumen yang diberikan oleh pengguna.
+ Jika pengguna bertanya di luar topik dokumen, jangan tanggapi.
+ Jika konteks yang diberikan tidak cukup untuk menjawab pertanyaan, katakan bahwa kamu tidak memiliki jawabannya.
+ Jawablah menggunakan bahasa yang sama dengan yang digunakan pengguna seperti Bahasa Indonesia, Bahasa Jawa, Bahasa Minang, Bahasa Sunda, atau Bahasa Inggris.
+ Berikan jawaban jelas dan terstruktur
+ <|eot_id|>
+ <|start_header_id|>user<|end_header_id|>
+ Jawablah pertanyaan pengguna berdasarkan konteks berikut:
+ Konteks: {context}
+ Pertanyaan: {question}
+
+ <|eot_id|>
+ <|start_header_id|>assistant<|end_header_id|>
+ """
+
+ sahabat_prompt = PromptTemplate(
+     template=prompt,
+     input_variables=["context", "question"]
+ )
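
A short sketch of rendering the template with illustrative values, to inspect the exact string the Sahabat-AI model receives:

```python
# Sketch: fill the two template variables and print the final prompt.
from app.prompts import sahabat_prompt

print(sahabat_prompt.format(
    context="Politeknik Negeri Padang berlokasi di Limau Manis, Padang.",
    question="Di mana lokasi Politeknik Negeri Padang?",
))
```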
rag_eval.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,117 @@
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ altair==4.0.0
+ anyio==3.7.1
+ attrs==23.2.0
+ beautifulsoup4==4.12.3
+ blinker==1.7.0
+ bs4==0.0.2
+ cachetools==5.3.3
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ dataclasses-json==0.5.14
+ distro==1.9.0
+ entrypoints==0.4
+ faiss-cpu==1.7.4
+ filelock==3.13.4
+ frozenlist==1.4.1
+ fsspec==2024.3.1
+ gitdb==4.0.11
+ GitPython==3.1.43
+ greenlet==3.0.3
+ h11==0.14.0
+ httpcore==1.0.5
+ httpx==0.27.0
+ huggingface-hub==0.14.1
+ idna==3.7
+ importlib_metadata==7.1.0
+ InstructorEmbedding==1.0.1
+ Jinja2==3.1.3
+ joblib==1.4.0
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema==4.21.1
+ jsonschema-specifications==2023.12.1
+ langchain==0.1.17
+ langchain-community
+ langchain-core==0.1.52
+ langchain-text-splitters==0.0.1
+ langsmith==0.1.55
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.21.1
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.0.5
+ mypy-extensions==1.0.0
+ networkx==3.3
+ nltk==3.8.1
+ numexpr==2.10.0
+ numpy==1.26.4
+ openai==1.26.0
+ openapi-schema-pydantic==1.2.4
+ orjson==3.10.3
+ packaging==23.2
+ pandas==2.2.2
+ pillow==10.3.0
+ protobuf==3.20.3
+ pyarrow==16.0.0
+ pydantic==1.10.15
+ pydeck==0.8.1b0
+ Pygments==2.17.2
+ Pympler==1.0.1
+ pypdf==4.2.0
+ PyPDF2==3.0.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.0
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.34.0
+ regex==2024.4.16
+ replicate==0.25.2
+ requests==2.31.0
+ rich==13.7.1
+ rpds-py==0.18.0
+ safetensors==0.4.3
+ scikit-learn==1.4.2
+ scipy==1.13.0
+ semver==3.0.2
+ sentence-transformers==2.2.2
+ sentencepiece==0.2.0
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.1
+ soupsieve==2.5
+ SQLAlchemy==2.0.29
+ streamlit==1.34.0
+ streamlit-chat==0.1.1
+ sympy==1.12
+ tenacity==8.2.3
+ threadpoolctl==3.4.0
+ tiktoken==0.4.0
+ tokenizers==0.13.3
+ toml==0.10.2
+ toolz==0.12.1
+ torch==2.2.2
+ torchvision==0.17.2
+ tornado==6.4
+ tqdm==4.66.2
+ transformers==4.45.0
+ typing-inspect==0.9.0
+ typing_extensions==4.11.0
+ tzdata==2024.1
+ tzlocal==5.2
+ urllib3==2.2.1
+ validators==0.28.1
+ watchdog==4.0.0
+ yarl==1.9.4
+ zipp==3.18.1
+ supabase
+ docx2txt
+ soundfile
+ SpeechRecognition
+ chardet
+ streamlit_mic_recorder
+ gtts
tests/__init__.py ADDED
File without changes
tests/test_config.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ from app.config import Config
+
+ def test_config_load():
+     assert Config.SUPABASE_URL is not None
+     assert Config.SUPABASE_KEY is not None
+     assert Config.REPLICATE_API_TOKEN is not None
tests/test_db.py ADDED
@@ -0,0 +1,5 @@
+ from app.db import supabase
+
+ def test_supabase_connection():
+     # check that the client object was constructed
+     assert supabase is not None
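
The assertion above only proves the client object was constructed. A slightly stronger, hypothetical check would hit the storage API, mirroring the call used in `app/data_loader.py`:

```python
# Hypothetical extra test: listing the bucket verifies the credentials work,
# not just that create_client() returned an object.
from app.db import supabase

def test_supabase_storage_reachable():
    files = supabase.storage.from_("pnp-bot-storage").list("")
    assert isinstance(files, list)
```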