Linhz committed on
Commit
3a940c5
·
verified ·
1 Parent(s): d9102e7

Rename App3.py to app.py

Browse files
Files changed (1) hide show
  1. App3.py → app.py +60 -61
App3.py → app.py RENAMED
@@ -1,61 +1,60 @@
1
- from openai import OpenAI
2
- import streamlit as st
3
- import faiss
4
- from sentence_transformers import SentenceTransformer
5
- import pickle
6
- import re
7
- from transformers import pipeline
8
-
9
-
10
-
11
-
12
- st.title("Vietnamese Legal Question Answering System")
13
-
14
- with open('articles.pkl', 'rb') as file:
15
- articles = pickle.load(file)
16
-
17
- index_loaded = faiss.read_index("sentence_embeddings_index_no_citation.faiss")
18
-
19
- if 'model_embedding' not in st.session_state:
20
- print("ERROR")
21
- st.session_state.model_embedding = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
22
-
23
-
24
-
25
- # Replace this with your own checkpoint
26
- model_checkpoint = "model"
27
- question_answerer = pipeline("question-answering", model=model_checkpoint)
28
- def question_answering(question):
29
- print(question)
30
- query_sentence = [question]
31
- query_embedding = st.session_state.model_embedding.encode(query_sentence)
32
- k = 5
33
- D, I = index_loaded.search(query_embedding.astype('float32'), k) # D is distances, I is indices
34
- answer = [question_answerer(question=query_sentence[0], context=articles[I[0][i]], max_answer_len = 512) for i in range(k)]
35
- best_answer = max(answer, key=lambda x: x['score'])
36
- print(best_answer['answer'])
37
- if best_answer['score'] > 0.5:
38
- return best_answer['answer']
39
- return f"Tôi không chắc lắm nhưng có lẽ câu trả lời là: {best_answer['answer']}"
40
-
41
- if "messages" not in st.session_state:
42
- st.session_state.messages = []
43
-
44
- for message in st.session_state.messages:
45
- with st.chat_message(message["role"]):
46
- st.markdown(message["content"])
47
-
48
-
49
- def clean_answer(s):
50
- # Sử dụng regex để loại bỏ tất cả các ký tự đặc biệt ở cuối chuỗi
51
- return re.sub(r'[^a-zA-Z0-9]+$', '', s)
52
-
53
- if prompt := st.chat_input("What is up?"):
54
- st.session_state.messages.append({"role": "user", "content": prompt})
55
- with st.chat_message("user"):
56
- st.markdown(prompt)
57
- response = clean_answer(question_answering(prompt))
58
- with st.chat_message("assistant"):
59
- st.markdown(response)
60
-
61
- st.session_state.messages.append({"role": "assistant", "content": response})
 
1
+ import streamlit as st
2
+ import faiss
3
+ from sentence_transformers import SentenceTransformer
4
+ import pickle
5
+ import re
6
+ from transformers import pipeline
7
+
8
+
9
+
10
+
11
st.title("Vietnamese Legal Question Answering System")


# Streamlit re-executes this script from the top on every user interaction,
# so the expensive resources below go through st.cache_resource and are
# built once instead of on every chat message.
@st.cache_resource
def _load_articles(path):
    """Return the unpickled article collection stored at ``path``."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load an articles file produced by a trusted pipeline.
    with open(path, 'rb') as file:
        return pickle.load(file)


@st.cache_resource
def _load_index(path):
    """Return the FAISS index read from ``path``."""
    return faiss.read_index(path)


@st.cache_resource
def _load_question_answerer(checkpoint):
    """Return a question-answering pipeline built from ``checkpoint``."""
    return pipeline("question-answering", model=checkpoint)


articles = _load_articles('articles.pkl')
index_loaded = _load_index("sentence_embeddings_index_no_citation.faiss")

# The sentence encoder is stored in the session state so it is created only
# on the first run of a session. Loading it is the normal path, not an error,
# so nothing is printed here.
if 'model_embedding' not in st.session_state:
    st.session_state.model_embedding = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')

# Replace this with your own checkpoint
model_checkpoint = "model"
question_answerer = _load_question_answerer(model_checkpoint)
27
def question_answering(question):
    """Answer ``question`` from the nearest articles in the FAISS index.

    The question is embedded with the session's sentence encoder, the ``k``
    nearest articles are looked up in ``index_loaded``, and the extractive QA
    pipeline is run against each of them. The highest-scoring answer wins.

    Args:
        question: The user's question as a string.

    Returns:
        The best answer text when its score is above 0.5, the same text
        prefixed with a hedging sentence otherwise, or an apology message
        when the index returned no usable neighbour.
    """
    print(question)
    query_embedding = st.session_state.model_embedding.encode([question])
    k = 5
    # search() returns (distances, indices); only the indices are needed.
    _, neighbour_ids = index_loaded.search(query_embedding.astype('float32'), k)

    # FAISS pads the result with -1 when fewer than k neighbours exist.
    # Indexing articles with -1 would silently use the last article as
    # context, so padding entries are skipped.
    candidates = [
        question_answerer(question=question, context=articles[idx], max_answer_len=512)
        for idx in neighbour_ids[0]
        if idx >= 0
    ]
    if not candidates:
        # max() on an empty list would raise; tell the user instead.
        return "Xin lỗi, tôi không tìm thấy điều luật nào liên quan đến câu hỏi này"

    best_answer = max(candidates, key=lambda x: x['score'])
    print(best_answer['answer'])
    if best_answer['score'] > 0.5:
        return best_answer['answer']
    return f"Tôi không chắc lắm nhưng có lẽ câu trả lời là: {best_answer['answer']}"
39
+
40
# First run of this session: start with an empty chat transcript.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay every stored turn so the conversation survives script reruns.
for entry in st.session_state.messages:
    st.chat_message(entry["role"]).markdown(entry["content"])
46
+
47
+
48
def clean_answer(s):
    """Strip trailing punctuation, whitespace and underscores from ``s``.

    Args:
        s: The answer text to tidy up.

    Returns:
        ``s`` without its trailing run of non-alphanumeric characters.
    """
    # Use a regex to remove all special characters at the end of the string.
    # \W is Unicode-aware for str patterns, so accented Vietnamese letters
    # count as word characters and are kept; an ASCII-only class such as
    # [^a-zA-Z0-9] would cut them off the end of the answer.
    return re.sub(r'[\W_]+$', '', s)
51
+
52
# Handle a newly submitted question: record it, show it, answer it, and
# record the answer so the next rerun can replay the whole exchange.
prompt = st.chat_input("What is up?")
if prompt:
    user_turn = {"role": "user", "content": prompt}
    st.session_state.messages.append(user_turn)
    st.chat_message("user").markdown(prompt)

    response = clean_answer(question_answering(prompt))
    st.chat_message("assistant").markdown(response)

    assistant_turn = {"role": "assistant", "content": response}
    st.session_state.messages.append(assistant_turn)