import os

import gradio as gr
import requests
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS


# Groq credentials are read from the environment; GROQ_API_KEY is None when
# the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Chat model id sent to Groq. It can be overridden with the GROQ_MODEL
# environment variable; the default is the id this script has always used.
# NOTE(review): hosted model ids get retired over time -- confirm the default
# is still served before deploying.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-70b-8192")

# Embedding model used to build the vector index.
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-zh")

# Splitter applied to every loaded HTML document before it is indexed.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)


def build_vector_db_from_local_html(folder_path="data"):
    """Build a FAISS vector store from the ``.html`` files in a folder.

    Each ``.html`` file directly inside ``folder_path`` is loaded with
    ``UnstructuredHTMLLoader``, split with the module-level ``text_splitter``
    and embedded with the module-level ``embedding_model``.

    Args:
        folder_path: Directory to scan (not recursive). Defaults to ``"data"``.

    Returns:
        The FAISS vector store built from all resulting chunks.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
        ValueError: If the folder yields no document chunks to index.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # index contents and ordering reproducible between runs.
    html_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".html")
    )

    all_docs = []
    for filename in html_files:
        loader = UnstructuredHTMLLoader(os.path.join(folder_path, filename))
        all_docs.extend(text_splitter.split_documents(loader.load()))

    # Fail early with an actionable message instead of letting the index
    # construction fail on an empty document list.
    if not all_docs:
        raise ValueError(
            f"No indexable .html content found in folder: {folder_path!r}"
        )

    return FAISS.from_documents(all_docs, embedding_model)


# Build the vector store once at import time (from the default "data" folder)
# so every chat request reuses the same index.
db = build_vector_db_from_local_html()


GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"

# (connect, read) timeouts in seconds. Without a timeout requests waits
# indefinitely, which would freeze the UI on a stalled connection.
GROQ_REQUEST_TIMEOUT = (10, 60)


def rag_chat(user_input):
    """Answer a user question with retrieved context and the Groq chat API.

    The three chunks most similar to ``user_input`` are fetched from the
    module-level ``db`` and sent, together with the question, to the Groq
    chat-completions endpoint.

    Args:
        user_input: The question typed by the user.

    Returns:
        The model's reply text. For empty input, a missing API key, a failed
        HTTP request or an unexpected response body, a short user-facing
        message is returned instead of raising.
    """
    # Nothing to search for or ask about; skip retrieval and the API call.
    if not user_input or not user_input.strip():
        return "請輸入問題。"

    # Without a key the request would be sent as "Bearer None" and be
    # rejected by the API; report the real cause instead.
    if not GROQ_API_KEY:
        return "錯誤:未設定 GROQ_API_KEY 環境變數"

    docs = db.similarity_search(user_input, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    messages = [
        {
            "role": "system",
            "content": "你是一個親切的諮詢師,多和使用者聊天,且幫助使用者了解數位性暴力並提供協助。請使用繁體中文回答。",
        },
        {
            "role": "user",
            "content": f"以下是相關資料:\n{context}\n\n請回答這個問題:{user_input}",
        },
    ]
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {"model": GROQ_MODEL, "messages": messages}

    try:
        res = requests.post(
            GROQ_CHAT_URL,
            headers=headers,
            json=payload,
            timeout=GROQ_REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except requests.RequestException as e:
        # Connection problems, timeouts, non-2xx statuses, invalid JSON.
        return f"錯誤:{e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The body was JSON but not shaped like a chat-completion response.
        return f"錯誤:API 回應格式不符預期({e!r})"


# Single-textbox Gradio UI: the text the user enters is passed to rag_chat
# and the returned string is displayed as plain text.
iface = gr.Interface(
    fn=rag_chat,
    inputs=gr.Textbox(label="輸入你的問題", placeholder="請輸入問題...", lines=3),
    outputs="text",
    title="AI諮詢機器人",
    description="詢問我關於數位性暴力的事情,或者你遇到甚麼困境?",
)

# Start the web server for the interface.
iface.launch()