# Gradio RAG chatbot: retrieves context from local HTML files via a FAISS index,
# then answers with a Groq-hosted LLM.
import os

import gradio as gr
import requests
from langchain.vectorstores import FAISS  # on newer LangChain versions these imports live in langchain_community
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.text_splitter import CharacterTextSplitter

# Read the API key from an environment variable
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_MODEL = "llama3-70b-8192"
# GROQ_MODEL = "gemma2-9b-it"

# Chinese-capable embedding model and a character-based splitter for chunking
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-zh")
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)


def build_vector_db_from_local_html(folder_path="data"):
    """Load every .html file under folder_path, chunk it, and build a FAISS index."""
    all_docs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".html"):
            loader = UnstructuredHTMLLoader(os.path.join(folder_path, filename))
            docs = loader.load()
            chunks = text_splitter.split_documents(docs)
            all_docs.extend(chunks)
    db = FAISS.from_documents(all_docs, embedding_model)
    return db


db = build_vector_db_from_local_html()


def rag_chat(user_input):
    # Retrieve the 3 most similar chunks and pass them to the model as context
    docs = db.similarity_search(user_input, k=3)
    context = "\n\n".join([doc.page_content for doc in docs])
    messages = [
        # System prompt (Traditional Chinese): act as a friendly counselor, chat with the user,
        # help them understand digital sexual violence and offer assistance; reply in Traditional Chinese.
        {"role": "system", "content": "你是一個親切的諮詢師,多和使用者聊天,且幫助使用者了解數位性暴力並提供協助。請使用繁體中文回答。"},
        # User prompt: "Here is the relevant material: {context} / Please answer this question: {user_input}"
        {"role": "user", "content": f"以下是相關資料:\n{context}\n\n請回答這個問題:{user_input}"},
    ]
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
    payload = {"model": GROQ_MODEL, "messages": messages}
    try:
        res = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"錯誤:{str(e)}"  # "Error: ..."


# Gradio UI; labels are in Traditional Chinese ("Enter your question", "AI counseling bot", etc.)
iface = gr.Interface(
    fn=rag_chat,
    inputs=gr.Textbox(label="輸入你的問題", placeholder="請輸入問題...", lines=3),
    outputs="text",
    title="AI諮詢機器人",
    description="詢問我關於數位性暴力的事情,或者你遇到甚麼困境?",
)
iface.launch()