# Source: Hugging Face Space "Jaamie/SafeSpace-AI" (Space status when captured: Sleeping; file size: 15,284 bytes)

# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1BmTzCgYHoIX81jKTqf4ImJaKRRbxgoTS
"""


import os
import csv
import pandas as pd
import plotly.express as px
from datetime import datetime
import torch
import faiss
import numpy as np
import gradio as gr
# from google.colab import drive
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from peft import PeftModel
from huggingface_hub import login
from transformers import pipeline as hf_pipeline
from fpdf import FPDF
import uuid
import textwrap
from dotenv import load_dotenv
try:
    import whisper
except ImportError:
    os.system("pip install -U openai-whisper")
    import whisper

# Load the Whisper speech-to-text model once at import time ("base" checkpoint).
whisper_model = whisper.load_model("base")

# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

hf_token = os.getenv("HF_TOKEN")
resend_api_key = os.getenv("RESEND_API_KEY")

# Only authenticate when a token is actually configured: login(token=None)
# falls back to an interactive prompt, which cannot be answered when the app
# runs headless.
if hf_token:
    login(token=hf_token)
else:
    print("⚠️ HF_TOKEN is not set; skipping Hugging Face login.")

# Mount Google Drive
#drive.mount('/content/drive')

# -------------------------------
# 🔧 Configuration
# -------------------------------
# Base model / LoRA adapter identifiers. In this file they are referenced only
# by the commented-out PEFT loading code further down.
base_model_path = "google/gemma-2-9b-it"
peft_model_path = "Jaamie/gemma-mental-health-qlora"
# torch.device used when loading the embedding model. NOTE: the name `device`
# is reassigned to an integer pipeline index in the reader-LLM setup below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sentence-embedding model used to encode user queries for retrieval.
embedding_model_bge = "BAAI/bge-base-en-v1.5"
#save_path_bge = "./models/bge-base-en-v1.5"
# Pre-built FAISS index and the text chunks its result indices point into.
faiss_index_path = "./qa_faiss_embedding.index"
chunked_text_path = "./chunked_text_RAG_text.txt"
#READER_MODEL_NAME = "google/gemma-2-9b-it"
# Causal LM that generates the final answer.
READER_MODEL_NAME = "google/gemma-2b-it"
# CSV files that store per-query logs and user feedback.
log_file_path = "./diagnosis_logs.csv"
feedback_file_path = "./feedback_logs.csv"


# -------------------------------
# 🔧 Logging and feedback setup
# -------------------------------
# Create each CSV file with its header row the first time the app runs;
# a file that already exists is left untouched.
for _csv_path, _csv_header in (
    (
        log_file_path,
        ["timestamp", "input_type", "query", "diagnosis", "confidence_score", "status"],
    ),
    (
        feedback_file_path,
        [
            "feedback_id", "timestamp", "input_type", "query",
            "diagnosis", "confidence_score", "status", "feedback"
        ],
    ),
):
    if os.path.exists(_csv_path):
        continue
    with open(_csv_path, "w", newline="", encoding="utf-8") as _csv_file:
        csv.writer(_csv_file).writerow(_csv_header)

# Ensure directory exists
#os.makedirs(save_path_bge, exist_ok=True)

# -------------------------------
# 🔧 Model setup
# -------------------------------

# Load Sentence Transformer Model
# if not os.path.exists(os.path.join(save_path_bge, "config.json")):
#     print("Saving model to Google Drive...")
#     embedding_model = SentenceTransformer(embedding_model_bge)
#     embedding_model.save(save_path_bge)
#     print("Model saved successfully!")
# else:
#     print("Loading model from Google Drive...")
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     embedding_model = SentenceTransformer(save_path_bge, device=device)

# Load the sentence-embedding model by its Hub identifier onto `device`.
embedding_model = SentenceTransformer(embedding_model_bge, device=device)
print("✅ BGE Embedding model loaded from Hugging Face.")

# Load FAISS Index
# The index is read from disk; process_query maps the positions it returns
# onto entries of `chunked_text`.
faiss_index = faiss.read_index(faiss_index_path)
print("FAISS index loaded successfully!")

# Load chunked text
def load_chunked_text():
    """Read the RAG corpus file and return its chunks as a list of strings.

    The file at ``chunked_text_path`` is read as UTF-8 and split wherever a
    ``---`` line is surrounded by one blank line on each side. Chunks are
    returned exactly as stored (no stripping or filtering), in file order.
    """
    separator = "\n\n---\n\n"
    with open(chunked_text_path, mode="r", encoding="utf-8") as corpus_file:
        corpus = corpus_file.read()
    return corpus.split(separator)

# Text chunks, loaded once at import; process_query indexes into this list
# with the positions returned by the FAISS search.
chunked_text = load_chunked_text()
print(f"Loaded {len(chunked_text)} text chunks.")


# loading model for emotion classifier
# NOTE(review): this module-level `emotion_result` dict is not read anywhere in
# this file; process_query assigns a local variable of the same name instead.
emotion_result = {}
# Text-classification pipeline used by process_query to label the user's
# emotion and obtain its score.
emotion_classifier = hf_pipeline("text-classification", model="nateraw/bert-base-uncased-emotion")


# -------------------------------
# 🧠 Load base model + LoRA adapter
# -------------------------------
# base_model = AutoModelForCausalLM.from_pretrained(
#     base_model_path,
#     torch_dtype=torch.float16,
#     device_map="auto"  # Use accelerate for smart placement
# )

# # Load the LoRA adapter on top of the base model
# diagnosis_model = PeftModel.from_pretrained(
#     base_model,
#     peft_model_path
# ).to(device)

# # Load tokenizer from the same fine-tuned repo
# diagnosis_tokenizer = AutoTokenizer.from_pretrained(peft_model_path)

# # Set model to evaluation mode
# diagnosis_model.eval()

# print("✅ Model & tokenizer loaded successfully.")

# # Create text-generation pipeline WITHOUT `device` arg
# READER_LLM = pipeline(
#     model=diagnosis_model,
#     tokenizer=diagnosis_tokenizer,
#     task="text-generation",
#     do_sample=True,
#     temperature=0.2,
#     repetition_penalty=1.1,
#     return_full_text=False,
#     max_new_tokens=500
# )

# Device selection for the reader LLM. The Transformers pipeline takes an
# integer index (0 = first GPU, -1 = CPU), but a negative index is not a valid
# argument for `model.to(...)`, so the model itself is moved with an explicit
# torch.device. `device` keeps its integer meaning for the pipeline call.
device = 0 if torch.cuda.is_available() else -1
reader_device = torch.device("cuda:0" if device == 0 else "cpu")
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
#model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME).to(reader_device)
# Text-generation pipeline used by process_query; only newly generated text is
# returned (return_full_text=False), capped at 500 new tokens.
READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,
    device=device,
)
# -------------------------------
# 🔧 Whisper Model Setup
# -------------------------------

def process_whisper_query(audio):
    """Transcribe a recorded audio file and run the diagnosis flow on the text.

    Args:
        audio: Path to the audio file to transcribe.

    Returns:
        A ``(response, download_path)`` tuple. On any failure, or when no
        speech is recognised, ``response`` is a warning message and
        ``download_path`` is ``None``.
    """
    try:
        # transcribe() processes the whole recording; the previous
        # pad_or_trim + decode approach only looked at a single 30-second
        # window and silently dropped the rest.
        result = whisper_model.transcribe(audio, fp16=False)
        transcribed_text = (result.get("text") or "").strip()

        # Do not run a diagnosis on silence or unintelligible audio.
        if not transcribed_text:
            return (
                "⚠️ No speech could be recognised in the recording. "
                "Please try again or type your concern.",
                None,
            )

        response, download_path = process_query(transcribed_text, input_type="voice")
        return response, download_path
    except Exception as e:
        return f"⚠️ Error processing audio: {str(e)}", None


def extract_diagnosis(response_text: str) -> str:
    """Pull the diagnosis label out of the model's free-text answer.

    The first line that mentions "Diagnosed Mental Disorder" and carries a
    non-empty value is used. The value is whatever follows the last colon
    after the label, with surrounding whitespace and markdown/quote markers
    (``*``, ``_``, backticks, quotes, dashes) removed, so both
    ``**Diagnosed Mental Disorder:** Depression`` and
    ``**Diagnosed Mental Disorder**: Depression`` yield ``"Depression"``.

    Args:
        response_text: The generated answer; may be empty or ``None``.

    Returns:
        The extracted label, or ``"Unknown"`` when none is found.
    """
    if not response_text:
        return "Unknown"

    label = "Diagnosed Mental Disorder"
    for line in response_text.splitlines():
        _, found, tail = line.partition(label)
        if not found:
            continue
        # Only strip at the ends so hyphenated labels such as "Bi-Polar" survive.
        value = tail.split(":")[-1].strip(" \t*_`\"'-")
        if value:
            return value
    return "Unknown"

def process_query(user_query, input_type="text"):
    """Run the retrieval-augmented diagnosis flow for one user message.

    The query is embedded, the five nearest chunks are fetched from the FAISS
    index, the emotion classifier labels the query, and the reader LLM is
    prompted with the retrieved context. The interaction is then logged and a
    PDF summary is created.

    Args:
        user_query: The user's message (typed, or transcribed from audio).
        input_type: Label stored in the log for how the query arrived.

    Returns:
        A ``(answer, download_path)`` tuple: the generated answer with the
        confidence footer appended, and the path returned by
        ``create_summary_pdf`` (``None`` for blank input).
    """
    # Nothing to analyse: do not embed, generate or log an empty message.
    if not user_query or not user_query.strip():
        return "⚠️ Please describe your concern so it can be analysed.", None

    # Embed the query
    query_embedding = embedding_model.encode(user_query, normalize_embeddings=True)
    query_embedding = np.array([query_embedding], dtype=np.float32)

    # Search FAISS index
    k = 5  # Retrieve top 5 relevant docs
    _distances, indices = faiss_index.search(query_embedding, k)
    # FAISS pads the result with -1 when it finds fewer than k neighbours; a
    # negative position would silently select a chunk from the end of the list.
    retrieved_docs = [
        chunked_text[i] for i in indices[0] if 0 <= i < len(chunked_text)
    ]

    # Construct context
    context = "\nExtracted documents:\n" + "".join(
        f"Document {i}:::\n{doc}\n" for i, doc in enumerate(retrieved_docs)
    )

    # Detect emotion (truncate so long messages do not exceed the classifier's
    # maximum input length)
    emotion_result = emotion_classifier(user_query, truncation=True)[0]
    print(f"Detected emotion: {emotion_result}")
    emotion = emotion_result['label']
    value = emotion_result['score']

    # Define RAG prompt. Only the user turn is supplied: the generation prompt
    # added by apply_chat_template already opens the assistant turn, so an
    # extra empty assistant message would put a blank reply in the history.
    prompt_in_chat_format = [
        {"role": "user", "content": f"""
        You are an AI assistant specialized in diagnosing mental disorders in humans.
        Using the information contained in the context, answer the question comprehensively.

        The **Diagnosed Mental Disorder** should be only one from the list provided.
        [Normal, Depression, Suicidal, Anxiety, Stress, Bi-Polar, Personality Disorder]

        Your response must include:
        1. **Diagnosed Mental Disorder**
        2. **Detected emotion** {emotion}
        3. **Intensity of emotion** {value}
        4. **Matching Symptoms**
        5. **Personalized Treatment**
        6. **Helpline Numbers**
        7. **Source Link** (if applicable)

        If a disorder cannot be determined, return **Diagnosed Mental Disorder** as "Unknown".

        ---
        Context:
        {context}

        Question: {user_query}"""},
    ]

    RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
        prompt_in_chat_format, tokenize=False, add_generation_prompt=True
    )

    # Generate response
    try:
        response = READER_LLM(RAG_PROMPT_TEMPLATE)
        answer = response[0]["generated_text"] if response and "generated_text" in response[0] else "⚠️ No output generated."
    except Exception as e:
        print("❌ Error during generation:", e)
        answer = "⚠️ An error occurred while generating the response."

    # Extracting diagnosis from the generated text, before the footer is added
    diagnosis = extract_diagnosis(answer)
    status = "fallback" if diagnosis.lower() == "unknown" else "success"

    # The logged score is the same number shown to the user (previously a
    # random value was logged instead).
    # NOTE(review): this number is the emotion classifier's score, not a
    # measure of how correct the generated diagnosis is - confirm the footer
    # wording below is what the product intends.
    confidence_score = round(float(value), 2)
    answer += f"\n\n🧭 Confidence Score: {value}"
    answer += "\n\n*Confidence Score is the correctness of the answer"

    # Log interaction
    log_query(input_type=input_type, query=user_query, diagnosis=diagnosis, confidence_score=confidence_score, status=status)
    download_path = create_summary_pdf(answer)

    return answer, download_path

# Dashboard Interface
def diagnosis_dashboard():
    """Build the bar chart of logged diagnoses for the dashboard tab.

    Rows with a missing diagnosis or a diagnosis of "unknown" (any case) are
    excluded before counting.

    Returns:
        A Plotly figure in every case. When there is nothing to plot, or the
        log cannot be read, the figure is an empty placeholder whose title
        carries the message - the plot output cannot display a plain string.
    """

    def _message_figure(message):
        # One zero-height bar gives Plotly valid data; hiding the axes leaves
        # only the title visible.
        placeholder = px.bar(x=[""], y=[0], title=message)
        placeholder.update_xaxes(visible=False)
        placeholder.update_yaxes(visible=False)
        return placeholder

    try:
        df = pd.read_csv(log_file_path)

        # Filter out unknown or fallback cases if needed
        df = df[df["diagnosis"].notna()]
        df = df[df["diagnosis"].str.lower() != "unknown"]

        # Covers both an empty log and a log whose rows were all filtered out.
        if df.empty:
            return _message_figure("No data logged yet.")

        # Diagnosis frequency
        diagnosis_counts = df["diagnosis"].value_counts().reset_index()
        diagnosis_counts.columns = ["Diagnosis", "Count"]

        # Create bar chart
        fig = px.bar(
            diagnosis_counts,
            x="Diagnosis",
            y="Count",
            color="Diagnosis",
            title="📊 Mental Health Diagnosis Distribution",
            text_auto=True
        )
        fig.update_layout(showlegend=False)
        return fig

    except Exception as e:
        return _message_figure(f"⚠️ Error loading dashboard: {str(e)}")

# For logs functionality
def log_query(input_type, query, diagnosis, confidence_score, status):
    """Append one interaction to the diagnosis log CSV.

    Args:
        input_type: How the query arrived (e.g. "text" or "voice").
        query: The user's message.
        diagnosis: The extracted diagnosis label.
        confidence_score: Score to record with the entry.
        status: Outcome label for the entry.

    Every field is written as text; ``None`` becomes an empty string.
    """
    row = [
        datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        input_type,
        query,
        diagnosis,
        confidence_score,
        status,
    ]
    with open(log_file_path, "a", newline="", encoding="utf-8") as f:
        # csv.writer already escapes embedded quotes; doubling them by hand
        # first stored every '"' twice and corrupted the text on read-back.
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(["" if field is None else str(field) for field in row])
def show_logs():
    """Return the latest 100 rows of the diagnosis log for the logs tab.

    Returns:
        A DataFrame in every case. If the log cannot be read, a one-row
        DataFrame with a single ``error`` column describes the problem; a bare
        string would be treated by the dataframe output as a CSV path to load.
    """
    try:
        df = pd.read_csv(log_file_path)
        return df.tail(100)
    except Exception as e:
        return pd.DataFrame({"error": [f"⚠️ Error: {e}"]})


def create_summary_pdf(text, filename_prefix="diagnosis_report"):
    """Write the diagnosis text to a PDF in the system temp directory.

    Args:
        text: Report body. Line breaks are kept; each line is wrapped at 90
            characters.
        filename_prefix: Leading part of the generated file name.

    Returns:
        The path of the created PDF, or ``None`` if creation failed.
    """
    import tempfile

    def _to_latin1(value):
        # The built-in Arial font only covers Latin-1. Emoji and other
        # characters outside it made PDF creation fail, so they are replaced
        # with "?" instead.
        return value.encode("latin-1", "replace").decode("latin-1")

    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", style='B', size=14)
        # Title kept free of emoji for the same Latin-1 reason.
        pdf.cell(200, 10, txt="Mental Health Diagnosis Report", ln=True, align='C')
        pdf.set_font("Arial", size=12)
        pdf.ln(10)

        # Wrap line by line: wrapping the whole text at once collapses every
        # line break and merges the report into a single paragraph.
        for paragraph in _to_latin1(text or "").splitlines():
            for line in textwrap.wrap(paragraph, width=90) or [""]:
                pdf.cell(200, 10, txt=line, ln=True)

        # Save to the system temp directory instead of the working directory
        filename = os.path.join(
            tempfile.gettempdir(),
            f"{filename_prefix}_{uuid.uuid4().hex[:6]}.pdf",
        )
        pdf.output(filename)

        print(f"✅ PDF created at: {filename}")
        return filename
    except Exception as e:
        print(f"❌ Error creating PDF: {e}")
        return None


def create_text_file(content, filename_prefix="diagnosis_text"):
    """Save *content* to a new UTF-8 text file and return the file name.

    The name is ``<filename_prefix>_<6 hex chars>.txt``; it is a relative
    path unless the prefix itself contains a directory.
    """
    suffix = uuid.uuid4().hex[:6]
    filename = "{}_{}.txt".format(filename_prefix, suffix)
    with open(filename, mode="w", encoding="utf-8") as out_file:
        out_file.write(content)
    return filename



# 📥 Feedback
feedback_data = []
def submit_feedback(feedback, input_type, query, diagnosis, confidence_score, status):
    """Append one feedback entry to the feedback CSV.

    Args:
        feedback: The user's free-text feedback.
        input_type, query, diagnosis, confidence_score, status: Session
            details stored alongside the feedback.

    Returns:
        A confirmation message containing the generated feedback ID.

    Every field is written as text; ``None`` becomes an empty string.
    """
    feedback_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    row = [
        feedback_id,
        timestamp,
        input_type,
        query,
        diagnosis,
        confidence_score,
        status,
        feedback,
    ]
    with open(feedback_file_path, "a", newline="", encoding="utf-8") as f:
        # csv.writer already escapes embedded quotes; doubling them by hand
        # first stored every '"' twice and corrupted the text on read-back.
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(["" if field is None else str(field) for field in row])

    return f"✅ Feedback received! Your Feedback ID: {feedback_id}"


def download_feedback_log():
    """Return the path of the feedback CSV for the download tab."""
    csv_path = feedback_file_path
    return csv_path


# def send_email_report(to_email, response):
#     response = resend.Emails.send({
#         "from": "MentalBot <noreply@safespaceai.com>",
#         "to": [to_email],
#         "subject": "🧠 Your Personalized Mental Health Report",
#         "text": response
#     })
#     return "✅ Diagnosis report sent to your email!" if response.get("id") else "⚠️ Failed to send email."


def unified_handler(audio, text):
    """Route a request from the assistant tab to the voice or text flow.

    Audio takes precedence when both inputs are supplied.

    Args:
        audio: Path to the recorded audio file, or a falsy value if none.
        text: The typed message, possibly empty or ``None``.

    Returns:
        A ``(response, download_path)`` tuple; ``download_path`` is ``None``
        when no report file exists on disk.
    """
    if audio:
        response, download_path = process_whisper_query(audio)
    elif text and text.strip():
        response, download_path = process_query(text, input_type="text")
    else:
        # Neither input was provided: do not run the pipeline on an empty query.
        return "⚠️ Please record your concern or type it in the text box.", None

    # Ensure download path is valid
    if download_path and os.path.exists(download_path):
        return response, download_path

    print("❌ PDF not found or failed to generate.")
    return response, None



    # if email:
    #     send_status = send_email_report(to_email=email, response=response)
    #     response += f"\n\n{send_status}"

    # return response, download_path


# Gradio UI

# Assistant tab: takes a recording and/or typed text and shows the generated
# answer together with the downloadable report file.
# The description no longer promises an emailed report: the email path in this
# file is commented out and the handler takes no email address.
main_assistant_tab = gr.Interface(
    fn=unified_handler,
    inputs=[
        gr.Audio(type="filepath", label="🎙 Speak your concern"),
        gr.Textbox(lines=2, placeholder="Or type your mental health concern here...")
    ],
    outputs=[
        gr.Textbox(label="🧠 Personalized Diagnosis", lines=8),
        gr.File(label="📥 Download Diagnosis Report")
    ],
    title="🧠 SafeSpace AI",
    description="💙 *We care for you.*\n\nSpeak or type your concern to receive AI-powered mental health insights. Download your report as a file."
)

# Dashboard tab: no inputs; renders whatever diagnosis_dashboard returns in a plot.
dashboard_tab = gr.Interface(
    fn=diagnosis_dashboard,
    inputs=[],
    outputs=gr.Plot(label="📊 Diagnosis Distribution"),
    title="📊 Usage Dashboard"
)


# Logs tab: shows the tail of the diagnosis log.
# NOTE(review): the log contains the users' raw queries, and this tab is
# available to everyone who can open the app - confirm this exposure is intended.
logs_tab = gr.Interface(
    fn=show_logs,
    inputs=[],
    outputs=gr.Dataframe(label="📄 Diagnosis Logs (Latest 100 entries)"),
    title="📄 Logs"
)


# 📝 Anonymous Feedback
# The lambda only forwards its six arguments, in order, to submit_feedback.
feedback_tab = gr.Interface(
    fn=lambda fb, inp_type, query, diag, score, status: submit_feedback(fb, inp_type, query, diag, score, status),
    inputs=[
        gr.Textbox(label="📝 Feedback"),
        gr.Textbox(label="Input Type"),
        gr.Textbox(label="Query"),
        gr.Textbox(label="Diagnosis"),
        gr.Textbox(label="Confidence Score"),
        gr.Textbox(label="Status")
    ],
    outputs="text",
    title="📝 Submit Feedback With Session Metadata"
)


# Feedback download tab: serves the whole feedback CSV as a file.
# NOTE(review): like the logs tab, this exposes every stored query and feedback
# text to any visitor - confirm this is intended.
feedback_download_tab = gr.Interface(
    fn=download_feedback_log,
    inputs=[],
    outputs=gr.File(label="📥 Download All Feedback Logs"),
    title="📂 Download Feedback CSV"
)


# Final App Launch
# interface_list and tab_names are matched by position, so keep both lists in
# the same order.
app = gr.TabbedInterface(
    interface_list=[
        main_assistant_tab,
        dashboard_tab,
        logs_tab,
        feedback_tab,
        feedback_download_tab
    ],
    tab_names=[
        "🧠 Assistant",
        "📊 Dashboard",
        "📄 Logs",
        "📝 Feedback",
        "📂 Feedback CSV"
    ]
)


# share=True asks Gradio for a public share link.
# NOTE(review): the print below only runs once launch() returns; if launch()
# blocks in this environment the message appears at shutdown - TODO confirm.
app.launch(share=True)
print("🚀 SafeSpace AI is live!")