# Eid_chat / app.py — multilingual Eid Q&A chatbot (Flask backend)
# Author: Hafiza Maham
# History: commit c653454 ("updat app file")
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import json
import re
from sentence_transformers import SentenceTransformer, CrossEncoder, util
import torch
from typing import List, Dict
import random
import datetime
from fuzzywuzzy import fuzz
# Flask application object; CORS is enabled so a browser frontend on a
# different origin can call this API.
app = Flask(__name__)  # Fixed: __name__ instead of _name_
CORS(app)
class EnhancedMultilingualEidQABot:
    """Multilingual (English/Urdu) retrieval-based Q&A bot for Eid topics.

    Pipeline: a bi-encoder retrieves candidate chunks by cosine similarity,
    keyword/tag heuristics boost their scores, and a cross-encoder re-ranks
    the top candidates.  The heavy models and the chunk embeddings are
    created lazily on the first question, so construction stays cheap.
    """

    def __init__(self, data_file: str = 'dataSet.json'):
        """Load the dataset and build searchable chunks.

        Args:
            data_file: path to a JSON list of {"text": ..., "tag": ...} items.
        """
        print("\U0001F504 Loading multilingual models...")
        # Model handles are created lazily in _load_models().
        self.bi_encoder = None
        self.cross_encoder = None
        print("\U0001F4D6 Processing dataset...")
        self.data = self._load_dataset(data_file)
        self.knowledge_chunks = self._create_chunks()
        self.chunk_embeddings = None  # filled in by _ensure_embeddings()
        self.question_patterns = self._initialize_question_patterns()
        print("\u2705 Bot ready!\n")

    def _ensure_embeddings(self) -> None:
        """Encode every knowledge chunk once; later calls are no-ops."""
        if self.chunk_embeddings is None:
            self._load_models()
            print("\U0001F9E0 Creating embeddings...")
            self.chunk_embeddings = self.bi_encoder.encode(
                [chunk['text'] for chunk in self.knowledge_chunks],
                convert_to_tensor=True,
                show_progress_bar=True
            )

    def _load_dataset(self, data_file) -> List[Dict]:
        """Read the JSON dataset; return [] on any failure (best effort).

        Fixed: also returns [] when the file parses but is not a list,
        which previously broke _create_chunks downstream.
        """
        try:
            with open(data_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return data if isinstance(data, list) else []
        except Exception as e:
            # Deliberate best-effort: the bot still starts with no data.
            print(f"Error loading dataset: {e}")
            return []

    def _create_chunks(self) -> List[Dict]:
        """Expand dataset items into retrieval chunks.

        Every item yields one 'original' chunk; topic keywords (English or
        Urdu) add prefixed 'enhanced' duplicates whose score_boost skews
        retrieval toward (>1.0) or away from (<1.0) that topic.
        """
        chunks = []
        for item in self.data:
            text = item.get('text')
            if not text:
                continue  # Fixed: skip malformed items instead of raising KeyError
            tag = item.get('tag', 'General')
            lowered = text.lower()
            chunks.append({
                'text': text,
                'tag': tag,
                'type': 'original',
                'score_boost': 1.0
            })
            if 'eid' in lowered or 'عید' in text:
                chunks.append({'text': f"Eid info: {text}", 'tag': tag, 'type': 'enhanced', 'score_boost': 1.1})
            if 'prayer' in lowered or 'نماز' in text:
                chunks.append({'text': f"Prayer info: {text}", 'tag': tag, 'type': 'enhanced', 'score_boost': 1.2})
            if 'qurbani' in lowered or 'قربانی' in text or 'sacrifice' in lowered:
                chunks.append({'text': f"Qurbani info: {text}", 'tag': tag, 'type': 'enhanced', 'score_boost': 1.2})
            if 'funny' in tag.lower() or 'shair' in tag.lower():
                chunks.append({'text': f"Fun: {text}", 'tag': tag, 'type': 'enhanced', 'score_boost': 0.9})
            if 'gaza' in lowered or 'غزہ' in text:
                chunks.append({'text': f"Gaza info: {text}", 'tag': tag, 'type': 'enhanced', 'score_boost': 1.3})
        return chunks

    def _load_models(self) -> None:
        """Instantiate the sentence-transformer models on first use."""
        if self.bi_encoder is None:
            print("\U0001F504 Loading bi-encoder...")
            self.bi_encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
        if self.cross_encoder is None:
            print("\U0001F504 Loading cross-encoder...")
            self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

    def _initialize_question_patterns(self) -> Dict[str, List[str]]:
        """Build tag -> keyword lists used for fuzzy question typing."""
        tag_keywords = {}
        for item in self.data:
            tag = item.get("tag", "").lower()
            if tag not in tag_keywords:
                tag_keywords[tag] = set()
            tag_keywords[tag].update(tag.replace('_', ' ').split())
            # Heuristics: seed well-known tags with extra trigger words.
            if "greeting" in tag:
                tag_keywords[tag].update(["hi", "hello", "salaam", "eid mubarak", "السلام"])
            elif "prayer" in tag:
                tag_keywords[tag].update(["prayer", "namaz", "salah", "نماز"])
            elif "qurbani" in tag or "sacrifice" in tag:
                tag_keywords[tag].update(["qurbani", "sacrifice", "janwar", "bakra", "قربانی"])
            elif "gaza" in tag:
                tag_keywords[tag].update(["gaza", "غزہ", "palestine", "فلسطین"])
        return {k: list(v) for k, v in tag_keywords.items()}

    def _clean_input(self, text: str) -> str:
        """Lower-case, collapse whitespace, strip punctuation (keeps ؟ and !)."""
        text = re.sub(r'\s+', ' ', text.strip().lower())
        text = re.sub(r'[^\w\s؟!]', '', text)
        return text

    def _fuzzy_match(self, word: str, keywords: List[str]) -> bool:
        """True when `word` is >80% similar to any keyword (Levenshtein ratio)."""
        return any(fuzz.ratio(word, keyword) > 80 for keyword in keywords)

    def _detect_question_type(self, question: str) -> str:
        """Classify the question into a dataset tag, or 'general'."""
        cleaned_question = self._clean_input(question)
        words = cleaned_question.split()
        for category, keywords in self.question_patterns.items():
            if any(self._fuzzy_match(word, keywords) for word in words):
                return category
        return 'general'

    def _get_contextual_boost(self, chunk: Dict, question_type: str) -> float:
        """Combine a chunk's static boost with a bonus for a matching tag."""
        boost = chunk.get('score_boost', 1.0)
        if question_type in chunk['tag'].lower():
            boost *= 1.3
        return boost

    def _is_time_sensitive(self, question: str) -> bool:
        """True when the question asks about timing (English or Urdu)."""
        time_keywords = ['time', 'waqt', 'kab', 'when', 'کب', 'وقت']
        return any(self._fuzzy_match(word, time_keywords) for word in question.lower().split())

    def answer_question(self, question: str) -> str:
        """Answer a free-form question with the best-matching chunk.

        Retrieval: boosted cosine similarity picks the top 15 candidates,
        the cross-encoder re-ranks them, and the two scores are blended
        70/30.

        Fixed: empty input and an empty knowledge base are answered
        without loading the models (the latter previously crashed in
        torch.topk with k=0).
        """
        cleaned_question = self._clean_input(question)
        if not cleaned_question:
            return self._get_default_response('empty')
        if not self.knowledge_chunks:
            return self._get_default_response('general')
        self._load_models()
        self._ensure_embeddings()
        question_type = self._detect_question_type(cleaned_question)
        question_embedding = self.bi_encoder.encode(cleaned_question, convert_to_tensor=True)
        cos_scores = util.cos_sim(question_embedding, self.chunk_embeddings)[0]
        # float() avoids building a list of 0-dim tensors and re-wrapping it.
        boosted_scores = [
            float(score) * self._get_contextual_boost(self.knowledge_chunks[i], question_type)
            for i, score in enumerate(cos_scores)
        ]
        top_k = min(15, len(self.knowledge_chunks))
        top_results = torch.topk(torch.tensor(boosted_scores), k=top_k)
        top_chunks = [self.knowledge_chunks[i]['text'] for i in top_results.indices.tolist()]
        top_scores = top_results.values.tolist()
        # Re-rank candidates with the cross-encoder, then blend scores 70/30.
        rerank_pairs = [(cleaned_question, chunk) for chunk in top_chunks]
        rerank_scores = self.cross_encoder.predict(rerank_pairs)
        combined_scores = [(rerank_scores[i] * 0.7 + top_scores[i] * 0.3) for i in range(len(rerank_scores))]
        best_idx = max(range(len(combined_scores)), key=lambda i: combined_scores[i])
        best_chunk = top_chunks[best_idx]
        # Strip the synthetic prefix added by _create_chunks, if any.
        for prefix in ["Eid info: ", "Prayer info: ", "Qurbani info: ", "Fun: ", "Gaza info: "]:
            if best_chunk.startswith(prefix):
                best_chunk = best_chunk[len(prefix):]
                break
        if self._is_time_sensitive(cleaned_question):
            date = datetime.datetime.now().strftime('%B %d, %Y')
            best_chunk += f"\n\n🕒 آج {date} ہے۔ عید الاضحیٰ عام طور پر 10th Dhul-Hijjah کو ہوتی ہے۔"
        return best_chunk + "\n\n This is a demo. Your feedback matters."

    def _get_default_response(self, question_type: str) -> str:
        """Canned fallback responses keyed by question type."""
        return {
            'empty': "❓ Ask something about Eid!",
            'general': "🌟 I'm your Eid Assistant. Ask me anything about Eid!"
        }.get(question_type, "🌟 I'm your Eid Assistant. Ask me anything about Eid!")

    def get_random_by_tag(self, tag_keyword: str) -> str:
        """Return a random chunk whose tag contains `tag_keyword` (exact, lower-case match)."""
        matches = [c['text'] for c in self.knowledge_chunks if tag_keyword in c['tag'].lower()]
        return random.choice(matches) if matches else "No info found."
# Module-level singleton: loads the dataset at import time.  The heavy
# transformer models stay lazy and are only created on the first /ask.
bot = EnhancedMultilingualEidQABot('dataSet.json')
@app.route('/ask', methods=['POST'])
def ask():
    """POST {"question": "..."} -> JSON {"answer": "..."}.

    Fixed: request.get_json() returns None when the body is missing or
    not valid JSON, which previously raised AttributeError (HTTP 500);
    silent=True plus an empty-dict fallback degrades to the bot's
    default response instead.
    """
    payload = request.get_json(silent=True) or {}
    question = payload.get('question', '')
    return jsonify({'answer': bot.answer_question(question)})
@app.route('/tags', methods=['GET'])
def tags():
    """GET -> JSON {"tags": [...]}: the sorted distinct chunk tags."""
    seen = set()
    for chunk in bot.knowledge_chunks:
        seen.add(chunk['tag'])
    return jsonify({'tags': sorted(seen)})
@app.route('/tag/<tag>', methods=['GET'])
def get_by_tag(tag):
    """GET -> JSON {"results": [...]}: texts of chunks whose tag contains `tag` (case-insensitive)."""
    needle = tag.lower()
    matched = [
        chunk['text']
        for chunk in bot.knowledge_chunks
        if needle in chunk['tag'].lower()
    ]
    return jsonify({'results': matched})
@app.route('/')
def home():
    """Health-check endpoint: plain-text liveness message."""
    status_message = "✅ Eid Assistant API is running."
    return status_message
# Entry point: bind to all interfaces; port comes from the PORT
# environment variable (default 5000, e.g. for PaaS deployments).
if __name__ == '__main__':
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port)