|
import html
import os
from urllib.parse import urlparse

import arxiv
import gradio as gr
import requests
from semanticscholar import SemanticScholar
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
# Credentials for the Google Custom Search JSON API.
#
# SECURITY: these values were committed in plain text, so they must be treated
# as compromised: rotate the key and provide the replacement through the
# GOOGLE_API_KEY / GOOGLE_CSE_ID environment variables. The literals below are
# kept only as a fallback so existing deployments keep working until then.
GOOGLE_API_KEY = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyAhMzIHz2R5VaHC7uSXcZ9yK4luL0yV3sM"
)
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID", "b2d08ab5820ff465d")
|
|
|
|
|
# Sentence-embedding model shared by every encode() call in this module.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
|
|
|
|
|
# Maps each mathematical domain name to a one-sentence description.
# The descriptions are what gets embedded for similarity matching, and they are
# also shown to the user as the "Reason" for a predicted domain. Insertion
# order matters: the name and embedding lists derived from this dict are
# indexed in parallel.
DOMAINS = {
    "Real Analysis": "Studies properties of real-valued functions, sequences, limits, continuity, differentiation, Riemann/ Lebesgue integration, and convergence in the real number system.",
    "Complex Analysis": "Explores analytic functions of complex variables, contour integration, conformal mappings, and singularity theory.",
    "Functional Analysis": "Deals with infinite-dimensional vector spaces, Banach and Hilbert spaces, linear operators, duality, and spectral theory in the context of functional spaces.",
    "Measure Theory": "Studies sigma-algebras, measures, measurable functions, and integrals, forming the foundation for modern probability and real analysis.",
    "Fourier and Harmonic Analysis": "Analyzes functions via decompositions into sines, cosines, or general orthogonal bases, often involving Fourier series, Fourier transforms, and convolution techniques.",
    "Calculus of Variations": "Optimizes functionals over infinite-dimensional spaces, leading to Euler-Lagrange equations and applications in physics and control theory.",
    "Metric Geometry": "Explores geometric properties of metric spaces and the behavior of functions and sequences under various notions of distance.",
    "Ordinary Differential Equations (ODEs)": "Involves differential equations with functions of a single variable, their qualitative behavior, existence, uniqueness, and methods of solving them.",
    "Partial Differential Equations (PDEs)": "Deals with multivariable functions involving partial derivatives, including wave, heat, and Laplace equations.",
    "Dynamical Systems": "Studies evolution of systems over time using discrete or continuous-time equations, stability theory, phase portraits, and attractors.",
    "Linear Algebra": "Focuses on vector spaces, linear transformations, eigenvalues, diagonalization, and matrices.",
    "Abstract Algebra": "General study of algebraic structures such as groups, rings, fields, and modules.",
    "Group Theory": "Investigates algebraic structures with a single binary operation satisfying group axioms, including symmetry groups and applications.",
    "Ring and Module Theory": "Extends group theory to rings (two operations) and modules (generalized vector spaces).",
    "Field Theory": "Studies field extensions, algebraic and transcendental elements, and classical constructions.",
    "Galois Theory": "Connects field theory and group theory to solve polynomial equations and understand solvability.",
    "Algebraic Number Theory": "Applies tools from abstract algebra to study integers, Diophantine equations, and number fields.",
    "Representation Theory": "Studies abstract algebraic structures by representing their elements as linear transformations of vector spaces.",
    "Algebraic Geometry": "Examines solutions to polynomial equations using geometric and algebraic techniques like varieties, schemes, and morphisms.",
    "Differential Geometry": "Studies geometric structures on smooth manifolds, curvature, geodesics, and applications in general relativity.",
    "Topology": "Analyzes qualitative spatial properties preserved under continuous deformations, including homeomorphism, compactness, and connectedness.",
    "Geometric Topology": "Explores topological manifolds and their classification, knot theory, and low-dimensional topology.",
    "Symplectic Geometry": "Studies geometry arising from Hamiltonian systems and phase space, central to classical mechanics.",
    "Combinatorics": "Covers enumeration, existence, construction, and optimization of discrete structures.",
    "Graph Theory": "Deals with the study of graphs, networks, trees, connectivity, and coloring problems.",
    "Discrete Geometry": "Focuses on geometric objects and combinatorial properties in finite settings, such as polytopes and tilings.",
    "Set Theory": "Studies sets, cardinality, ordinals, ZFC axioms, and independence results.",
    "Mathematical Logic": "Includes propositional logic, predicate logic, proof theory, model theory, and recursion theory.",
    "Category Theory": "Provides a high-level, structural framework to relate different mathematical systems through morphisms and objects.",
    "Probability Theory": "Mathematical foundation for randomness, including random variables, distributions, expectation, and stochastic processes.",
    "Mathematical Statistics": "Theory behind estimation, hypothesis testing, confidence intervals, and likelihood inference.",
    "Stochastic Processes": "Studies processes that evolve with randomness over time, like Markov chains and Brownian motion.",
    "Information Theory": "Analyzes data transmission, entropy, coding theory, and information content in probabilistic settings.",
    "Numerical Analysis": "Designs and analyzes algorithms to approximate solutions of mathematical problems including root-finding, integration, and differential equations.",
    "Optimization": "Studies finding best outcomes under constraints, including convex optimization, linear programming, and integer programming.",
    "Operations Research": "Applies optimization, simulation, and probabilistic modeling to decision-making problems in logistics, finance, and industry.",
    "Control Theory": "Mathematically models and regulates dynamic systems through feedback and optimal control strategies.",
    "Computational Mathematics": "Applies algorithmic and numerical techniques to solve mathematical problems on computers.",
    "Game Theory": "Analyzes strategic interaction among rational agents using payoff matrices and equilibrium concepts.",
    "Machine Learning Theory": "Explores the mathematical foundation of algorithms that learn from data, covering generalization, VC dimension, and convergence.",
    "Spectral Theory": "Studies the spectrum (eigenvalues) of linear operators, primarily in Hilbert/Banach spaces, relevant to quantum mechanics and PDEs.",
    "Operator Theory": "Focuses on properties of linear operators on function spaces and their classification.",
    "Mathematical Physics": "Uses advanced mathematical tools to solve and model problems in physics, often involving differential geometry and functional analysis.",
    "Financial Mathematics": "Applies stochastic calculus and optimization to problems in pricing, risk, and investment.",
    "Mathematics Education": "Focuses on teaching methods, learning theories, and curriculum design in mathematics.",
    "History of Mathematics": "Studies the historical development of mathematical concepts, theorems, and personalities.",
    "Others / Multidisciplinary": "Covers problems that span multiple mathematical areas or do not fall neatly into a traditional domain."
}
|
|
|
|
|
# Parallel sequences derived from DOMAINS: position i of each one refers to the
# same domain, so an index into the embedding scores maps back to a name.
domain_names = list(DOMAINS)
domain_texts = [DOMAINS[name] for name in domain_names]

# Encoded once at import time so each question only needs its own embedding.
domain_embeddings = model.encode(domain_texts)
|
|
|
def fetch_arxiv_refs(query, max_results=5):
    """Search arXiv for papers matching *query*.

    This is a best-effort lookup: any failure is reported on stdout and the
    references collected so far (possibly none) are returned instead of
    raising.

    Args:
        query: Free-text search string.
        max_results: Maximum number of results requested from arXiv.

    Returns:
        A list of dicts with the keys ``title``, ``authors`` (the first three
        author names, comma-separated), ``year``, ``url`` and ``source``.
    """
    refs = []
    # Nothing useful can come back for a blank query or a non-positive limit,
    # so skip the network round trip entirely.
    if not query or not query.strip() or max_results <= 0:
        return refs

    try:
        search = arxiv.Search(query=query, max_results=max_results)
        # NOTE(review): newer releases of the arxiv package appear to deprecate
        # Search.results() in favour of arxiv.Client().results(search) --
        # confirm against the installed version before switching.
        # An explicit loop (rather than a comprehension) keeps the entries
        # already gathered if the result iterator fails part-way through.
        for r in search.results():
            refs.append({
                "title": r.title,
                "authors": ", ".join(a.name for a in r.authors[:3]),
                "year": r.published.year,
                "url": r.entry_id,
                "source": "arXiv",
            })
    except Exception as e:
        # Previously a bare ``except: pass``; report the failure the same way
        # the Google CSE fetcher does instead of hiding it.
        print("arXiv Error:", e)
    return refs
|
|
|
|
|
_GOOGLE_CSE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"

# Hosts whose results are dropped: social networks, calculator/solver sites
# and homework-answer sites.
_EXCLUDED_DOMAINS = (
    "facebook.com", "twitter.com", "instagram.com", "linkedin.com", "tiktok.com",
    "wolframalpha.com", "symbolab.com", "cymath.com", "mathway.com", "mathsolver.microsoft.com",
    "photomath.com", "mathpapa.com", "integral-calculator.com", "derivative-calculator.net",
    "mathportal.org", "stattrek.com", "calculatorsoup.com", "desmos.com", "geogebra.org",
    "socratic.org", "chegg.com", "quizlet.com",
)

# Snippets whose cosine similarity exceeds this are treated as near-duplicates.
_DUPLICATE_SIMILARITY = 0.8


def _is_excluded_link(link):
    """Return True when *link* is hosted on an excluded domain or a subdomain."""
    host = (urlparse(link).hostname or "").lower()
    return any(
        host == domain or host.endswith("." + domain)
        for domain in _EXCLUDED_DOMAINS
    )


def _pick_representative(group, items):
    """Return the first PDF item of a near-duplicate group, else its first item."""
    for idx in group:
        if ".pdf" in items[idx]["url"].lower():
            return items[idx]
    return items[group[0]]


def fetch_google_cse_links(query, max_results=5):
    """Fetch web results for *query* from Google Custom Search.

    Only the first result page (up to 10 items) is requested. Results hosted
    on excluded domains are dropped, and results whose snippets are
    near-duplicates of each other are collapsed into one entry, preferring a
    PDF link when the group contains one.

    This is a best-effort lookup: any failure is reported on stdout and the
    links collected so far (possibly none) are returned instead of raising.

    Args:
        query: Free-text search string.
        max_results: Maximum number of links to return.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``snippet`` and
        ``source``.
    """
    links = []
    # Avoid spending API quota on a blank query or a non-positive limit.
    if not query or not query.strip() or max_results <= 0:
        return links

    try:
        params = {
            "q": query,
            "key": GOOGLE_API_KEY,
            "cx": GOOGLE_CSE_ID,
            "num": 10,
            "start": 1,
        }
        # The timeout keeps a stalled connection from hanging the UI forever.
        res = requests.get(_GOOGLE_CSE_ENDPOINT, params=params, timeout=10)
        # Surface quota/authentication errors through the handler below rather
        # than silently treating them as "no results".
        res.raise_for_status()

        candidates = []
        for item in res.json().get("items", []):
            link = item.get("link", "")
            if not link or _is_excluded_link(link):
                continue
            candidates.append({
                "title": item.get("title", "No Title"),
                "url": link,
                "snippet": item.get("snippet", ""),
            })

        if not candidates:
            return links

        # One similarity matrix for all snippets instead of one call per pair.
        snippet_embeddings = model.encode([c["snippet"] for c in candidates])
        similarity = cosine_similarity(snippet_embeddings)

        used = set()
        for i in range(len(candidates)):
            if i in used:
                continue
            # Greedy grouping: every later, still-unused snippet that is
            # similar enough to snippet i joins its group.
            group = [i] + [
                j for j in range(i + 1, len(candidates))
                if j not in used and similarity[i][j] > _DUPLICATE_SIMILARITY
            ]
            used.update(group)

            chosen = _pick_representative(group, candidates)
            links.append({
                "title": chosen["title"],
                "url": chosen["url"],
                "snippet": chosen["snippet"],
                "source": "Google CSE",
            })
            if len(links) >= max_results:
                break
    except Exception as e:
        print("Google CSE Error:", e)
    return links
|
|
|
|
|
|
|
from bs4 import BeautifulSoup |
|
def extract_top_sections_from_url(query, url, top_k=5):
    """Return the text passages of a web page most similar to *query*.

    The page is downloaded, the text of its ``p``, ``li`` and ``div`` elements
    is collected, passages of 80 characters or fewer are discarded, and the
    rest are ranked by cosine similarity between their embedding and the
    embedding of *query*.

    This is a best-effort lookup: any failure is reported on stdout and an
    empty list is returned instead of raising.

    Args:
        query: Text the passages are compared against.
        url: Address of the page to download.
        top_k: Maximum number of passages to return.

    Returns:
        Up to *top_k* passages, most similar first; an empty list when the
        page cannot be fetched, has no usable text, or an argument is empty.
    """
    # Guard before any network access. In particular ``top_k == 0`` used to
    # return every passage, because ``[-0:]`` slices the whole array.
    if not query or not url or top_k <= 0:
        return []

    try:
        res = requests.get(url, timeout=6)
        if res.status_code != 200:
            return []

        soup = BeautifulSoup(res.text, 'html.parser')
        texts = (
            element.get_text(strip=True)
            for element in soup.find_all(['p', 'li', 'div'])
        )
        # A <div> wrapping a single <p> yields the same text twice;
        # dict.fromkeys removes repeats while keeping document order.
        clean_paras = list(dict.fromkeys(t for t in texts if len(t) > 80))
        if not clean_paras:
            return []

        # Use encode()'s default output, as the other call sites in this file
        # do: torch tensors (convert_to_tensor=True) are not reliably accepted
        # by scikit-learn's cosine_similarity, e.g. when they live on a GPU.
        query_embed = model.encode([query])
        para_embeds = model.encode(clean_paras)
        sims = cosine_similarity(query_embed, para_embeds)[0]

        top_indices = sims.argsort()[::-1][:top_k]
        return [clean_paras[i] for i in top_indices]
    except Exception as e:
        print(f"Error extracting from {url}: {e}")
        return []
|
|
|
|
|
def _render_refs_html(refs):
    """Render arXiv references as an HTML list, escaping all remote text."""
    if not refs:
        return "<i>No academic references found.</i><br>"
    parts = ["<b>Top Academic References (arXiv):</b><ul>"]
    for ref in refs:
        title = html.escape(str(ref['title']))
        year = html.escape(str(ref['year']))
        authors = html.escape(str(ref['authors']))
        url = html.escape(str(ref['url']), quote=True)
        parts.append(
            f"<li><b>{title}</b> ({year}) - <i>{authors}</i><br>"
            f"<a href='{url}' target='_blank'>{url}</a></li>"
        )
    parts.append("</ul>")
    return "".join(parts)


def _render_links_html(links):
    """Render web results and their extracted sections, escaping remote text."""
    if not links:
        return "<i>No web links found.</i>"
    parts = ["<b>Top Web Resources (Google CSE):</b><ul>"]
    for link in links:
        title = html.escape(str(link['title']))
        snippet = html.escape(str(link['snippet']))
        url = html.escape(str(link['url']), quote=True)
        parts.append(
            f"<li><b>{title}</b><br>{snippet}<br>"
            f"<a href='{url}' target='_blank'>{url}</a>"
        )
        if link['top_sections']:
            parts.append("<br><u>Top Extracted Sections:</u><ol>")
            parts.extend(
                f"<li>{html.escape(str(sec))}</li>"
                for sec in link['top_sections']
            )
            parts.append("</ol>")
        parts.append("</li>")
    parts.append("</ul>")
    return "".join(parts)


def classify_math_question(question):
    """Classify a math question and build an HTML report with references.

    The question embedding is compared with the embedding of every domain
    description; the two best-scoring domains are reported as the major and
    minor domain. The report then lists arXiv references and Google CSE web
    results, each web result enriched with the page passages most similar to
    the question.

    Args:
        question: The question text entered by the user.

    Returns:
        An HTML fragment as a string. For a blank question this is a short
        prompt and no model or network call is made.
    """
    if not question or not question.strip():
        return "<i>Please enter a math question.</i>"

    q_embed = model.encode([question])
    scores = cosine_similarity(q_embed, domain_embeddings)[0]
    best, second = scores.argsort()[::-1][:2]
    major = domain_names[best]
    minor = domain_names[second]

    refs = fetch_arxiv_refs(question, max_results=5)
    links = fetch_google_cse_links(question, max_results=5)
    enriched_links = [
        {**link, "top_sections": extract_top_sections_from_url(question, link['url'])}
        for link in links
    ]

    # Titles, snippets, URLs and scraped passages come from third-party sites,
    # so the helpers HTML-escape them; the domain names and descriptions are
    # constants from this module and are inserted as they are.
    parts = [
        f"<b>Major Domain:</b> {major}<br><i>Reason:</i> {DOMAINS[major]}<br><br>",
        f"<b>Minor Domain:</b> {minor}<br><i>Reason:</i> {DOMAINS[minor]}<br><br>",
        _render_refs_html(refs),
        _render_links_html(enriched_links),
    ]
    return "".join(parts)
|
|
|
|
|
# Gradio UI: one multi-line text box in, one HTML panel out.
iface = gr.Interface(
    fn=classify_math_question,
    inputs=gr.Textbox(lines=5, label="Enter Math Question (LaTeX supported)"),
    outputs=gr.HTML(label="Predicted Domains + References"),
    title="⚡ Math Domain Classifier with arXiv + Google",
    description="Classifies math problems into major/minor domains and fetches fast references from arXiv and Google.",
)

# Start the web server only when the file is run as a script, so that
# importing this module (tests, tooling) does not launch it as a side effect.
if __name__ == "__main__":
    iface.launch()