Spaces:
Sleeping
Sleeping
File size: 13,755 Bytes
c02e09b 125237a 051c6a0 c02e09b 125237a c02e09b 051c6a0 c02e09b f3afefa c02e09b f3afefa 125237a f3afefa 125237a f3afefa c02e09b 051c6a0 c02e09b b8db557 026dc36 125237a 026dc36 125237a 026dc36 125237a 026dc36 125237a 051c6a0 125237a 051c6a0 125237a 051c6a0 b221737 051c6a0 125237a 051c6a0 c02e09b 125237a de77ff5 051c6a0 125237a 026dc36 125237a 026dc36 125237a 051c6a0 125237a c02e09b 051c6a0 c02e09b 125237a 051c6a0 c02e09b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
import html
import os

import arxiv
import gradio as gr
import requests
from semanticscholar import SemanticScholar
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Credentials for the Google Custom Search JSON API used by
# fetch_google_cse_links. They are read from the environment so a deployment
# can supply its own secrets without editing the source; the literals are the
# legacy values, kept only as fallbacks so existing setups keep working.
# NOTE(review): the fallback API key is committed in plain text. It should be
# rotated and the fallback dropped once GOOGLE_API_KEY is set in the
# environment.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or "AIzaSyAhMzIHz2R5VaHC7uSXcZ9yK4luL0yV3sM"
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID") or "b2d08ab5820ff465d"
# Sentence-embedding model shared by the embedding-based functions in this
# file; it is loaded once, when the module is executed.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
# Catalogue of mathematical domains: each key is a domain name and each value
# is a short description of that domain. The descriptions are what gets
# embedded and compared against the user's question, and they are also shown
# to the user as the "Reason" for a prediction.
DOMAINS = {
    "Real Analysis": "Studies properties of real-valued functions, sequences, limits, continuity, differentiation, Riemann/ Lebesgue integration, and convergence in the real number system.",
    "Complex Analysis": "Explores analytic functions of complex variables, contour integration, conformal mappings, and singularity theory.",
    "Functional Analysis": "Deals with infinite-dimensional vector spaces, Banach and Hilbert spaces, linear operators, duality, and spectral theory in the context of functional spaces.",
    "Measure Theory": "Studies sigma-algebras, measures, measurable functions, and integrals, forming the foundation for modern probability and real analysis.",
    "Fourier and Harmonic Analysis": "Analyzes functions via decompositions into sines, cosines, or general orthogonal bases, often involving Fourier series, Fourier transforms, and convolution techniques.",
    "Calculus of Variations": "Optimizes functionals over infinite-dimensional spaces, leading to Euler-Lagrange equations and applications in physics and control theory.",
    "Metric Geometry": "Explores geometric properties of metric spaces and the behavior of functions and sequences under various notions of distance.",
    "Ordinary Differential Equations (ODEs)": "Involves differential equations with functions of a single variable, their qualitative behavior, existence, uniqueness, and methods of solving them.",
    "Partial Differential Equations (PDEs)": "Deals with multivariable functions involving partial derivatives, including wave, heat, and Laplace equations.",
    "Dynamical Systems": "Studies evolution of systems over time using discrete or continuous-time equations, stability theory, phase portraits, and attractors.",
    "Linear Algebra": "Focuses on vector spaces, linear transformations, eigenvalues, diagonalization, and matrices.",
    "Abstract Algebra": "General study of algebraic structures such as groups, rings, fields, and modules.",
    "Group Theory": "Investigates algebraic structures with a single binary operation satisfying group axioms, including symmetry groups and applications.",
    "Ring and Module Theory": "Extends group theory to rings (two operations) and modules (generalized vector spaces).",
    "Field Theory": "Studies field extensions, algebraic and transcendental elements, and classical constructions.",
    "Galois Theory": "Connects field theory and group theory to solve polynomial equations and understand solvability.",
    "Algebraic Number Theory": "Applies tools from abstract algebra to study integers, Diophantine equations, and number fields.",
    "Representation Theory": "Studies abstract algebraic structures by representing their elements as linear transformations of vector spaces.",
    "Algebraic Geometry": "Examines solutions to polynomial equations using geometric and algebraic techniques like varieties, schemes, and morphisms.",
    "Differential Geometry": "Studies geometric structures on smooth manifolds, curvature, geodesics, and applications in general relativity.",
    "Topology": "Analyzes qualitative spatial properties preserved under continuous deformations, including homeomorphism, compactness, and connectedness.",
    "Geometric Topology": "Explores topological manifolds and their classification, knot theory, and low-dimensional topology.",
    "Symplectic Geometry": "Studies geometry arising from Hamiltonian systems and phase space, central to classical mechanics.",
    "Combinatorics": "Covers enumeration, existence, construction, and optimization of discrete structures.",
    "Graph Theory": "Deals with the study of graphs, networks, trees, connectivity, and coloring problems.",
    "Discrete Geometry": "Focuses on geometric objects and combinatorial properties in finite settings, such as polytopes and tilings.",
    "Set Theory": "Studies sets, cardinality, ordinals, ZFC axioms, and independence results.",
    "Mathematical Logic": "Includes propositional logic, predicate logic, proof theory, model theory, and recursion theory.",
    "Category Theory": "Provides a high-level, structural framework to relate different mathematical systems through morphisms and objects.",
    "Probability Theory": "Mathematical foundation for randomness, including random variables, distributions, expectation, and stochastic processes.",
    "Mathematical Statistics": "Theory behind estimation, hypothesis testing, confidence intervals, and likelihood inference.",
    "Stochastic Processes": "Studies processes that evolve with randomness over time, like Markov chains and Brownian motion.",
    "Information Theory": "Analyzes data transmission, entropy, coding theory, and information content in probabilistic settings.",
    "Numerical Analysis": "Designs and analyzes algorithms to approximate solutions of mathematical problems including root-finding, integration, and differential equations.",
    "Optimization": "Studies finding best outcomes under constraints, including convex optimization, linear programming, and integer programming.",
    "Operations Research": "Applies optimization, simulation, and probabilistic modeling to decision-making problems in logistics, finance, and industry.",
    "Control Theory": "Mathematically models and regulates dynamic systems through feedback and optimal control strategies.",
    "Computational Mathematics": "Applies algorithmic and numerical techniques to solve mathematical problems on computers.",
    "Game Theory": "Analyzes strategic interaction among rational agents using payoff matrices and equilibrium concepts.",
    "Machine Learning Theory": "Explores the mathematical foundation of algorithms that learn from data, covering generalization, VC dimension, and convergence.",
    "Spectral Theory": "Studies the spectrum (eigenvalues) of linear operators, primarily in Hilbert/Banach spaces, relevant to quantum mechanics and PDEs.",
    "Operator Theory": "Focuses on properties of linear operators on function spaces and their classification.",
    "Mathematical Physics": "Uses advanced mathematical tools to solve and model problems in physics, often involving differential geometry and functional analysis.",
    "Financial Mathematics": "Applies stochastic calculus and optimization to problems in pricing, risk, and investment.",
    "Mathematics Education": "Focuses on teaching methods, learning theories, and curriculum design in mathematics.",
    "History of Mathematics": "Studies the historical development of mathematical concepts, theorems, and personalities.",
    "Others / Multidisciplinary": "Covers problems that span multiple mathematical areas or do not fall neatly into a traditional domain.",
}
# Core Functions
# Parallel, index-aligned views of DOMAINS: names, their descriptions, and one
# embedding per description (computed once, when the module is executed).
domain_names = [*DOMAINS]
domain_texts = [DOMAINS[name] for name in domain_names]
domain_embeddings = model.encode(domain_texts)
def fetch_arxiv_refs(query, max_results=5):
    """Look up papers on arXiv that match ``query``.

    The lookup is best-effort: a failure while talking to arXiv is reported
    on stdout and whatever was collected up to that point is returned, so the
    caller never has to handle an exception.

    Args:
        query: Free-text search string handed to ``arxiv.Search``.
        max_results: Maximum number of papers to request.

    Returns:
        A list of dicts with the keys ``title``, ``authors`` (at most three
        names joined by ``", "``), ``year``, ``url`` and ``source`` (always
        ``"arXiv"``). The list is empty when the query is blank, when
        ``max_results`` is zero or negative, when nothing matched, or when
        the lookup failed before the first result.
    """
    refs = []
    # Nothing meaningful to search for: skip the network round trip entirely.
    if not query or not query.strip():
        return refs
    if max_results is not None and max_results <= 0:
        return refs

    try:
        search = arxiv.Search(query=query, max_results=max_results)
        # NOTE(review): newer releases of the `arxiv` package appear to prefer
        # `arxiv.Client().results(search)` over `Search.results()`; confirm
        # against the installed version before switching.
        for paper in search.results():
            refs.append({
                "title": paper.title,
                "authors": ", ".join(a.name for a in paper.authors[:3]),
                "year": paper.published.year,
                "url": paper.entry_id,
                "source": "arXiv",
            })
    except Exception as e:
        # Still best-effort, but no longer silent, and no longer swallowing
        # KeyboardInterrupt / SystemExit the way a bare `except:` did.
        print("arXiv Error:", e)
    return refs
# URL fragments whose results are dropped from the web search: social
# networks, online calculators/solvers and homework-answer sites.
_EXCLUDED_DOMAINS = (
    "facebook.com", "twitter.com", "instagram.com", "linkedin.com", "tiktok.com",
    "wolframalpha.com", "symbolab.com", "cymath.com", "mathway.com", "mathsolver.microsoft.com",
    "photomath.com", "mathpapa.com", "integral-calculator.com", "derivative-calculator.net",
    "mathportal.org", "stattrek.com", "calculatorsoup.com", "desmos.com", "geogebra.org",
    "socratic.org", "chegg.com", "quizlet.com",
)


def fetch_google_cse_links(query, max_results=5):
    """Search the web through the Google Custom Search JSON API.

    A single request for the first 10 results is made. Results whose URL
    contains any entry of ``_EXCLUDED_DOMAINS`` are dropped, and the rest are
    grouped by snippet similarity so near-duplicate hits collapse into one
    link.

    The lookup is best-effort: any error is printed and the links gathered so
    far are returned.

    Args:
        query: Free-text search string.
        max_results: Maximum number of links to return.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``snippet`` and
        ``source`` (always ``"Google CSE"``). The list is empty when the
        query is blank, when ``max_results`` is zero or negative, when the
        API returned no usable items, or when the request failed.
    """
    links = []
    # Nothing meaningful to search for: skip the network round trip entirely.
    if not query or not query.strip():
        return links
    if max_results is not None and max_results <= 0:
        return links

    endpoint = "https://www.googleapis.com/customsearch/v1"
    params = {
        "q": query,
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CSE_ID,
        "num": 10,
        "start": 1,
    }
    try:
        # Same timeout as the page scraper, so a stalled request cannot hang
        # the UI callback.
        res = requests.get(endpoint, params=params, timeout=6)
        if not res.ok:
            # Report only the status code: the request URL carries the API key.
            print("Google CSE Error: HTTP", res.status_code)
            return links

        candidates = []
        for item in res.json().get("items", []):
            link = item.get("link", "")
            lowered = link.lower()
            # Substring match against the whole URL, not just the host name.
            if any(domain in lowered for domain in _EXCLUDED_DOMAINS):
                continue
            candidates.append({
                "title": item.get("title", "No Title"),
                "url": link,
                "snippet": item.get("snippet", ""),
            })
        if not candidates:
            return links

        # One similarity matrix for all snippets instead of one sklearn call
        # per pair.
        snippet_embeddings = model.encode([c["snippet"] for c in candidates])
        similarity = cosine_similarity(snippet_embeddings)

        used = set()
        for i in range(len(candidates)):
            if i in used:
                continue
            # Group every not-yet-used later result whose snippet is close to
            # result i (compared against i itself, not transitively).
            group = [i]
            for j in range(i + 1, len(candidates)):
                if j not in used and similarity[i][j] > 0.8:
                    group.append(j)
            used.update(group)

            # Prefer a result with ".pdf" in its URL; otherwise keep the first
            # member of the group.
            chosen = next(
                (candidates[idx] for idx in group if ".pdf" in candidates[idx]["url"].lower()),
                candidates[i],
            )
            links.append({
                "title": chosen["title"],
                "url": chosen["url"],
                "snippet": chosen["snippet"],
                "source": "Google CSE",
            })
            if len(links) >= max_results:
                break
    except Exception as e:
        print("Google CSE Error:", e)
    return links
# Extract top-5 semantically relevant sections from a web page using all-MiniLM-L6-v2
from bs4 import BeautifulSoup
def extract_top_sections_from_url(query, url, top_k=5):
    """Return the passages of a web page that are most similar to ``query``.

    The page is downloaded, the text of every ``<p>``, ``<li>`` and ``<div>``
    element longer than 80 characters is collected, and the passages are
    ranked by cosine similarity between their embedding and the query's.

    The extraction is best-effort: any error is printed and an empty list is
    returned.

    Args:
        query: Text the passages are compared against.
        url: Address of the page to download.
        top_k: Maximum number of passages to return.

    Returns:
        Up to ``top_k`` passage strings, most similar first. The list is
        empty when ``query`` or ``url`` is empty, when ``top_k`` is zero or
        negative, when the response status is not 200, when the page has no
        long enough passage, or when an error occurred.
    """
    # `sims.argsort()[-0:]` would select *every* passage, so a non-positive
    # top_k has to be rejected explicitly.
    if not query or not url or top_k <= 0:
        return []
    try:
        res = requests.get(url, timeout=6)
        if res.status_code != 200:
            return []
        soup = BeautifulSoup(res.text, 'html.parser')
        texts = (el.get_text(strip=True) for el in soup.find_all(['p', 'li', 'div']))
        # A <div> wrapping a single <p> yields the same text twice;
        # dict.fromkeys drops exact repeats while keeping document order.
        clean_paras = list(dict.fromkeys(t for t in texts if len(t) > 80))
        if not clean_paras:
            return []
        # Encode to plain arrays (the encoder's default output) and keep the
        # query as a one-row batch, the same way classify_math_question does,
        # instead of wrapping a tensor in a Python list for scikit-learn.
        query_embed = model.encode([query])
        para_embeds = model.encode(clean_paras)
        sims = cosine_similarity(query_embed, para_embeds)[0]
        top_indices = sims.argsort()[::-1][:top_k]
        return [clean_paras[i] for i in top_indices]
    except Exception as e:
        print(f"Error extracting from {url}: {e}")
        return []
# Classification entry point: builds the HTML report shown in the UI
def classify_math_question(question):
    """Classify a math question and build an HTML report with references.

    The question is embedded and compared against the embedding of every
    domain description; the two closest domains are reported as the major and
    minor domain. The report then lists arXiv papers and web links for the
    question, each web link followed by the passages extracted from its page.

    Titles, authors, snippets, URLs and page passages come from external
    services and arbitrary web pages, so all of them are HTML-escaped before
    being placed in the report.

    Args:
        question: The math question as plain text.

    Returns:
        An HTML fragment as a string. For a missing or blank question this is
        just a short prompt asking for input.
    """
    if not question or not question.strip():
        return "<i>Please enter a math question.</i>"

    q_embed = model.encode([question])
    scores = cosine_similarity(q_embed, domain_embeddings)[0]
    ranked = scores.argsort()[::-1]  # indices of domains, best match first
    major = domain_names[ranked[0]]
    minor = domain_names[ranked[1]]

    parts = [
        f"<b>Major Domain:</b> {major}<br><i>Reason:</i> {DOMAINS[major]}<br><br>",
        f"<b>Minor Domain:</b> {minor}<br><i>Reason:</i> {DOMAINS[minor]}<br><br>",
    ]

    refs = fetch_arxiv_refs(question, max_results=5)
    links = fetch_google_cse_links(question, max_results=5)
    # Enrich every web link with the passages of its page closest to the question.
    for link in links:
        link['top_sections'] = extract_top_sections_from_url(question, link['url'])

    # html.escape also escapes both quote characters by default, which keeps
    # the single-quoted href attributes below intact.
    esc = html.escape

    if refs:
        parts.append("<b>Top Academic References (arXiv):</b><ul>")
        for p in refs:
            ref_url = esc(str(p['url']))
            parts.append(
                f"<li><b>{esc(str(p['title']))}</b> ({p['year']}) - <i>{esc(str(p['authors']))}</i>"
                f"<br><a href='{ref_url}' target='_blank'>{ref_url}</a></li>"
            )
        parts.append("</ul>")
    else:
        parts.append("<i>No academic references found.</i><br>")

    if links:
        parts.append("<b>Top Web Resources (Google CSE):</b><ul>")
        for link in links:
            link_url = esc(str(link['url']))
            parts.append(
                f"<li><b>{esc(str(link['title']))}</b><br>{esc(str(link['snippet']))}"
                f"<br><a href='{link_url}' target='_blank'>{link_url}</a>"
            )
            if link['top_sections']:
                parts.append("<br><u>Top Extracted Sections:</u><ol>")
                parts.extend(f"<li>{esc(sec)}</li>" for sec in link['top_sections'])
                parts.append("</ol>")
            parts.append("</li>")
        parts.append("</ul>")
    else:
        parts.append("<i>No web links found.</i>")

    return "".join(parts)
# Gradio UI: a multi-line text box feeds classify_math_question and the HTML
# string it returns is rendered as the output.
iface = gr.Interface(
    fn=classify_math_question,
    inputs=gr.Textbox(lines=5, label="Enter Math Question (LaTeX supported)"),
    outputs=gr.HTML(label="Predicted Domains + References"),
    title="⚡ Math Domain Classifier with arXiv + Google",
    description="Classifies math problems into major/minor domains and fetches fast references from arXiv and Google.",
)
# Start the web app as soon as the module is executed.
iface.launch()