|
<!doctype html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8" /> |
|
<title>Qwen3 Embedding Scatterplot</title> |
|
<style> |
|
/* Reset: strip default spacing and size every element by its border box. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Page shell: a horizontal flex row that fills the viewport and never scrolls. */
body {
  display: flex;
  height: 100vh;
  margin: 0;
  overflow: hidden;
  font-family: Arial, sans-serif;
}

/* Both panes share the row equally and stack their own children vertically. */
#left-pane,
#right-pane {
  display: flex;
  flex: 1;
  flex-direction: column;
  gap: 4px;
  padding: 20px;
  overflow: hidden;
  box-sizing: border-box;
}

/* Thin divider between the input pane and the plot pane. */
#left-pane {
  border-right: 1px solid #e0e0e0;
}

/* Textareas grow to fill the pane; min-height: 0 lets them shrink as flex items. */
textarea {
  flex: 1;
  width: 100%;
  min-height: 0;
  margin-bottom: 8px;
  padding: 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  box-sizing: border-box;
  font-size: 14px;
  line-height: 1.4;
  resize: none;
}

/* Primary action button. */
button {
  margin-top: 10px;
  padding: 8px 16px;
  border: none;
  border-radius: 4px;
  background-color: #007bff;
  color: white;
  font-size: 18px;
  cursor: pointer;
}

/* Greyed-out look while the button cannot be used. */
button:disabled {
  background-color: #aaa;
  cursor: not-allowed;
}

/* Plot container takes all remaining height in the right pane. */
#plot {
  flex: 1;
  width: 100%;
  min-height: 0;
}
|
</style> |
|
</head> |
|
|
|
<body> |
|
<!-- Left pane: the two editable inputs, the action button, and a status line. -->
<div id="left-pane">
  <h3 id="text-input-heading">Sentences:</h3>
  <textarea
    id="text-input"
    aria-labelledby="text-input-heading"
    placeholder="Each line is treated as one sentence"
  ></textarea>
  <h3 id="labels-input-heading">Labels:</h3>
  <textarea
    id="labels-input"
    aria-labelledby="labels-input-heading"
    placeholder="Each line is treated as one label"
    style="max-height: 200px"
  ></textarea>
  <!-- Starts disabled: the page script enables it once the model has loaded. -->
  <button id="embed-btn" type="button" disabled>Embed &amp; Plot</button>
  <!-- The script rewrites this text as work progresses; role/aria-live lets
       assistive technology announce those updates. -->
  <p
    id="status"
    role="status"
    aria-live="polite"
    style="
      margin-top: 8px;
      font-size: 16px;
      font-weight: 600;
      text-align: center;
    "
  >
    Loading model...
  </p>
</div>

<!-- Right pane: Plotly renders the scatterplot into #plot. -->
<div id="right-pane">
  <h3>Scatterplot:</h3>
  <div id="plot"></div>
</div>
|
|
|
<script type="module"> |
|
import "https://cdn.jsdelivr.net/npm/plotly.js@3.0.1/dist/plotly-basic.min.js"; |
|
import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm"; |
|
import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2"; |
|
/**
 * Shuffle an array in place using the Fisher–Yates algorithm.
 *
 * Sorting with a random comparator (`.sort(() => Math.random() - 0.5)`) hands
 * the sort an inconsistent ordering, so the resulting permutation is biased
 * and depends on the engine's sort implementation. Fisher–Yates performs
 * exactly `items.length - 1` swaps and gives every permutation equal weight
 * when `random` is uniform.
 *
 * @param {Array} items - Array to shuffle; it is mutated.
 * @param {() => number} [random=Math.random] - Source of numbers in [0, 1).
 * @returns {Array} The same array instance, shuffled.
 */
function shuffleInPlace(items, random = Math.random) {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(random() * (i + 1));
    [items[i], items[j]] = [items[j], items[i]];
  }
  return items;
}

// Sample sentences used to pre-fill the input. They are written in topical
// groups and shuffled so the initial text does not reveal the grouping.
const sentences = shuffleInPlace([
  // Weather
  "The sun peeked through the clouds after a drizzly morning.",
  "A gentle breeze rustled the leaves as we walked along the shoreline.",
  "Heavy rains caused flooding in several low-lying neighborhoods.",
  "It was so hot that even the birds sought shade under the palm trees.",
  "By midnight, the temperature had dropped below freezing.",
  "Thunderstorms lit up the sky with flashes of lightning.",
  "A thick fog settled over the city streets at dawn.",
  "The air smelled of ozone after the sudden hailstorm.",
  "I watched the snowflakes drift silently onto the ground.",
  "A double rainbow appeared after the rain shower.",
  "The humidity soared to uncomfortable levels by midday.",
  "Dust devils formed in the dry desert plains.",
  "The barometer readings indicated an approaching front.",
  "A sudden gust of wind knocked over the garden chairs.",
  "Light drizzle turned into a torrential downpour within minutes.",

  // Technology
  "The new smartphone features a foldable display and 5G connectivity.",
  "In the world of AI, transformers have revolutionized natural language processing.",
  "Quantum computing promises to solve problems beyond classical computers' reach.",
  "Blockchain technology is being explored for secure voting systems.",
  "Virtual reality headsets are becoming more affordable and accessible.",
  "The rise of electric vehicles is reshaping the automotive industry.",
  "Cloud computing allows businesses to scale resources dynamically.",
  "Machine learning algorithms can now predict stock market trends with surprising accuracy.",
  "Augmented reality applications are transforming retail experiences.",
  "The Internet of Things connects everyday devices to the web for smarter living.",
  "Cybersecurity threats are evolving, requiring constant vigilance.",
  "3D printing is enabling rapid prototyping and custom manufacturing.",
  "Edge computing reduces latency by processing data closer to the source.",
  "Biometric authentication methods are enhancing security in devices.",
  "Wearable technology is tracking health metrics in real-time.",
  "Artificial intelligence is being used to create realistic deepfakes.",

  // Cooking
  "Preheat the oven to 375°F before you start mixing the batter.",
  "She finely chopped the garlic and sautéed it in two tablespoons of olive oil.",
  "A pinch of saffron adds a beautiful color and aroma to traditional paella.",
  "If the soup is too salty, add a peeled potato to absorb excess sodium.",
  "Let the bread dough rise for at least an hour in a warm, draft-free spot.",
  "Marinate the chicken overnight in a blend of citrus and spices.",
  "Use a cast-iron skillet to sear the steak on high heat.",
  "Whisk the egg whites until they form stiff peaks.",
  "Fold in the chocolate chips gently to keep the batter airy.",
  "Brush the pastry with an egg wash for a golden finish.",
  "Slow-roast the pork shoulder until it falls off the bone.",
  "Garnish the salad with toasted nuts and fresh herbs.",
  "Deglaze the pan with white wine for a rich sauce.",
  "Simmer the curry paste until the aroma intensifies.",
  "Let the risotto rest before serving to thicken slightly.",

  // Sports
  "He dribbled past two defenders and sank a three-pointer at the buzzer.",
  "The marathon runner kept a steady pace despite the sweltering heat.",
  "Their home team clinched the championship with a last-minute goal.",
  "NASCAR fans cheered as the cars roared around the oval track.",
  "She landed a perfect triple axel at the figure skating championship.",
  "The cyclist pedaled up the steep hill in record time.",
  "He pitched a no-hitter during the high school baseball game.",
  "The quarterback threw a touchdown pass under heavy pressure.",
  "They scored a hat-trick in the hockey final.",
  "The boxer delivered a swift uppercut in the final round.",
  "Surfers caught massive waves at dawn on the Pacific coast.",
  "Fans erupted when the underdog scored the winning goal.",
  "The swimmer broke the national record in the 200m freestyle.",
  "The gymnast executed a flawless routine on the balance beam.",
  "The rugby team celebrated their victory with a traditional haka.",

  // Finance
  "The stock market rallied after positive earnings reports.",
  "Investors are closely watching interest rate changes by the Federal Reserve.",
  "Cryptocurrency prices have been extremely volatile this year.",
  "Diversification is key to managing investment risk effectively.",
  "Inflation rates have reached a 40-year high, impacting consumer spending.",
  "Many companies are adopting ESG criteria to attract socially conscious investors.",
  "The bond market is reacting to geopolitical tensions and supply chain disruptions.",
  "Venture capital funding for startups has surged in the tech sector.",
  "Exchange-traded funds (ETFs) offer a way to invest in diversified portfolios.",
  "The global economy is recovering from the pandemic, but challenges remain.",
  "Central banks are exploring digital currencies to modernize payment systems.",
  "Retail investors are increasingly participating in the stock market through apps.",
  "Hedge funds are using complex algorithms to gain an edge in trading.",
  "Real estate prices have skyrocketed in urban areas due to low inventory.",
  "The startup raised $10 million in its Series A funding round.",

  // Music
  "The symphony orchestra played a hauntingly beautiful melody.",
  "She strummed her guitar softly, filling the room with a warm sound.",
  "The DJ mixed tracks seamlessly, keeping the crowd dancing all night.",
  "His voice soared during the high notes of the ballad.",
  "The band played an acoustic set in the intimate coffee shop.",
  "Jazz musicians often improvise solos based on the chord changes.",
  "The opera singer hit the high C with perfect pitch.",
  "The choir harmonized beautifully, filling the church with sound.",
  "He composed a symphony that was performed at the concert hall.",
  "The singer-songwriter wrote heartfelt lyrics about love and loss.",
  "The rock band headlined the festival, drawing a massive crowd.",
  "Hip-hop artists use rhythm and rhyme to tell powerful stories.",
  "The violinist played a virtuosic solo that left the audience in awe.",
  "Folk music often reflects the culture and traditions of a community.",
  "The gospel choir lifted spirits with their uplifting performance.",

  // History
  "The fall of the Berlin Wall in 1989 marked the end of the Cold War.",
  "Ancient Egypt's pyramids are a testament to their architectural prowess.",
  "Europe's Renaissance period sparked a revival in art and science.",
  "The signing of the Declaration of Independence in 1776 established the United States.",
  "The Industrial Revolution transformed economies and societies worldwide.",
  "Rome was the center of a vast empire that influenced law and governance.",
  "The discovery of the New World by Christopher Columbus in 1492 changed global trade.",
  "The French Revolution in 1789 led to significant political and social change.",
  "World War II was a global conflict that reshaped international relations.",
  "The fall of the Roman Empire in 476 AD marked the beginning of the Middle Ages.",
  "The invention of the printing press revolutionized the spread of knowledge.",
  "The Cold War was characterized by political tension between the U.S. and the Soviet Union.",
  "The ancient Silk Road connected East and West through trade routes.",
  "The signing of the Magna Carta in 1215 established principles of due process.",
  "Exploration during the Age of Discovery expanded European empires across the globe.",
]);
|
|
|
// Default category labels offered in the "Labels" textarea.
const labels = [
  "Weather",
  "Technology",
  "Cooking",
  "Sports",
  "Finance",
  "Music",
  "History",
];

// Pre-fill both textareas, one entry per line, so the demo runs without typing.
for (const [elementId, lines] of [
  ["text-input", sentences],
  ["labels-input", labels],
]) {
  document.getElementById(elementId).value = lines.join("\n");
}
|
|
|
// Status line shared by model loading and the click handler.
const statusEl = document.getElementById("status");

/**
 * Create the feature-extraction pipeline used to embed sentences and labels.
 *
 * Requests the WebGPU device and the "q4f16" dtype variant of the model.
 * If loading fails, the error is shown in the status line (instead of leaving
 * "Loading model..." on screen indefinitely) and then rethrown, so the rest of
 * the module does not run against a missing pipeline.
 *
 * @returns {Promise<Function>} The callable pipeline returned by `pipeline()`.
 * @throws Whatever `pipeline()` rejects with.
 */
async function loadEmbedder() {
  try {
    return await pipeline(
      "feature-extraction",
      "onnx-community/Qwen3-Embedding-0.6B-ONNX",
      { device: "webgpu", dtype: "q4f16" },
    );
  } catch (err) {
    console.error(err);
    const reason = err instanceof Error ? err.message : String(err);
    statusEl.textContent = `Failed to load model: ${reason}`;
    throw err;
  }
}

const embed = await loadEmbedder();
statusEl.textContent = "Ready!";

// The model is available now, so the action button may be used.
document.getElementById("embed-btn").disabled = false;
|
|
|
/**
 * Split textarea content into trimmed, non-empty lines.
 *
 * @param {string} text - Raw textarea value.
 * @returns {string[]} One entry per non-blank line.
 */
function parseLines(text) {
  return text
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

/**
 * Dot product of two numeric vectors, iterating over the length of `a`.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
function dotProduct(a, b) {
  return a.reduce((sum, value, i) => sum + value * b[i], 0);
}

/**
 * Pick, for every sentence embedding, the label whose embedding has the
 * highest dot product with it. Embeddings are requested with
 * `normalize: true`, so this is presumably cosine similarity — confirm
 * against the pipeline documentation.
 *
 * @param {number[][]} embeddings - One vector per sentence.
 * @param {number[][]} labelEmbeddings - One vector per label (may be empty).
 * @param {string[]} labelTexts - Label names, parallel to `labelEmbeddings`.
 * @param {number} threshold - Minimum similarity required to accept a label.
 * @returns {string[]} The chosen label per sentence, or "Other" when there
 *   are no labels or the best similarity is below `threshold`.
 */
function assignLabels(embeddings, labelEmbeddings, labelTexts, threshold) {
  return embeddings.map((sentenceEmbedding) => {
    if (labelEmbeddings.length === 0) {
      return "Other";
    }
    let bestSimilarity = -Infinity;
    let bestIndex = -1;
    labelEmbeddings.forEach((labelEmbedding, i) => {
      const similarity = dotProduct(sentenceEmbedding, labelEmbedding);
      // Strict ">" keeps the first label on ties.
      if (similarity > bestSimilarity) {
        bestSimilarity = similarity;
        bestIndex = i;
      }
    });
    return bestSimilarity >= threshold ? labelTexts[bestIndex] : "Other";
  });
}

/**
 * Project embeddings to 2D with UMAP.
 *
 * @param {number[][]} embeddings - One vector per sentence.
 * @returns {number[][]} One `[x, y]` pair per input vector.
 */
function projectTo2D(embeddings) {
  // A single point has no neighbours to build a graph from, so skip UMAP
  // and place it at the origin instead of failing.
  if (embeddings.length < 2) {
    return embeddings.map(() => [0, 0]);
  }
  // nNeighbors must stay below the number of points; 15 is the cap used here.
  const nNeighbors = Math.min(embeddings.length - 1, 15);
  const umap = new UMAP({ nComponents: 2, nNeighbors, minDist: 0.1 });
  return umap.fit(embeddings);
}

/**
 * Build one Plotly scatter trace per distinct label, in order of first
 * appearance, so each label gets its own legend entry.
 *
 * @param {string[]} assigned - Label per sentence.
 * @param {number[][]} coords - `[x, y]` per sentence.
 * @param {string[]} sentences - Hover text per sentence.
 * @returns {object[]} Plotly trace objects.
 */
function buildTraces(assigned, coords, sentences) {
  // Single pass; a Map preserves insertion order of first-seen labels.
  const indicesByLabel = new Map();
  assigned.forEach((label, i) => {
    if (!indicesByLabel.has(label)) {
      indicesByLabel.set(label, []);
    }
    indicesByLabel.get(label).push(i);
  });

  return Array.from(indicesByLabel, ([label, indices]) => ({
    x: indices.map((i) => coords[i][0]),
    y: indices.map((i) => coords[i][1]),
    mode: "markers",
    type: "scatter",
    name: label,
    text: indices.map((i) => sentences[i]),
    marker: { size: 12 },
  }));
}

// Click handler: embed the sentences, project them to 2D, assign each one its
// most similar label, and draw the labelled scatterplot.
document
  .getElementById("embed-btn")
  .addEventListener("click", async (event) => {
    // Captured before any await; currentTarget is only set during dispatch.
    const button = event.currentTarget;

    const sentences = parseLines(document.getElementById("text-input").value);
    if (sentences.length === 0) {
      alert("No text detected.");
      return;
    }
    const labelTexts = parseLines(
      document.getElementById("labels-input").value,
    );

    // Block re-entrant clicks while a run is in progress.
    button.disabled = true;
    try {
      statusEl.textContent = `Embedding ${sentences.length} sentence(s)...`;
      const task =
        "Given a textual input sentence, retrieve relevant categories that best describe it.";
      const output = await embed(
        sentences.map((s) => `Instruct: ${task}\nQuery:${s}`),
        { pooling: "mean", normalize: true },
      );
      const embeddings = output.tolist();

      statusEl.textContent = "Running UMAP (2D projection)...";
      const coords = projectTo2D(embeddings);

      let labelEmbeddings = [];
      if (labelTexts.length > 0) {
        statusEl.textContent = "Embedding labels...";
        const labelOutput = await embed(
          labelTexts.map((x) => `Category: ${x}`),
          { pooling: "mean", normalize: true },
        );
        labelEmbeddings = labelOutput.tolist();
      }

      statusEl.textContent = "Plotting projection...";
      const threshold = 0.1;
      const assigned = assignLabels(
        embeddings,
        labelEmbeddings,
        labelTexts,
        threshold,
      );
      const data = buildTraces(assigned, coords, sentences);
      const layout = {
        title: { text: "Labels" },
        margin: { l: 0, r: 0, t: 0, b: 0 },
      };
      const config = { responsive: true };
      Plotly.newPlot("plot", data, layout, config);

      statusEl.textContent = "Done! Hover over points to see sentences.";
    } catch (err) {
      // Surface the failure instead of leaving a stale progress message.
      console.error(err);
      const reason = err instanceof Error ? err.message : String(err);
      statusEl.textContent = `Error: ${reason}`;
    } finally {
      button.disabled = false;
    }
  });
|
</script> |
|
</body> |
|
</html> |
|
|