MikeDoes committed on
Commit
d46aa0f
·
verified ·
1 Parent(s): 6e118b7

Upload 4 files

Browse files
Files changed (4) hide show
  1. components.js +131 -0
  2. data.jsonl +2 -0
  3. index.html +129 -0
  4. inference.js +148 -0
components.js ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { loadModel, processText, isModelLoaded } from './inference.js';
2
+
3
// Page elements this module reads from and writes to
const inputText = document.getElementById('inputText');
const outputText = document.getElementById('outputText');
const statusElement = document.getElementById('processingStatus');
const privacyMaskDiv = document.getElementById('privacyMask');
const thresholdInput = document.getElementById('thresholdInput');
const settingsButton = document.getElementById('settingsButton');
const settingsPanel = document.getElementById('settingsPanel');

// Module-level UI state
let currentInput = "";
let settingsVisible = false;
let samples = {};

// Debounce typing: every keystroke cancels the pending refresh and schedules
// a new one 300 ms later, so updateOutput runs once the user pauses.
let pendingUpdateTimer;
inputText.addEventListener('input', ({ target }) => {
  currentInput = target.value;
  statusElement.textContent = 'Processing...';
  clearTimeout(pendingUpdateTimer);
  pendingUpdateTimer = setTimeout(updateOutput, 300);
});
25
+
26
/**
 * Build the tile shown in the "Detected Entities" panel for one replacement.
 *
 * All dynamic values are assigned through `textContent`, never interpolated
 * into HTML, because `replacement.original` is raw text taken from the user's
 * input and may contain markup.
 *
 * @param {{placeholder: string, original: string, activation: number}} replacement
 * @returns {HTMLDivElement} The populated tile element.
 */
function createReplacementTile(replacement) {
  const tile = document.createElement('div');
  tile.className = 'bg-gray-800 p-3 rounded-lg border border-white/10 hover:border-white/20 transition duration-200 hover:-translate-y-[2px] hover:shadow-[0_4px_6px_rgba(0,0,0,0.1)]';

  const rows = [
    ['text-xs text-white/60 mb-1', replacement.placeholder],
    ['text-sm text-white font-medium', replacement.original],
    ['text-xs text-white/40 mt-1', 'Sensitive Information'],
    ['text-xs text-white/40 mt-1', `Activation: ${Math.round(replacement.activation * 100)}%`],
  ];
  for (const [className, text] of rows) {
    const row = document.createElement('div');
    row.className = className;
    row.textContent = text;
    tile.appendChild(row);
  }
  return tile;
}

/**
 * Anonymise the current input and refresh the output box, the status line and
 * the detected-entities panel.
 *
 * Does nothing but show a "Loading model..." status while the model is not
 * loaded. Processing errors are logged and reported in the UI; they are not
 * rethrown.
 *
 * @returns {Promise<void>}
 */
async function updateOutput() {
  if (!isModelLoaded) {
    statusElement.textContent = 'Loading model...';
    outputText.value = "";
    return;
  }

  // Snapshot the text so the reported character count matches what was
  // actually processed even if the user keeps typing during inference.
  const text = currentInput;

  try {
    // Fall back to 0.01 only when the field is not a number; an explicit 0
    // is a valid threshold and must not be overridden.
    const parsedThreshold = Number.parseFloat(thresholdInput.value);
    const threshold = Number.isNaN(parsedThreshold) ? 0.01 : parsedThreshold;

    const processed = await processText(text, threshold);
    statusElement.textContent = `Processed ${text.length} characters`;
    outputText.value = processed.maskedText;

    privacyMaskDiv.innerHTML = '';

    if (processed.replacements.length > 0) {
      for (const replacement of processed.replacements) {
        privacyMaskDiv.appendChild(createReplacementTile(replacement));
      }
    } else {
      const emptyState = document.createElement('div');
      emptyState.className = 'text-center text-white/40 py-4';
      emptyState.textContent = 'No sensitive information detected.';
      privacyMaskDiv.appendChild(emptyState);
    }
  } catch (err) {
    statusElement.textContent = 'Error processing text';
    console.error("Error processing text:", err);
    outputText.value = "Error processing text.";
  }
}
65
+
66
// Settings panel: the cog button toggles it, a click anywhere outside closes it.

/** Record the panel's visibility and mirror it onto the `hidden` class. */
function setSettingsVisible(visible) {
  settingsVisible = visible;
  settingsPanel.classList.toggle('hidden', !visible);
}

settingsButton.addEventListener('click', (clickEvent) => {
  setSettingsVisible(!settingsVisible);
  // Keep this click from reaching the document-level "click outside" handler.
  clickEvent.stopPropagation();
});

document.addEventListener('click', ({ target }) => {
  if (!settingsVisible || settingsPanel.contains(target)) {
    return;
  }
  setSettingsVisible(false);
});
79
+
80
/**
 * Load the sample texts from `data.jsonl` into the module-level `samples`
 * map, keyed by each record's `title`.
 *
 * Each non-blank line of the file is parsed as one JSON object with `title`
 * and `text` properties. Failures (network error, non-2xx response, malformed
 * JSON) are logged and swallowed so the rest of the app keeps working without
 * samples.
 *
 * @returns {Promise<void>}
 */
async function loadSamples() {
  try {
    const response = await fetch('data.jsonl');
    // fetch() resolves on HTTP errors too; without this check an error page
    // would be fed to JSON.parse and reported as a confusing syntax error.
    if (!response.ok) {
      throw new Error(`Failed to fetch data.jsonl: HTTP ${response.status}`);
    }
    const text = await response.text();
    const lines = text.split('\n').filter((line) => line.trim() !== '');
    samples = {};
    for (const line of lines) {
      const { title, text: sampleText } = JSON.parse(line);
      samples[title] = sampleText;
    }
  } catch (err) {
    console.error("Error loading samples:", err);
  }
}
95
+
96
// Sample buttons: each one copies a named sample into the input box and
// re-runs the anonymiser, or alerts when that sample was not loaded.

/** Attach a click handler that loads `samples[sampleKey]` into the input. */
function bindSampleButton(buttonId, sampleKey, missingMessage) {
  document.getElementById(buttonId).addEventListener('click', () => {
    if (!samples[sampleKey]) {
      alert(missingMessage);
      return;
    }
    inputText.value = samples[sampleKey];
    currentInput = samples[sampleKey];
    updateOutput();
  });
}

bindSampleButton('sampleEmailButton', "customer email", "Sample customer email not found.");
bindSampleButton('sampleDocumentButton', "document", "Sample document not found.");
116
+
117
/**
 * Start the application: load the model and the sample texts, then render the
 * output for whatever is currently in the input box.
 *
 * A load failure is logged and reported in the status line and output box; it
 * is not rethrown.
 *
 * @returns {Promise<void>}
 */
async function init() {
  statusElement.textContent = 'Loading model and samples...';
  try {
    // The two loads are independent, so run them concurrently.
    await Promise.all([loadModel(), loadSamples()]);
    statusElement.textContent = 'Model and samples loaded';
    // Not awaited: updateOutput reports its own processing errors in the UI.
    updateOutput();
  } catch (err) {
    // Log the cause; previously the error was discarded entirely.
    console.error("Error loading model or samples:", err);
    statusElement.textContent = 'Error loading model or samples';
    outputText.value = "Error loading model or samples.";
  }
}

init();
data.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"title":"customer email","text":"Customer Complaint:\n\n\"This is Sarah Gomez born on the 9th March 2002 in San-Fransisco with policy number #2301. I've been on hold for over 30 minutes trying to speak with a customer service representative about my claim on life insurance. It's incredibly frustrating. Is there a faster way to get the help I need?\"\n\nThis is the complaint and can you write a quick message saying we are on the case and I will review this as soon as possible."}
2
+ {"title": "document","text": "COMMUNITY CENTER MEMBERSHIP REGISTRATION FORM\n\nPersonal Information:\n\n -Full Name: Kelsang Rishi\n -Date of Birth: 1939-04-21T00:00:00\n - Gender: Non-binary\n - Street Address: 50 Farm-to-Market 3059\n \n - City: Marshfield\n - Postal/ZIP Code: 44691\n - Phone Number: +19-19-079.8396\n - Email Address: ZJuly@tutanota.com\n"}
index.html ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Short Text and Open Source: Anonymiser</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/iconify/2.0.0/iconify.min.js"></script>
  <style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap');
    * {
      font-family: 'Inter', sans-serif;
    }
    ::-webkit-scrollbar {
      width: 6px;
    }
    ::-webkit-scrollbar-track {
      background: #2d2d2d;
    }
    ::-webkit-scrollbar-thumb {
      background: #4a4a4a;
      border-radius: 3px;
    }
  </style>
</head>
<body class="bg-gray-900 min-h-screen">
  <!-- Branding Header -->
  <div class="bg-black/30 py-4 border-b border-white/10">
    <div class="max-w-7xl mx-auto px-4 flex items-center justify-between">
      <div class="flex items-center space-x-3">
        <img src="ai4privacy-logo.png" alt="Logo" class="h-8 w-8">
        <div>
          <span class="text-xl font-bold text-white">Ai4Privacy</span>
          <span class="block text-xs text-white/60">Short Text Anonymization Locally in Your Browser</span>
        </div>
      </div>
      <!-- Settings Button: icon-only, so it carries an explicit accessible name -->
      <button id="settingsButton" type="button" aria-label="Settings" class="text-white/60 hover:text-white transition-colors">
        <span class="iconify" data-icon="mdi:cog" data-width="24"></span>
      </button>
    </div>
  </div>

  <!-- Settings Panel (shown and hidden by components.js via the "hidden" class) -->
  <div id="settingsPanel" class="hidden absolute right-4 top-20 bg-gray-800 border border-white/10 rounded-xl p-4 w-64 space-y-4 z-50">
    <div>
      <label for="thresholdInput" class="block text-sm text-white/80 mb-2">Detection Threshold</label>
      <input type="number" id="thresholdInput" step="0.001" min="0" max="1" value="0.3"
             class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
    </div>
    <div>
      <label for="modelSelect" class="block text-sm text-white/80 mb-2">Language Model</label>
      <select id="modelSelect" class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
        <option value="english">English - ai4privacy/llama-ai4privacy-english-anonymiser-openpii</option>
      </select>
    </div>
  </div>

  <div class="max-w-7xl mx-auto px-4 py-8">
    <div class="flex flex-col lg:flex-row gap-8">
      <!-- Input/Output Section -->
      <div class="flex-1 space-y-6">
        <div>
          <label for="inputText" class="block text-sm font-medium text-white/80 mb-2">Input Text</label>
          <textarea
            id="inputText"
            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white placeholder-white/30 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/30 resize-none transition duration-200 ease-in-out"
            rows="6"
            placeholder="Enter sensitive text to anonymize..."
          ></textarea>
        </div>

        <div>
          <label for="outputText" class="block text-sm font-medium text-white/80 mb-2">Anonymized Output</label>
          <textarea
            id="outputText"
            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white/80 resize-none transition duration-200 ease-in-out"
            rows="6"
            readonly
          ></textarea>
        </div>
      </div>

      <!-- Privacy Mask Panel -->
      <div class="lg:w-96">
        <div class="sticky top-8">
          <label class="block text-sm font-medium text-white/80 mb-2">Detected Entities</label>
          <div class="bg-gray-800 border border-white/10 rounded-xl p-4">
            <div class="mb-4">
              <span id="processingStatus" class="text-xs text-white/40">Ready</span>
            </div>
            <div
              id="privacyMask"
              class="h-96 bg-gray-850 rounded-lg p-3 overflow-y-auto text-sm space-y-2 transition duration-200 ease-in-out"
            >
              <div class="text-center text-white/40 py-4">Processing results will appear here</div>
            </div>
          </div>
        </div>
      </div>
    </div>
    <!-- Sample Buttons -->
    <div class="mt-8 flex justify-center space-x-4">
      <button id="sampleEmailButton" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition">
        Sample Customer Email
      </button>
      <button id="sampleDocumentButton" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition">
        Sample Document
      </button>
    </div>
  </div>

  <!-- Branding Footer -->
  <div class="fixed bottom-0 left-0 right-0 bg-black/30 border-t border-white/10 py-3">
    <div class="max-w-7xl mx-auto px-4">
      <div class="flex items-center justify-between">
        <div class="text-sm text-white/50">© 2025 Ai4Privacy. All rights reserved. Use at your own risk. Ai4Privacy assumes no responsibility for implementation, accuracy, or any resulting damages.</div>
        <div class="flex items-center space-x-4">
          <span class="text-sm text-white/50">v2.1.0</span>
          <div class="w-px h-4 bg-white/10"></div>
          <img src="ai4privacy-logo.png" alt="Logo" class="h-6 w-6 opacity-70">
        </div>
      </div>
    </div>
  </div>

  <script type="module" src="components.js"></script>
</body>
</html>
inference.js ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AutoModel, AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.0';
2
+
3
// Populated by loadModel(); processText() refuses to run until the flag is set.
let tokenizer;
let model;
let isModelLoaded = false;

/**
 * Fetch the tokenizer and the q8-quantised model and store them in the
 * module-level variables, setting `isModelLoaded` on success.
 *
 * On failure the error is logged, the flag is cleared, and the error is
 * rethrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function loadModel() {
  const modelId = 'ai4privacy/llama-ai4privacy-english-anonymiser-openpii';
  try {
    tokenizer = await AutoTokenizer.from_pretrained(modelId);
    model = await AutoModel.from_pretrained(modelId, { dtype: "q8" });
    isModelLoaded = true;
  } catch (loadError) {
    console.error("Error loading model:", loadError);
    isModelLoaded = false;
    throw loadError;
  }
}
17
+
18
/**
 * Tokenise `text`, score every token with the model, and mask the words whose
 * sensitivity score reaches `threshold`.
 *
 * @param {string} text - Text to anonymise.
 * @param {number} [threshold=0.3] - Minimum per-word score (the larger of the
 *   first two class probabilities of any token in the word) for masking.
 * @returns {Promise<{maskedText: string, replacements: Array<{original: string, placeholder: string, activation: number}>}>}
 * @throws {Error} If the model has not been loaded yet.
 */
async function processText(text, threshold = 0.3) {
  if (!isModelLoaded) {
    throw new Error('Model not loaded');
  }

  const inputs = await tokenizer(text);
  // Decode each id on its own so every token keeps its exact surface string,
  // including any leading space, which the grouping step relies on.
  const tokenStrings = Array.from(inputs.input_ids.data).map((id) =>
    tokenizer.decode([id], { skip_special_tokens: false })
  );

  const { logits } = await model(inputs);
  const logitsData = Array.from(logits.data);
  // NOTE(review): assumes the model emits exactly three logits per token,
  // laid out token-major in the order B-PRIVATE, I-PRIVATE, O. Confirm
  // against the model's label configuration.
  const numClasses = 3;

  // Numerically stable softmax: subtracting the maximum before exponentiating
  // leaves the result unchanged mathematically but keeps Math.exp from
  // overflowing to Infinity (and the division from yielding NaN).
  const softmax = (values) => {
    const maxValue = Math.max(...values);
    const exps = values.map((value) => Math.exp(value - maxValue));
    const total = exps.reduce((sum, value) => sum + value, 0);
    return exps.map((value) => value / total);
  };

  const tokenPredictions = tokenStrings.map((token, i) => {
    const probs = softmax(logitsData.slice(i * numClasses, (i + 1) * numClasses));
    return {
      token,
      start: i,
      end: i + 1,
      probabilities: {
        "B-PRIVATE": probs[0],
        "I-PRIVATE": probs[1],
        "O": probs[2],
      },
      maxSensitiveScore: Math.max(probs[0], probs[1]),
    };
  });

  const aggregated = aggregatePrivacyTokens(tokenPredictions, threshold);
  const { maskedText, replacements } = maskText(tokenPredictions, aggregated);
  return { maskedText, replacements };
}
64
+
65
/**
 * Group token predictions into words and keep the words whose highest token
 * score reaches `threshold`.
 *
 * A word starts at a token that begins with a space, or at the first token of
 * the input or directly after a special token (`[CLS]` / `[SEP]`). It then
 * absorbs every following token that neither starts with a space nor is a
 * special token. Special tokens never belong to a word.
 *
 * @param {Array<{token: string, maxSensitiveScore: number}>} tokenPredictions
 * @param {number} threshold - Minimum `Math.max(...scores)` for a word to be kept.
 * @returns {Array<{tokens: object[], indices: number[], scores: number[], startsWithSpace: boolean}>}
 *   The kept word groups, in input order.
 */
function aggregatePrivacyTokens(tokenPredictions, threshold) {
  const specialTokens = new Set(['[CLS]', '[SEP]']);
  const aggregated = [];
  const n = tokenPredictions.length;
  let i = 0;

  while (i < n) {
    const currentToken = tokenPredictions[i];
    if (specialTokens.has(currentToken.token)) {
      i++;
      continue;
    }

    // Any non-special token reached here opens a word. Previously a token
    // without a leading space only counted when it sat at index 0, so the
    // first word after a leading [CLS] was skipped and could never be masked.
    const group = {
      tokens: [currentToken],
      indices: [i],
      scores: [currentToken.maxSensitiveScore],
      startsWithSpace: currentToken.token.startsWith(' '),
    };
    i++;

    // Absorb the continuation pieces of the same word.
    while (
      i < n &&
      !tokenPredictions[i].token.startsWith(' ') &&
      !specialTokens.has(tokenPredictions[i].token)
    ) {
      group.tokens.push(tokenPredictions[i]);
      group.indices.push(i);
      group.scores.push(tokenPredictions[i].maxSensitiveScore);
      i++;
    }

    if (Math.max(...group.scores) >= threshold) {
      aggregated.push(group);
    }
  }
  return aggregated;
}
103
+
104
/**
 * Rebuild the text from its tokens, replacing every aggregated word group with
 * a numbered `[PII_n]` placeholder.
 *
 * Special tokens (`[CLS]` / `[SEP]`) are dropped. After joining, runs of
 * spaces and tabs are collapsed to one space and each line is trimmed;
 * newlines are preserved.
 *
 * @param {Array<{token: string}>} tokenPredictions - All tokens, in order.
 * @param {Array<{tokens: Array<{token: string}>, indices: number[], scores: number[], startsWithSpace: boolean}>} aggregatedGroups
 *   Word groups to mask, as produced by the aggregation step.
 * @returns {{maskedText: string, replacements: Array<{original: string, placeholder: string, activation: number}>}}
 */
function maskText(tokenPredictions, aggregatedGroups) {
  const specialTokens = new Set(['[CLS]', '[SEP]']);

  // Index the groups once so the per-token loop does O(1) lookups instead of
  // scanning every group for every token.
  const groupByFirstIndex = new Map();
  const maskedIndices = new Set();
  for (const group of aggregatedGroups) {
    const [firstIndex] = group.indices;
    if (!groupByFirstIndex.has(firstIndex)) {
      groupByFirstIndex.set(firstIndex, group);
    }
    for (const idx of group.indices) {
      maskedIndices.add(idx);
    }
  }

  const maskedTokens = [];
  const replacements = [];

  tokenPredictions.forEach((prediction, idx) => {
    if (specialTokens.has(prediction.token)) {
      return;
    }
    if (!maskedIndices.has(idx)) {
      maskedTokens.push(prediction.token);
      return;
    }

    // Only the first token of a group emits the placeholder; the group's
    // remaining tokens are masked and produce no output of their own.
    const group = groupByFirstIndex.get(idx);
    if (!group) {
      return;
    }

    const originalText = group.tokens
      .map((piece, position) =>
        position === 0 && group.startsWithSpace ? piece.token.trimStart() : piece.token
      )
      .join('');
    const placeholder = `[PII_${replacements.length + 1}]`;
    replacements.push({
      original: originalText,
      placeholder,
      activation: Math.max(...group.scores),
    });
    // Keep the word's leading space so neighbouring words stay separated.
    maskedTokens.push(group.startsWithSpace ? ` ${placeholder}` : placeholder);
  });

  // Join first, then normalise whitespace line by line so newlines survive.
  const maskedText = maskedTokens
    .join('')
    .split('\n')
    .map((line) => line.replace(/[ \t]+/g, ' ').trim())
    .join('\n')
    .trim();

  return { maskedText, replacements };
}
146
+
147
+
148
+ export { loadModel, processText, isModelLoaded };