Seicas committed on
Commit
030aeb4
·
verified ·
1 Parent(s): bf50685

Upload privacy.py

Browse files
Files changed (1) hide show
  1. privacy.py +37 -0
privacy.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class MedicalPrivacyProcessor:
    """Regex-based detector and masker for personal data in free text.

    The detection rules live in ``self.pii_patterns`` (a mapping from a PII
    type name to a list of regular-expression strings) and may be extended
    or replaced by callers after construction.
    """

    def __init__(self):
        """Set up the default detection patterns."""
        # Letter classes cover the Turkish alphabet in addition to ASCII so
        # that names such as "Şükrü Öztürk" are recognised.
        upper = "A-ZÇĞİÖŞÜ"
        lower = "a-zçğıöşü"
        word = f"[{upper}][{lower}]+"

        # Keys are the PII type names; they are upper-cased to build the
        # placeholders (e.g. "isim" -> "[ISIM_1]").
        self.pii_patterns = {
            # Two consecutive capitalised words.
            "isim": [rf'\b{word} {word}\b'],
            # 11 digits, first digit non-zero.
            "tc_no": [r'\b[1-9][0-9]{10}\b'],
            # Mobile number starting with an optional 0 then 5, with
            # optional single spaces between the digit groups.
            "telefon": [r'\b0?5[0-9]{2} ?[0-9]{3} ?[0-9]{2} ?[0-9]{2}\b'],
            "email": [r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b'],
            # A capitalised word followed by "Mahallesi" or "Mah.".  The
            # trailing \b applies to "Mahallesi" only: after "Mah." a \b
            # would demand a word character right after the dot and so
            # would miss "Mah. " followed by a space.
            "adres": [rf'\b{word} (?:Mahallesi\b|Mah\.)'],
        }

    def anonymize_text(self, text):
        """Detect personal data in *text* and replace it with placeholders.

        Args:
            text: The string to scan.  Falsy values (``None``, ``""``) are
                returned unchanged.

        Returns:
            A ``(anonymized, identified_data)`` tuple.  ``anonymized`` is
            *text* with every masked match replaced by ``[TYPE_k]``.
            ``identified_data`` maps each PII type that matched to the list
            of matched strings, in the order they were found; ``k`` is the
            1-based position of the value in that list.  A value that
            occurs several times reuses the placeholder of its first
            occurrence.

        This is a simple pattern-based example; a real deployment needs a
        more sophisticated detector.
        """
        if not text:
            return text, {}

        import re

        identified_data = {}
        # (start, end, placeholder) for every match found in the ORIGINAL
        # text.  Working with spans instead of str.replace() prevents a
        # short match from corrupting a longer one that contains it.
        spans = []

        for data_type, patterns in self.pii_patterns.items():
            found = []
            first_index = {}
            label = data_type.upper()
            for pattern in patterns:
                for hit in re.finditer(pattern, text):
                    value = hit.group(0)
                    found.append(value)
                    # Repeated values share the index of their first hit.
                    index = first_index.setdefault(value, len(found))
                    spans.append((hit.start(), hit.end(), f"[{label}_{index}]"))
            if found:
                identified_data[data_type] = found

        # Resolve overlaps: earliest start wins, then the longest match;
        # the sort is stable, so remaining ties keep declaration order.
        spans.sort(key=lambda span: (span[0], span[0] - span[1]))

        pieces = []
        cursor = 0
        for start, end, placeholder in spans:
            if start < cursor:
                # Overlaps a span that has already been masked.
                continue
            pieces.append(text[cursor:start])
            pieces.append(placeholder)
            cursor = end
        pieces.append(text[cursor:])

        return "".join(pieces), identified_data