Update README.md
README.md CHANGED
@@ -33,7 +33,6 @@ from gec.tag import rewrite
 
 tokenizer = BertTokenizer.from_pretrained('CAMeL-Lab/text-editing-coda')
 model = BertForTokenClassification.from_pretrained('CAMeL-Lab/text-editing-coda')
-edits_map = model.config.id2label
 
 text = 'أنا بعطيك رقم تلفونى و عنوانى'.split()
 
@@ -43,7 +42,7 @@ with torch.no_grad():
     logits = model(**tokenized_text).logits
     preds = F.softmax(logits.squeeze(), dim=-1)
     preds = torch.argmax(preds, dim=-1).cpu().numpy()
-    edits = [edits_map[p] for p in preds[1:-1]]
+    edits = [model.config.id2label[p] for p in preds[1:-1]]
     assert len(edits) == len(tokenized_text['input_ids'][0][1:-1])
 
 print(edits)  # ['R_[ا]K*', 'K*I_[ا]K', 'K*', 'K*', 'K*', 'K*', 'K*R_[ي]', 'K*', 'MK*', 'R_[ي]']
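For context, the updated snippet reads end to end roughly as below. Only the lines shown in the hunks above are verbatim; the imports and the `tokenized_text = tokenizer(...)` call are assumptions about the surrounding README code, sketched here so the change (dropping the intermediate `edits_map` variable and indexing `model.config.id2label` directly) can be read in place.

```python
# Minimal sketch of the updated README example. Only the lines from the diff
# above are verbatim; the imports and the tokenization call are assumptions.
import torch
import torch.nn.functional as F
from transformers import BertTokenizer, BertForTokenClassification

tokenizer = BertTokenizer.from_pretrained('CAMeL-Lab/text-editing-coda')
model = BertForTokenClassification.from_pretrained('CAMeL-Lab/text-editing-coda')

text = 'أنا بعطيك رقم تلفونى و عنوانى'.split()

# Assumed tokenization step: feed the pre-split words so subword tokens stay
# aligned with the whitespace-tokenized input.
tokenized_text = tokenizer(text, is_split_into_words=True, return_tensors='pt')

with torch.no_grad():
    logits = model(**tokenized_text).logits
    preds = F.softmax(logits.squeeze(), dim=-1)
    preds = torch.argmax(preds, dim=-1).cpu().numpy()
    # model.config.id2label maps each predicted class index to its edit tag;
    # [1:-1] drops the [CLS] and [SEP] positions so the edits line up with the
    # actual subword tokens.
    edits = [model.config.id2label[p] for p in preds[1:-1]]
    assert len(edits) == len(tokenized_text['input_ids'][0][1:-1])

print(edits)
```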