Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
a8d120c
1
Parent(s):
536eb9a
update app
Browse files
app.py
CHANGED
@@ -2,6 +2,229 @@ import gradio as gr
|
|
2 |
from transformers import pipeline
|
3 |
import spaces
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# Load the pipeline (token classification)
|
6 |
#token_classifier = pipeline("token-classification", model="WesScivetti/SNACS_English", aggregation_strategy="simple")
|
7 |
|
@@ -12,17 +235,39 @@ def classify_tokens(text):
|
|
12 |
aggregation_strategy="simple")
|
13 |
|
14 |
results = token_classifier(text)
|
|
|
|
|
15 |
output = ""
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
iface = gr.Interface(
|
21 |
fn=classify_tokens,
|
22 |
-
inputs=gr.Textbox(lines=4, placeholder="Enter
|
23 |
-
outputs="
|
24 |
-
title="SNACS
|
25 |
-
description="SNACS
|
|
|
26 |
)
|
27 |
|
28 |
iface.launch()
|
|
|
2 |
from transformers import pipeline
|
3 |
import spaces
|
4 |
|
5 |
+
COLORS = {'None': '#6adf97',
|
6 |
+
'O': '#f18621',
|
7 |
+
'B-p.Purpose-p.Purpose': '#554065',
|
8 |
+
'B-p.SocialRel-p.Gestalt': '#8ea0d7',
|
9 |
+
'B-p.Cost-p.Cost': '#f4b518',
|
10 |
+
'B-p.Topic-p.Topic': '#976cae',
|
11 |
+
'B-p.Originator-p.Gestalt': '#f25ca8',
|
12 |
+
'B-p.Originator-p.Source': '#a08323',
|
13 |
+
'B-p.Recipient-p.Goal': '#725be0',
|
14 |
+
'B-p.Possessor-p.Possessor': '#b5ce9e',
|
15 |
+
'B-p.Gestalt-p.Gestalt': '#34a8a9',
|
16 |
+
'B-p.Ancillary-p.Ancillary': '#73f29f',
|
17 |
+
'I-p.Ancillary-p.Ancillary': '#73f29f',
|
18 |
+
'B-p.ComparisonRef-p.Goal': '#6a26db',
|
19 |
+
'B-p.Source-p.Source': '#5cc334',
|
20 |
+
'I-p.Source-p.Source': '#5cc334',
|
21 |
+
'B-p.Theme-p.Theme': '#5b88c8',
|
22 |
+
'B-p.Locus-p.Locus': '#4c39c8',
|
23 |
+
'B-p.Characteristic-p.Characteristic': '#661943',
|
24 |
+
'B-p.Explanation-p.Explanation': '#852e58',
|
25 |
+
'B-p.OrgMember-p.Possessor': '#e3bd42',
|
26 |
+
'B-p.Goal-p.Goal': '#6bfc3c',
|
27 |
+
'B-p.Manner-p.Manner': '#436097',
|
28 |
+
'B-p.ComparisonRef-p.ComparisonRef': '#4df5a9',
|
29 |
+
'B-p.Cost-p.Locus': '#fe5990',
|
30 |
+
'B-p.Duration-p.Duration': '#5e454e',
|
31 |
+
'B-p.Identity-p.Identity': '#cb49ed',
|
32 |
+
'B-p.OrgMember-p.Gestalt': '#18fdd1',
|
33 |
+
'B-p.Experiencer-p.Goal': '#400043',
|
34 |
+
'B-p.QuantityItem-p.Whole': '#5f3ba4',
|
35 |
+
'B-p.Whole-p.Gestalt': '#497114',
|
36 |
+
'B-p.PartPortion-p.PartPortion': '#edfc14',
|
37 |
+
'I-p.PartPortion-p.PartPortion': '#edfc14',
|
38 |
+
'B-p.Time-p.Time': '#4605b0',
|
39 |
+
'B-p.Approximator-p.Approximator': '#553ee1',
|
40 |
+
'B-p.Direction-p.Direction': '#687447',
|
41 |
+
'B-p.Locus-p.Direction': '#12b336',
|
42 |
+
'B-p.Instrument-p.Path': '#0ccdda',
|
43 |
+
'I-p.Instrument-p.Path': '#0ccdda',
|
44 |
+
'B-p.QuantityItem-p.Gestalt': '#d88be2',
|
45 |
+
'B-p.Species-p.Species': '#4dfc63',
|
46 |
+
'B-p.Org-p.Ancillary': '#6a5b9c',
|
47 |
+
'B-p.Agent-p.Gestalt': '#f373bf',
|
48 |
+
'B-p.SocialRel-p.Ancillary': '#4ee1dc',
|
49 |
+
'B-p.Circumstance-p.Locus': '#38abe5',
|
50 |
+
'B-p.Circumstance-p.Circumstance': '#69caeb',
|
51 |
+
'B-p.Path-p.Path': '#3637c0',
|
52 |
+
'B-p.Whole-p.Whole': '#00d816',
|
53 |
+
'I-p.Locus-p.Locus': '#4c39c8',
|
54 |
+
'B-p.Manner-p.Locus': '#67fc5f',
|
55 |
+
'I-p.Manner-p.Locus': '#67fc5f',
|
56 |
+
'B-p.QuantityItem-p.QuantityItem': '#dbbc2d',
|
57 |
+
'B-p.Theme-p.Purpose': '#cb56ba',
|
58 |
+
'B-p.Goal-p.Locus': '#b3597f',
|
59 |
+
'B-p.Extent-p.Extent': '#5cadfa',
|
60 |
+
'I-p.Extent-p.Extent': '#5cadfa',
|
61 |
+
'B-p.Experiencer-p.Gestalt': '#8275f4',
|
62 |
+
'B-p.Means-p.Means': '#b1bfb7',
|
63 |
+
'B-p.Beneficiary-p.Beneficiary': '#0e9582',
|
64 |
+
'B-p.Org-p.Beneficiary': '#c48ea7',
|
65 |
+
'B-p.Stimulus-p.Topic': '#a6af3a',
|
66 |
+
'B-p.Recipient-p.Ancillary': '#a5ff4b',
|
67 |
+
'B-p.Beneficiary-p.Possessor': '#c941dc',
|
68 |
+
'B-p.Agent-p.Ancillary': '#d18ce9',
|
69 |
+
'B-p.Theme-p.Gestalt': '#b71c4f',
|
70 |
+
'B-p.StartTime-p.StartTime': '#9b3cf9',
|
71 |
+
'I-p.ComparisonRef-p.ComparisonRef': '#4df5a9',
|
72 |
+
'B-p.Cost-p.Extent': '#117f70',
|
73 |
+
'B-p.Manner-p.Source': '#460233',
|
74 |
+
'I-p.Manner-p.Source': '#460233',
|
75 |
+
'B-p.Characteristic-p.Source': '#41c518',
|
76 |
+
'I-p.Characteristic-p.Source': '#41c518',
|
77 |
+
'B-p.Locus-p.Path': '#d3c136',
|
78 |
+
'I-p.Topic-p.Topic': '#976cae',
|
79 |
+
'B-p.Manner-p.ComparisonRef': '#32cbcb',
|
80 |
+
'B-p.Extent-p.Whole': '#94454f',
|
81 |
+
'I-p.Extent-p.Whole': '#94454f',
|
82 |
+
'B-p.Experiencer-p.Beneficiary': '#1f2d98',
|
83 |
+
'B-p.Theme-p.ComparisonRef': '#ef3f97',
|
84 |
+
'I-p.Time-p.Time': '#4605b0',
|
85 |
+
'B-p.Stuff-p.Stuff': '#9919e8',
|
86 |
+
'B-p.Theme-p.Goal': '#d7c6d1',
|
87 |
+
'B-p.Interval-p.Interval': '#042206',
|
88 |
+
'B-p.Time-p.Whole': '#ecf0a1',
|
89 |
+
'I-p.Circumstance-p.Circumstance': '#69caeb',
|
90 |
+
'B-p.Stimulus-p.Beneficiary': '#af168a',
|
91 |
+
'B-p.Time-p.Interval': '#5cc4a8',
|
92 |
+
'B-p.Characteristic-p.Locus': '#ac54e6',
|
93 |
+
'B-p.Characteristic-p.Extent': '#0ec04c',
|
94 |
+
'B-p.EndTime-p.EndTime': '#29e89e',
|
95 |
+
'B-p.Experiencer-p.Ancillary': '#bce155',
|
96 |
+
'B-p.Agent-p.Agent': '#aac43b',
|
97 |
+
'B-p.PartPortion-p.Source': '#9eb3c3',
|
98 |
+
'B-p.Org-p.Locus': '#434851',
|
99 |
+
'I-p.Characteristic-p.Locus': '#ac54e6',
|
100 |
+
'B-p.Locus-p.Source': '#7121d7',
|
101 |
+
'I-p.Locus-p.Source': '#7121d7',
|
102 |
+
'B-p.Duration-p.Extent': '#ca1096',
|
103 |
+
'B-p.Characteristic-p.Identity': '#345c8d',
|
104 |
+
'B-p.Possession-p.PartPortion': '#e592aa',
|
105 |
+
'B-p.Possession-p.Theme': '#a59bec',
|
106 |
+
'B-p.Whole-p.Locus': '#0bc209',
|
107 |
+
'B-p.Direction-p.Goal': '#9d90cd',
|
108 |
+
'B-p.Gestalt-p.Locus': '#97f830',
|
109 |
+
'B-p.Org-p.Gestalt': '#2f2c3c',
|
110 |
+
'B-p.Stimulus-p.Goal': '#c40f02',
|
111 |
+
'B-p.Theme-p.Instrument': '#a312ed',
|
112 |
+
'B-p.Stimulus-p.Force': '#d98ddb',
|
113 |
+
'I-p.Purpose-p.Purpose': '#554065',
|
114 |
+
'B-p.Beneficiary-p.Theme': '#68fdb4',
|
115 |
+
'B-p.Characteristic-p.Goal': '#a60b97',
|
116 |
+
'I-p.Characteristic-p.Goal': '#a60b97',
|
117 |
+
'B-p.Time-p.Goal': '#97567c',
|
118 |
+
'I-p.Direction-p.Direction': '#687447',
|
119 |
+
'B-p.Explanation-p.Time': '#90f72f',
|
120 |
+
'B-p.Instrument-p.Manner': '#2b1869',
|
121 |
+
'B-p.Possession-p.Ancillary': '#a9672c',
|
122 |
+
'B-p.Instrument-p.Instrument': '#6eb1ef',
|
123 |
+
'B-p.Ensemble-p.Ancillary': '#93fb41',
|
124 |
+
'I-p.Cost-p.Locus': '#fe5990',
|
125 |
+
'B-p.Recipient-p.Gestalt': '#0674a2',
|
126 |
+
'B-p.Agent-p.Source': '#bf427f',
|
127 |
+
'I-p.Circumstance-p.Locus': '#38abe5',
|
128 |
+
'B-p.Whole-p.Source': '#dae5cb',
|
129 |
+
'B-p.Stimulus-p.Explanation': '#108bd6',
|
130 |
+
'B-p.Stimulus-p.Direction': '#aa0f64',
|
131 |
+
'I-p.Explanation-p.Explanation': '#852e58',
|
132 |
+
'I-p.Approximator-p.Approximator': '#553ee1',
|
133 |
+
'B-p.ComparisonRef-p.Purpose': '#65fb63',
|
134 |
+
'B-p.ComparisonRef-p.Locus': '#e48da2',
|
135 |
+
'I-p.QuantityItem-p.Whole': '#5f3ba4',
|
136 |
+
'B-p.Theme-p.Ancillary': '#685b19',
|
137 |
+
'I-p.Manner-p.Manner': '#436097',
|
138 |
+
'B-p.Identity-p.ComparisonRef': '#caac20',
|
139 |
+
'I-p.Goal-p.Locus': '#b3597f',
|
140 |
+
'B-p.QuantityItem-p.Stuff': '#a1f649',
|
141 |
+
'B-p.Recipient-p.Direction': '#a8ba9d',
|
142 |
+
'B-p.Path-p.Locus': '#03c408',
|
143 |
+
'B-p.Originator-p.Agent': '#b46878',
|
144 |
+
'B-p.Beneficiary-p.Gestalt': '#26eaf0',
|
145 |
+
'B-p.Possessor-p.Ancillary': '#dd8d5e',
|
146 |
+
'B-p.Beneficiary-p.Goal': '#212bd7',
|
147 |
+
'B-p.OrgMember-p.PartPortion': '#bd7620',
|
148 |
+
'B-p.PartPortion-p.ComparisonRef': '#6fd197',
|
149 |
+
'B-p.Frequency-p.Extent': '#8a9e22',
|
150 |
+
'B-p.Beneficiary-p.Direction': '#094599',
|
151 |
+
'B-p.Characteristic-p.Stuff': '#02889c',
|
152 |
+
'B-p.Manner-p.Extent': '#686d06',
|
153 |
+
'I-p.Cost-p.Cost': '#f4b518',
|
154 |
+
'B-p.Theme-p.Whole': '#5a51fb',
|
155 |
+
'B-p.Frequency-p.Frequency': '#d26bc7',
|
156 |
+
'B-p.Purpose-p.Locus': '#80e1ac',
|
157 |
+
'B-p.Force-p.Gestalt': '#1063d3',
|
158 |
+
'B-p.Characteristic-p.Ancillary': '#947622',
|
159 |
+
'B-p.ComparisonRef-p.Source': '#b0954c',
|
160 |
+
'B-p.Org-p.Instrument': '#e2bfce',
|
161 |
+
'B-p.Theme-p.Characteristic': '#44b67f',
|
162 |
+
'B-p.Characteristic-p.Topic': '#b90264',
|
163 |
+
'I-p.Characteristic-p.Topic': '#b90264',
|
164 |
+
'B-p.Locus-p.Goal': '#5d62c0',
|
165 |
+
'B-p.Locus-p.Whole': '#e4222b',
|
166 |
+
'B-p.Theme-p.Locus': '#60211c',
|
167 |
+
'B-p.Frequency-p.Manner': '#6b5831',
|
168 |
+
'I-p.Frequency-p.Manner': '#6b5831',
|
169 |
+
'I-p.Ensemble-p.Ancillary': '#93fb41',
|
170 |
+
'B-p.Locus-p.Ancillary': '#8de37d',
|
171 |
+
'B-p.Topic-p.Identity': '#10a385',
|
172 |
+
'B-p.Org-p.Goal': '#b42090',
|
173 |
+
'B-p.SetIteration-p.SetIteration': '#11e7a6',
|
174 |
+
'B-p.PartPortion-p.Goal': '#ee8159',
|
175 |
+
'B-p.ComparisonRef-p.Ancillary': '#3270a9',
|
176 |
+
'B-p.Force-p.Force': '#dc6a3a',
|
177 |
+
'B-p.Approximator-p.Extent': '#005d48',
|
178 |
+
'I-p.Duration-p.Duration': '#5e454e',
|
179 |
+
'B-p.Manner-p.Stuff': '#920903',
|
180 |
+
'B-p.Path-p.Goal': '#543e80',
|
181 |
+
'B-p.Explanation-p.Source': '#e65656',
|
182 |
+
'B-p.Topic-p.Goal': '#31bcfc',
|
183 |
+
'I-p.Manner-p.ComparisonRef': '#32cbcb',
|
184 |
+
'B-p.Possession-p.Locus': '#1312e3',
|
185 |
+
'B-p.Circumstance-p.Path': '#8b9109',
|
186 |
+
'B-p.Gestalt-p.Source': '#7050ae',
|
187 |
+
'B-p.Agent-p.Locus': '#c9846e',
|
188 |
+
'B-p.Stimulus-p.Source': '#180a5f',
|
189 |
+
'B-p.Org-p.Whole': '#2a3053',
|
190 |
+
'I-p.Locus-p.Direction': '#12b336',
|
191 |
+
'B-p.Org-p.Source': '#ad1e85',
|
192 |
+
'B-p.Time-p.Extent': '#b1d4fa',
|
193 |
+
'I-p.Goal-p.Goal': '#6bfc3c',
|
194 |
+
'B-p.Possessor-p.Locus': '#ae306d',
|
195 |
+
'B-p.Force-p.Source': '#727a29',
|
196 |
+
'B-p.Gestalt-p.Topic': '#f47f98',
|
197 |
+
'I-p.Whole-p.Whole': '#00d816',
|
198 |
+
'B-p.Cost-p.Manner': '#a61141',
|
199 |
+
'B-p.Means-p.Path': '#54d11a',
|
200 |
+
'B-p.Originator-p.Instrument': '#44fe8a',
|
201 |
+
'B-p.PartPortion-p.Instrument': '#4f7170',
|
202 |
+
'B-p.Possession-p.Possession': '#d3abe4',
|
203 |
+
'I-p.Possession-p.Possession': '#d3abe4',
|
204 |
+
'B-p.Agent-p.Beneficiary': '#1c515e',
|
205 |
+
'B-p.Instrument-p.Locus': '#4460b0',
|
206 |
+
'B-p.Instrument-p.Theme': '#1bed0b',
|
207 |
+
'B-p.Duration-p.Gestalt': '#2f787f',
|
208 |
+
'I-p.Path-p.Path': '#3637c0',
|
209 |
+
'B-p.Theme-p.Source': '#54a6f9',
|
210 |
+
'B-p.Time-p.Gestalt': '#24ff12',
|
211 |
+
'B-p.Time-p.Direction': '#9e135c',
|
212 |
+
'B-p.Goal-p.Whole': '#5fad91',
|
213 |
+
'B-p.Explanation-p.Manner': '#983754',
|
214 |
+
'I-p.Explanation-p.Manner': '#983754',
|
215 |
+
'I-p.Time-p.Interval': '#5cc4a8',
|
216 |
+
'I-p.Org-p.Locus': '#434851',
|
217 |
+
'B-p.Gestalt-p.Purpose': '#9ff474',
|
218 |
+
'B-p.Stimulus-p.Theme': '#12dfa1',
|
219 |
+
'B-p.Locus-p.Gestalt': '#636042',
|
220 |
+
'B-p.Extent-p.Identity': '#1414fd',
|
221 |
+
'B-p.ComparisonRef-p.Beneficiary': '#f47ef3',
|
222 |
+
'B-p.Experiencer-p.Agent': '#21883e',
|
223 |
+
'B-p.Time-p.Duration': '#98b42b',
|
224 |
+
'B-p.SocialRel-p.Source': '#4f3f8f',
|
225 |
+
'B-p.Whole-p.Circumstance': '#c70411',
|
226 |
+
'B-p.Purpose-p.Goal': '#f2f199'}
|
227 |
+
|
228 |
# Load the pipeline (token classification)
|
229 |
#token_classifier = pipeline("token-classification", model="WesScivetti/SNACS_English", aggregation_strategy="simple")
|
230 |
|
|
|
235 |
aggregation_strategy="simple")
|
236 |
|
237 |
results = token_classifier(text)
|
238 |
+
|
239 |
+
sorted_results = sorted(results, key=lambda x: x["start"])
|
240 |
output = ""
|
241 |
+
last_idx = 0
|
242 |
+
|
243 |
+
for prep in sorted_results:
|
244 |
+
start = prep["start"]
|
245 |
+
end = prep["end"]
|
246 |
+
label = prep["entity_group"]
|
247 |
+
word = html.escape(text[start:end])
|
248 |
+
|
249 |
+
# Add untagged text before the entity
|
250 |
+
output += html.escape(text[last_idx:start])
|
251 |
+
|
252 |
+
# Add highlighted entity
|
253 |
+
color = COLORS.get(label, "#D3D3D3") # default light gray
|
254 |
+
output += f"<span style='background-color: {color}; padding: 2px; border-radius: 4px;' title='{label}'>{word}</span>"
|
255 |
+
|
256 |
+
last_idx = end
|
257 |
+
|
258 |
+
# Add remaining text
|
259 |
+
output += html.escape(text[last_idx:])
|
260 |
+
|
261 |
+
return f"<div style='font-family: sans-serif; line-height: 1.6;'>{output}</div>"
|
262 |
+
|
263 |
|
264 |
iface = gr.Interface(
|
265 |
fn=classify_tokens,
|
266 |
+
inputs=gr.Textbox(lines=4, placeholder="Enter a sentence...", label="Input Text"),
|
267 |
+
outputs=gr.HTML(label="SNACS Tagged Sentence"),
|
268 |
+
title="SNACS English Classification",
|
269 |
+
description="SNACS English Classification. See the <a href='https://arxiv.org/abs/1704.02134'>SNACS guidelines</a> for details.",
|
270 |
+
theme="default"
|
271 |
)
|
272 |
|
273 |
iface.launch()
|