WesScivetti committed on
Commit
a8d120c
·
1 Parent(s): 536eb9a

update app

Browse files
Files changed (1) hide show
  1. app.py +252 -7
app.py CHANGED
@@ -2,6 +2,229 @@ import gradio as gr
2
  from transformers import pipeline
3
  import spaces
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  # Load the pipeline (token classification)
6
  #token_classifier = pipeline("token-classification", model="WesScivetti/SNACS_English", aggregation_strategy="simple")
7
 
@@ -12,17 +235,39 @@ def classify_tokens(text):
12
  aggregation_strategy="simple")
13
 
14
  results = token_classifier(text)
 
 
15
  output = ""
16
- for entity in results:
17
- output += f"{entity['word']} ({entity['entity_group']}, score={entity['score']:.2f})\n"
18
- return output.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  iface = gr.Interface(
21
  fn=classify_tokens,
22
- inputs=gr.Textbox(lines=4, placeholder="Enter text to be classified..."),
23
- outputs="text",
24
- title="SNACS Tagging in English",
25
- description="SNACS Tagging in English"
 
26
  )
27
 
28
  iface.launch()
 
2
  from transformers import pipeline
3
  import spaces
4
 
5
+ COLORS = {'None': '#6adf97',
6
+ 'O': '#f18621',
7
+ 'B-p.Purpose-p.Purpose': '#554065',
8
+ 'B-p.SocialRel-p.Gestalt': '#8ea0d7',
9
+ 'B-p.Cost-p.Cost': '#f4b518',
10
+ 'B-p.Topic-p.Topic': '#976cae',
11
+ 'B-p.Originator-p.Gestalt': '#f25ca8',
12
+ 'B-p.Originator-p.Source': '#a08323',
13
+ 'B-p.Recipient-p.Goal': '#725be0',
14
+ 'B-p.Possessor-p.Possessor': '#b5ce9e',
15
+ 'B-p.Gestalt-p.Gestalt': '#34a8a9',
16
+ 'B-p.Ancillary-p.Ancillary': '#73f29f',
17
+ 'I-p.Ancillary-p.Ancillary': '#73f29f',
18
+ 'B-p.ComparisonRef-p.Goal': '#6a26db',
19
+ 'B-p.Source-p.Source': '#5cc334',
20
+ 'I-p.Source-p.Source': '#5cc334',
21
+ 'B-p.Theme-p.Theme': '#5b88c8',
22
+ 'B-p.Locus-p.Locus': '#4c39c8',
23
+ 'B-p.Characteristic-p.Characteristic': '#661943',
24
+ 'B-p.Explanation-p.Explanation': '#852e58',
25
+ 'B-p.OrgMember-p.Possessor': '#e3bd42',
26
+ 'B-p.Goal-p.Goal': '#6bfc3c',
27
+ 'B-p.Manner-p.Manner': '#436097',
28
+ 'B-p.ComparisonRef-p.ComparisonRef': '#4df5a9',
29
+ 'B-p.Cost-p.Locus': '#fe5990',
30
+ 'B-p.Duration-p.Duration': '#5e454e',
31
+ 'B-p.Identity-p.Identity': '#cb49ed',
32
+ 'B-p.OrgMember-p.Gestalt': '#18fdd1',
33
+ 'B-p.Experiencer-p.Goal': '#400043',
34
+ 'B-p.QuantityItem-p.Whole': '#5f3ba4',
35
+ 'B-p.Whole-p.Gestalt': '#497114',
36
+ 'B-p.PartPortion-p.PartPortion': '#edfc14',
37
+ 'I-p.PartPortion-p.PartPortion': '#edfc14',
38
+ 'B-p.Time-p.Time': '#4605b0',
39
+ 'B-p.Approximator-p.Approximator': '#553ee1',
40
+ 'B-p.Direction-p.Direction': '#687447',
41
+ 'B-p.Locus-p.Direction': '#12b336',
42
+ 'B-p.Instrument-p.Path': '#0ccdda',
43
+ 'I-p.Instrument-p.Path': '#0ccdda',
44
+ 'B-p.QuantityItem-p.Gestalt': '#d88be2',
45
+ 'B-p.Species-p.Species': '#4dfc63',
46
+ 'B-p.Org-p.Ancillary': '#6a5b9c',
47
+ 'B-p.Agent-p.Gestalt': '#f373bf',
48
+ 'B-p.SocialRel-p.Ancillary': '#4ee1dc',
49
+ 'B-p.Circumstance-p.Locus': '#38abe5',
50
+ 'B-p.Circumstance-p.Circumstance': '#69caeb',
51
+ 'B-p.Path-p.Path': '#3637c0',
52
+ 'B-p.Whole-p.Whole': '#00d816',
53
+ 'I-p.Locus-p.Locus': '#4c39c8',
54
+ 'B-p.Manner-p.Locus': '#67fc5f',
55
+ 'I-p.Manner-p.Locus': '#67fc5f',
56
+ 'B-p.QuantityItem-p.QuantityItem': '#dbbc2d',
57
+ 'B-p.Theme-p.Purpose': '#cb56ba',
58
+ 'B-p.Goal-p.Locus': '#b3597f',
59
+ 'B-p.Extent-p.Extent': '#5cadfa',
60
+ 'I-p.Extent-p.Extent': '#5cadfa',
61
+ 'B-p.Experiencer-p.Gestalt': '#8275f4',
62
+ 'B-p.Means-p.Means': '#b1bfb7',
63
+ 'B-p.Beneficiary-p.Beneficiary': '#0e9582',
64
+ 'B-p.Org-p.Beneficiary': '#c48ea7',
65
+ 'B-p.Stimulus-p.Topic': '#a6af3a',
66
+ 'B-p.Recipient-p.Ancillary': '#a5ff4b',
67
+ 'B-p.Beneficiary-p.Possessor': '#c941dc',
68
+ 'B-p.Agent-p.Ancillary': '#d18ce9',
69
+ 'B-p.Theme-p.Gestalt': '#b71c4f',
70
+ 'B-p.StartTime-p.StartTime': '#9b3cf9',
71
+ 'I-p.ComparisonRef-p.ComparisonRef': '#4df5a9',
72
+ 'B-p.Cost-p.Extent': '#117f70',
73
+ 'B-p.Manner-p.Source': '#460233',
74
+ 'I-p.Manner-p.Source': '#460233',
75
+ 'B-p.Characteristic-p.Source': '#41c518',
76
+ 'I-p.Characteristic-p.Source': '#41c518',
77
+ 'B-p.Locus-p.Path': '#d3c136',
78
+ 'I-p.Topic-p.Topic': '#976cae',
79
+ 'B-p.Manner-p.ComparisonRef': '#32cbcb',
80
+ 'B-p.Extent-p.Whole': '#94454f',
81
+ 'I-p.Extent-p.Whole': '#94454f',
82
+ 'B-p.Experiencer-p.Beneficiary': '#1f2d98',
83
+ 'B-p.Theme-p.ComparisonRef': '#ef3f97',
84
+ 'I-p.Time-p.Time': '#4605b0',
85
+ 'B-p.Stuff-p.Stuff': '#9919e8',
86
+ 'B-p.Theme-p.Goal': '#d7c6d1',
87
+ 'B-p.Interval-p.Interval': '#042206',
88
+ 'B-p.Time-p.Whole': '#ecf0a1',
89
+ 'I-p.Circumstance-p.Circumstance': '#69caeb',
90
+ 'B-p.Stimulus-p.Beneficiary': '#af168a',
91
+ 'B-p.Time-p.Interval': '#5cc4a8',
92
+ 'B-p.Characteristic-p.Locus': '#ac54e6',
93
+ 'B-p.Characteristic-p.Extent': '#0ec04c',
94
+ 'B-p.EndTime-p.EndTime': '#29e89e',
95
+ 'B-p.Experiencer-p.Ancillary': '#bce155',
96
+ 'B-p.Agent-p.Agent': '#aac43b',
97
+ 'B-p.PartPortion-p.Source': '#9eb3c3',
98
+ 'B-p.Org-p.Locus': '#434851',
99
+ 'I-p.Characteristic-p.Locus': '#ac54e6',
100
+ 'B-p.Locus-p.Source': '#7121d7',
101
+ 'I-p.Locus-p.Source': '#7121d7',
102
+ 'B-p.Duration-p.Extent': '#ca1096',
103
+ 'B-p.Characteristic-p.Identity': '#345c8d',
104
+ 'B-p.Possession-p.PartPortion': '#e592aa',
105
+ 'B-p.Possession-p.Theme': '#a59bec',
106
+ 'B-p.Whole-p.Locus': '#0bc209',
107
+ 'B-p.Direction-p.Goal': '#9d90cd',
108
+ 'B-p.Gestalt-p.Locus': '#97f830',
109
+ 'B-p.Org-p.Gestalt': '#2f2c3c',
110
+ 'B-p.Stimulus-p.Goal': '#c40f02',
111
+ 'B-p.Theme-p.Instrument': '#a312ed',
112
+ 'B-p.Stimulus-p.Force': '#d98ddb',
113
+ 'I-p.Purpose-p.Purpose': '#554065',
114
+ 'B-p.Beneficiary-p.Theme': '#68fdb4',
115
+ 'B-p.Characteristic-p.Goal': '#a60b97',
116
+ 'I-p.Characteristic-p.Goal': '#a60b97',
117
+ 'B-p.Time-p.Goal': '#97567c',
118
+ 'I-p.Direction-p.Direction': '#687447',
119
+ 'B-p.Explanation-p.Time': '#90f72f',
120
+ 'B-p.Instrument-p.Manner': '#2b1869',
121
+ 'B-p.Possession-p.Ancillary': '#a9672c',
122
+ 'B-p.Instrument-p.Instrument': '#6eb1ef',
123
+ 'B-p.Ensemble-p.Ancillary': '#93fb41',
124
+ 'I-p.Cost-p.Locus': '#fe5990',
125
+ 'B-p.Recipient-p.Gestalt': '#0674a2',
126
+ 'B-p.Agent-p.Source': '#bf427f',
127
+ 'I-p.Circumstance-p.Locus': '#38abe5',
128
+ 'B-p.Whole-p.Source': '#dae5cb',
129
+ 'B-p.Stimulus-p.Explanation': '#108bd6',
130
+ 'B-p.Stimulus-p.Direction': '#aa0f64',
131
+ 'I-p.Explanation-p.Explanation': '#852e58',
132
+ 'I-p.Approximator-p.Approximator': '#553ee1',
133
+ 'B-p.ComparisonRef-p.Purpose': '#65fb63',
134
+ 'B-p.ComparisonRef-p.Locus': '#e48da2',
135
+ 'I-p.QuantityItem-p.Whole': '#5f3ba4',
136
+ 'B-p.Theme-p.Ancillary': '#685b19',
137
+ 'I-p.Manner-p.Manner': '#436097',
138
+ 'B-p.Identity-p.ComparisonRef': '#caac20',
139
+ 'I-p.Goal-p.Locus': '#b3597f',
140
+ 'B-p.QuantityItem-p.Stuff': '#a1f649',
141
+ 'B-p.Recipient-p.Direction': '#a8ba9d',
142
+ 'B-p.Path-p.Locus': '#03c408',
143
+ 'B-p.Originator-p.Agent': '#b46878',
144
+ 'B-p.Beneficiary-p.Gestalt': '#26eaf0',
145
+ 'B-p.Possessor-p.Ancillary': '#dd8d5e',
146
+ 'B-p.Beneficiary-p.Goal': '#212bd7',
147
+ 'B-p.OrgMember-p.PartPortion': '#bd7620',
148
+ 'B-p.PartPortion-p.ComparisonRef': '#6fd197',
149
+ 'B-p.Frequency-p.Extent': '#8a9e22',
150
+ 'B-p.Beneficiary-p.Direction': '#094599',
151
+ 'B-p.Characteristic-p.Stuff': '#02889c',
152
+ 'B-p.Manner-p.Extent': '#686d06',
153
+ 'I-p.Cost-p.Cost': '#f4b518',
154
+ 'B-p.Theme-p.Whole': '#5a51fb',
155
+ 'B-p.Frequency-p.Frequency': '#d26bc7',
156
+ 'B-p.Purpose-p.Locus': '#80e1ac',
157
+ 'B-p.Force-p.Gestalt': '#1063d3',
158
+ 'B-p.Characteristic-p.Ancillary': '#947622',
159
+ 'B-p.ComparisonRef-p.Source': '#b0954c',
160
+ 'B-p.Org-p.Instrument': '#e2bfce',
161
+ 'B-p.Theme-p.Characteristic': '#44b67f',
162
+ 'B-p.Characteristic-p.Topic': '#b90264',
163
+ 'I-p.Characteristic-p.Topic': '#b90264',
164
+ 'B-p.Locus-p.Goal': '#5d62c0',
165
+ 'B-p.Locus-p.Whole': '#e4222b',
166
+ 'B-p.Theme-p.Locus': '#60211c',
167
+ 'B-p.Frequency-p.Manner': '#6b5831',
168
+ 'I-p.Frequency-p.Manner': '#6b5831',
169
+ 'I-p.Ensemble-p.Ancillary': '#93fb41',
170
+ 'B-p.Locus-p.Ancillary': '#8de37d',
171
+ 'B-p.Topic-p.Identity': '#10a385',
172
+ 'B-p.Org-p.Goal': '#b42090',
173
+ 'B-p.SetIteration-p.SetIteration': '#11e7a6',
174
+ 'B-p.PartPortion-p.Goal': '#ee8159',
175
+ 'B-p.ComparisonRef-p.Ancillary': '#3270a9',
176
+ 'B-p.Force-p.Force': '#dc6a3a',
177
+ 'B-p.Approximator-p.Extent': '#005d48',
178
+ 'I-p.Duration-p.Duration': '#5e454e',
179
+ 'B-p.Manner-p.Stuff': '#920903',
180
+ 'B-p.Path-p.Goal': '#543e80',
181
+ 'B-p.Explanation-p.Source': '#e65656',
182
+ 'B-p.Topic-p.Goal': '#31bcfc',
183
+ 'I-p.Manner-p.ComparisonRef': '#32cbcb',
184
+ 'B-p.Possession-p.Locus': '#1312e3',
185
+ 'B-p.Circumstance-p.Path': '#8b9109',
186
+ 'B-p.Gestalt-p.Source': '#7050ae',
187
+ 'B-p.Agent-p.Locus': '#c9846e',
188
+ 'B-p.Stimulus-p.Source': '#180a5f',
189
+ 'B-p.Org-p.Whole': '#2a3053',
190
+ 'I-p.Locus-p.Direction': '#12b336',
191
+ 'B-p.Org-p.Source': '#ad1e85',
192
+ 'B-p.Time-p.Extent': '#b1d4fa',
193
+ 'I-p.Goal-p.Goal': '#6bfc3c',
194
+ 'B-p.Possessor-p.Locus': '#ae306d',
195
+ 'B-p.Force-p.Source': '#727a29',
196
+ 'B-p.Gestalt-p.Topic': '#f47f98',
197
+ 'I-p.Whole-p.Whole': '#00d816',
198
+ 'B-p.Cost-p.Manner': '#a61141',
199
+ 'B-p.Means-p.Path': '#54d11a',
200
+ 'B-p.Originator-p.Instrument': '#44fe8a',
201
+ 'B-p.PartPortion-p.Instrument': '#4f7170',
202
+ 'B-p.Possession-p.Possession': '#d3abe4',
203
+ 'I-p.Possession-p.Possession': '#d3abe4',
204
+ 'B-p.Agent-p.Beneficiary': '#1c515e',
205
+ 'B-p.Instrument-p.Locus': '#4460b0',
206
+ 'B-p.Instrument-p.Theme': '#1bed0b',
207
+ 'B-p.Duration-p.Gestalt': '#2f787f',
208
+ 'I-p.Path-p.Path': '#3637c0',
209
+ 'B-p.Theme-p.Source': '#54a6f9',
210
+ 'B-p.Time-p.Gestalt': '#24ff12',
211
+ 'B-p.Time-p.Direction': '#9e135c',
212
+ 'B-p.Goal-p.Whole': '#5fad91',
213
+ 'B-p.Explanation-p.Manner': '#983754',
214
+ 'I-p.Explanation-p.Manner': '#983754',
215
+ 'I-p.Time-p.Interval': '#5cc4a8',
216
+ 'I-p.Org-p.Locus': '#434851',
217
+ 'B-p.Gestalt-p.Purpose': '#9ff474',
218
+ 'B-p.Stimulus-p.Theme': '#12dfa1',
219
+ 'B-p.Locus-p.Gestalt': '#636042',
220
+ 'B-p.Extent-p.Identity': '#1414fd',
221
+ 'B-p.ComparisonRef-p.Beneficiary': '#f47ef3',
222
+ 'B-p.Experiencer-p.Agent': '#21883e',
223
+ 'B-p.Time-p.Duration': '#98b42b',
224
+ 'B-p.SocialRel-p.Source': '#4f3f8f',
225
+ 'B-p.Whole-p.Circumstance': '#c70411',
226
+ 'B-p.Purpose-p.Goal': '#f2f199'}
227
+
228
  # Load the pipeline (token classification)
229
  #token_classifier = pipeline("token-classification", model="WesScivetti/SNACS_English", aggregation_strategy="simple")
230
 
 
235
  aggregation_strategy="simple")
236
 
237
  results = token_classifier(text)
238
+
239
+ sorted_results = sorted(results, key=lambda x: x["start"])
240
  output = ""
241
+ last_idx = 0
242
+
243
+ for prep in sorted_results:
244
+ start = prep["start"]
245
+ end = prep["end"]
246
+ label = prep["entity_group"]
247
+ word = html.escape(text[start:end])
248
+
249
+ # Add untagged text before the entity
250
+ output += html.escape(text[last_idx:start])
251
+
252
+ # Add highlighted entity
253
+ color = COLORS.get(label, "#D3D3D3") # default light gray
254
+ output += f"<span style='background-color: {color}; padding: 2px; border-radius: 4px;' title='{label}'>{word}</span>"
255
+
256
+ last_idx = end
257
+
258
+ # Add remaining text
259
+ output += html.escape(text[last_idx:])
260
+
261
+ return f"<div style='font-family: sans-serif; line-height: 1.6;'>{output}</div>"
262
+
263
 
264
  iface = gr.Interface(
265
  fn=classify_tokens,
266
+ inputs=gr.Textbox(lines=4, placeholder="Enter a sentence...", label="Input Text"),
267
+ outputs=gr.HTML(label="SNACS Tagged Sentence"),
268
+ title="SNACS English Classification",
269
+ description="SNACS English Classification. See the <a href='https://arxiv.org/abs/1704.02134'>SNACS guidelines</a> for details.",
270
+ theme="default"
271
  )
272
 
273
  iface.launch()