Update benchmark and add document-wise bar plot
Browse files- app.py +23 -0
- document_results.csv +254 -0
- leaderboard.csv +25 -17
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
|
|
4 |
from lexoid.api import parse
|
5 |
|
6 |
parser_options = ["LLM_PARSE", "STATIC_PARSE", "AUTO"]
|
@@ -163,9 +164,31 @@ with gr.Blocks(title="Lexoid Document Parser") as app:
|
|
163 |
|
164 |
# Leaderboard loaded from leaderboard.csv
|
165 |
df = pd.read_csv("leaderboard.csv")
|
|
|
|
|
|
|
|
|
166 |
leaderboard = gr.Dataframe(
|
167 |
value=df,
|
168 |
label="Leaderboard",
|
169 |
)
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
app.launch()
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
+
import plotly.express as px
|
5 |
from lexoid.api import parse
|
6 |
|
7 |
parser_options = ["LLM_PARSE", "STATIC_PARSE", "AUTO"]
|
|
|
164 |
|
165 |
# Leaderboard loaded from leaderboard.csv
|
166 |
df = pd.read_csv("leaderboard.csv")
|
167 |
+
# Sort df by `sequence_matcher` in descending order and use new index as "Rank"
|
168 |
+
df = df.sort_values(by="sequence_matcher", ascending=False).reset_index(drop=True)
|
169 |
+
df.index += 1
|
170 |
+
df.index.name = "Rank"
|
171 |
leaderboard = gr.Dataframe(
|
172 |
value=df,
|
173 |
label="Leaderboard",
|
174 |
)
|
175 |
|
176 |
+
df = pd.read_csv("document_results.csv")
|
177 |
+
fig = px.bar(
|
178 |
+
df,
|
179 |
+
x="Input File",
|
180 |
+
y="sequence_matcher",
|
181 |
+
color="model",
|
182 |
+
labels={
|
183 |
+
"Input File": "Document",
|
184 |
+
"sequence_matcher": "Sequence Matcher Score",
|
185 |
+
"model": "Model",
|
186 |
+
},
|
187 |
+
barmode="group",
|
188 |
+
title="Sequence Matcher Scores by Document and Model",
|
189 |
+
)
|
190 |
+
|
191 |
+
gr.Plot(value=fig)
|
192 |
+
|
193 |
+
|
194 |
app.launch()
|
document_results.csv
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Input File,sequence_matcher,cosine,jaccard,precision,recall,f1_score,model,parser_type
|
2 |
+
benchmark,0.9943117178612059,0.9976483450234231,0.9519230769230769,0.9658536585365853,0.9850746268656716,0.9753694581280787,claude-3-5-sonnet-20241022,LLM_PARSE1752195527
|
3 |
+
benchmark,0.9775376741541086,0.9964731546055415,0.9468599033816425,0.9560975609756097,0.98989898989899,0.9727047146401985,claude-3-7-sonnet-20250219,LLM_PARSE1752192118
|
4 |
+
benchmark,0.9823663253697383,0.9976483450234231,0.9660194174757282,0.9707317073170731,0.995,0.982716049382716,claude-opus-4-20250514,LLM_PARSE1752191645
|
5 |
+
benchmark,0.9872122762148338,0.9953021449322264,0.9471153846153846,0.9609756097560975,0.985,0.9728395061728395,claude-sonnet-4-20250514,LLM_PARSE1752191876
|
6 |
+
costco_bill,0.9504,0.9843196907197536,0.9333333333333333,0.9545454545454546,0.9767441860465116,0.9655172413793104,claude-3-5-sonnet-20241022,LLM_PARSE1752195570
|
7 |
+
costco_bill,0.06652512384996462,0.2313271194718138,0.16463414634146342,0.3068181818181818,0.2621359223300971,0.28272251308900526,claude-3-7-sonnet-20250219,LLM_PARSE1752192159
|
8 |
+
costco_bill,0.9686520376175548,0.9604564350002722,0.9021739130434783,0.9431818181818182,0.9540229885057471,0.9485714285714286,claude-opus-4-20250514,LLM_PARSE1752191693
|
9 |
+
costco_bill,0.9704236610711431,0.9428666305684691,0.9,0.9204545454545454,0.9759036144578314,0.9473684210526315,claude-sonnet-4-20250514,LLM_PARSE1752191931
|
10 |
+
cvs_coupon,0.9435665914221218,0.9114379397804238,0.868421052631579,0.8918918918918919,0.9705882352941176,0.9295774647887325,claude-3-5-sonnet-20241022,LLM_PARSE1752195586
|
11 |
+
cvs_coupon,0.1813186813186813,0.263433026844427,0.1797752808988764,0.43243243243243246,0.23529411764705882,0.30476190476190473,claude-3-7-sonnet-20250219,LLM_PARSE1752192169
|
12 |
+
cvs_coupon,0.9435665914221218,0.9114379397804238,0.868421052631579,0.8918918918918919,0.9705882352941176,0.9295774647887325,claude-opus-4-20250514,LLM_PARSE1752191715
|
13 |
+
cvs_coupon,0.577639751552795,0.5994541118316568,0.42105263157894735,0.43243243243243246,0.9411764705882353,0.5925925925925927,claude-sonnet-4-20250514,LLM_PARSE1752191954
|
14 |
+
grocery_bill,0.8917819365337672,0.9539973123930129,0.824468085106383,0.9011627906976745,0.9064327485380117,0.9037900874635568,claude-3-5-sonnet-20241022,LLM_PARSE1752195595
|
15 |
+
grocery_bill,0.9136807817589576,0.9595981354624561,0.8695652173913043,0.9302325581395349,0.9302325581395349,0.9302325581395349,claude-3-7-sonnet-20250219,LLM_PARSE1752192176
|
16 |
+
grocery_bill,0.5111492281303602,0.613928759048487,0.3791208791208791,0.4011627906976744,0.8734177215189873,0.549800796812749,claude-opus-4-20250514,LLM_PARSE1752191725
|
17 |
+
grocery_bill,0.7186358099878197,0.9022129995709839,0.7424242424242424,0.8546511627906976,0.8497109826589595,0.8521739130434782,claude-sonnet-4-20250514,LLM_PARSE1752191970
|
18 |
+
medical_invoice_sample1,0.43752629364745477,0.8753582889384084,0.6850828729281768,0.6850828729281768,1.0,0.8131147540983606,claude-3-5-sonnet-20241022,LLM_PARSE1752195617
|
19 |
+
medical_invoice_sample1,0.544229149115417,0.8067712761863249,0.7142857142857143,0.7182320441988951,0.9923664122137404,0.8333333333333334,claude-3-7-sonnet-20250219,LLM_PARSE1752192196
|
20 |
+
medical_invoice_sample1,0.450374531835206,0.68955623868024,0.6042780748663101,0.6243093922651933,0.9495798319327731,0.7533333333333334,claude-opus-4-20250514,LLM_PARSE1752191751
|
21 |
+
medical_invoice_sample1,0.6186770428015564,0.9481130966260292,0.9230769230769231,0.9281767955801105,0.9940828402366864,0.9599999999999999,claude-sonnet-4-20250514,LLM_PARSE1752192003
|
22 |
+
medical_travel_request_OWCP_957,0.9809800850302081,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,claude-3-5-sonnet-20241022,LLM_PARSE1752195633
|
23 |
+
medical_travel_request_OWCP_957,0.7990593153906455,0.9111930756739456,0.7922705314009661,0.8159203980099502,0.9647058823529412,0.8840970350404312,claude-3-7-sonnet-20250219,LLM_PARSE1752192213
|
24 |
+
medical_travel_request_OWCP_957,0.6848351648351648,0.9897516503852182,0.9468599033816425,0.9751243781094527,0.9702970297029703,0.9727047146401985,claude-opus-4-20250514,LLM_PARSE1752191774
|
25 |
+
medical_travel_request_OWCP_957,0.6881483116309866,0.9906247813089888,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,claude-sonnet-4-20250514,LLM_PARSE1752192028
|
26 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-3-5-sonnet-20241022,LLM_PARSE1752195653
|
27 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-3-7-sonnet-20250219,LLM_PARSE1752192831
|
28 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-opus-4-20250514,LLM_PARSE1752191803
|
29 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192055
|
30 |
+
test_2,0.8635294117647059,0.9344803961620254,0.9384615384615385,0.953125,0.9838709677419355,0.9682539682539683,claude-3-5-sonnet-20241022,LLM_PARSE1752195662
|
31 |
+
test_2,0.017057569296375266,0.5182028975919346,0.30327868852459017,0.578125,0.3894736842105263,0.46540880503144655,claude-3-7-sonnet-20250219,LLM_PARSE1752192840
|
32 |
+
test_2,0.7800729040097205,0.9076057641198195,0.875,0.875,1.0,0.9333333333333333,claude-opus-4-20250514,LLM_PARSE1752191815
|
33 |
+
test_2,0.9107344632768362,0.956789666727399,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192065
|
34 |
+
test_3,0.54868041694389,0.6800251173856684,0.47194719471947194,0.4897260273972603,0.9285714285714286,0.6412556053811659,claude-3-5-sonnet-20241022,LLM_PARSE1752195674
|
35 |
+
test_3,0.47942238267148013,0.6104071875503646,0.40273037542662116,0.4041095890410959,0.9915966386554622,0.5742092457420924,claude-3-7-sonnet-20250219,LLM_PARSE1752192849
|
36 |
+
test_3,0.4668769716088328,0.6112992185446862,0.3835616438356164,0.3835616438356164,1.0,0.5544554455445544,claude-opus-4-20250514,LLM_PARSE1752191829
|
37 |
+
test_3,0.4896007650011953,0.6130325357013243,0.4246575342465753,0.4246575342465753,1.0,0.5961538461538461,claude-sonnet-4-20250514,LLM_PARSE1752192079
|
38 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-3-5-sonnet-20241022,LLM_PARSE1752195694
|
39 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-3-7-sonnet-20250219,LLM_PARSE1752192867
|
40 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-opus-4-20250514,LLM_PARSE1752191849
|
41 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-sonnet-4-20250514,LLM_PARSE1752192097
|
42 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-3-5-sonnet-20241022,LLM_PARSE1752195703
|
43 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-3-7-sonnet-20250219,LLM_PARSE1752192877
|
44 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-opus-4-20250514,LLM_PARSE1752191860
|
45 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192108
|
46 |
+
benchmark,0.7154370737755734,0.9749846454998445,0.9516908212560387,0.9609756097560975,0.9899497487437185,0.9752475247524752,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193580
|
47 |
+
benchmark,0.9695945945945946,0.987761239745018,0.9471153846153846,0.9609756097560975,0.985,0.9728395061728395,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193698
|
48 |
+
costco_bill,0.9304987735077678,0.9804331969030851,0.8666666666666667,0.8863636363636364,0.975,0.9285714285714285,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193607
|
49 |
+
costco_bill,0.9761146496815286,0.9763315683176068,0.9438202247191011,0.9545454545454546,0.9882352941176471,0.9710982658959537,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193719
|
50 |
+
cvs_coupon,0.8640776699029126,0.8657283989931209,0.8157894736842105,0.8378378378378378,0.96875,0.8985507246376812,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193617
|
51 |
+
cvs_coupon,0.9684684684684685,0.9640488658284413,0.918918918918919,0.918918918918919,1.0,0.9577464788732395,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193726
|
52 |
+
grocery_bill,0.6153023006955591,0.7030506693946105,0.4722222222222222,0.4941860465116279,0.9139784946236559,0.641509433962264,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193627
|
53 |
+
grocery_bill,0.6180528182893181,0.879040239872227,0.7628865979381443,0.8604651162790697,0.8705882352941177,0.8654970760233918,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193732
|
54 |
+
medical_invoice_sample1,0.5519765739385066,0.9838399456156867,0.8846153846153846,0.8895027624309392,0.9938271604938271,0.9387755102040816,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193639
|
55 |
+
medical_invoice_sample1,0.5095588235294117,0.9302010671353808,0.8415300546448088,0.850828729281768,0.9871794871794872,0.913946587537092,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193743
|
56 |
+
medical_travel_request_OWCP_957,0.9807520143240823,0.9918980164869274,0.9420289855072463,0.9701492537313433,0.9701492537313433,0.9701492537313433,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193648
|
57 |
+
medical_travel_request_OWCP_957,0.8171557562076749,0.9225747170771591,0.8823529411764706,0.8955223880597015,0.9836065573770492,0.9375,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193749
|
58 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193655
|
59 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193759
|
60 |
+
test_2,0.5625744934445769,0.9399869182588547,0.9375,0.9375,1.0,0.967741935483871,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193661
|
61 |
+
test_2,0.2836990595611285,0.8354157828288629,0.7261904761904762,0.953125,0.7530864197530864,0.8413793103448276,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193764
|
62 |
+
test_3,0.49320295730980207,0.6359668796370604,0.4349315068493151,0.4349315068493151,1.0,0.6062052505966588,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193673
|
63 |
+
test_3,0.7360435712896324,0.8013312224709405,0.7431506849315068,0.7431506849315068,1.0,0.8526522593320236,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193771
|
64 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193684
|
65 |
+
test_4,0.9670014347202296,0.9484458356080679,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193795
|
66 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193692
|
67 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193800
|
68 |
+
benchmark,0.9845183486238532,0.9946795114428338,0.9565217391304348,0.9658536585365853,0.99,0.9777777777777777,gemini-1.5-flash,LLM_PARSE1752192567
|
69 |
+
benchmark,0.989401317674019,0.9970404887955766,0.9707317073170731,0.9707317073170731,1.0,0.9851485148514851,gemini-1.5-pro,LLM_PARSE1752192756
|
70 |
+
benchmark,0.9874500855675984,0.9940983091457294,0.9567307692307693,0.9707317073170731,0.9851485148514851,0.9778869778869779,gemini-2.0-flash,LLM_PARSE1752191518
|
71 |
+
benchmark,0.9937250427837993,0.9964515385167041,0.966183574879227,0.975609756097561,0.9900990099009901,0.9828009828009828,gemini-2.5-flash,LLM_PARSE1752189460
|
72 |
+
benchmark,0.9339783722253842,0.9952735461930056,0.9758454106280193,0.9853658536585366,0.9901960784313726,0.9877750611246944,gemini-2.5-pro,LLM_PARSE1752190362
|
73 |
+
costco_bill,0.9760383386581469,1.0000000000000002,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-flash,LLM_PARSE1752192601
|
74 |
+
costco_bill,0.9760383386581469,1.0000000000000002,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-pro,LLM_PARSE1752192794
|
75 |
+
costco_bill,0.9984375,1.0000000000000002,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191535
|
76 |
+
costco_bill,0.9984375,1.0000000000000002,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189576
|
77 |
+
costco_bill,0.9808135072908672,0.9883546339951822,0.967032967032967,1.0,0.967032967032967,0.9832402234636871,gemini-2.5-pro,LLM_PARSE1752190408
|
78 |
+
cvs_coupon,0.9217002237136466,0.8655333872506977,0.825,0.8918918918918919,0.9166666666666666,0.9041095890410958,gemini-1.5-flash,LLM_PARSE1752192618
|
79 |
+
cvs_coupon,0.9774774774774775,0.9640488658284413,0.8947368421052632,0.918918918918919,0.9714285714285714,0.9444444444444445,gemini-1.5-pro,LLM_PARSE1752192807
|
80 |
+
cvs_coupon,0.9819819819819819,0.9640488658284413,0.9459459459459459,0.9459459459459459,1.0,0.9722222222222222,gemini-2.0-flash,LLM_PARSE1752191547
|
81 |
+
cvs_coupon,0.9910714285714286,0.9818700292070942,0.972972972972973,0.972972972972973,1.0,0.9863013698630138,gemini-2.5-flash,LLM_PARSE1752189629
|
82 |
+
cvs_coupon,0.9535864978902954,0.9472018579507341,0.9230769230769231,0.972972972972973,0.9473684210526315,0.9599999999999999,gemini-2.5-pro,LLM_PARSE1752190427
|
83 |
+
grocery_bill,0.9405537459283387,0.9750275100306482,0.8736263736263736,0.9244186046511628,0.9408284023668639,0.9325513196480938,gemini-1.5-flash,LLM_PARSE1752192631
|
84 |
+
grocery_bill,0.9508599508599509,0.9793457694002203,0.8833333333333333,0.9244186046511628,0.9520958083832335,0.9380530973451326,gemini-1.5-pro,LLM_PARSE1752192816
|
85 |
+
grocery_bill,0.9414654113794515,0.9812402317724886,0.8888888888888888,0.9302325581395349,0.9523809523809523,0.9411764705882352,gemini-2.0-flash,LLM_PARSE1752191554
|
86 |
+
grocery_bill,0.9500818330605565,0.9849687838909444,0.9050279329608939,0.9418604651162791,0.9585798816568047,0.9501466275659824,gemini-2.5-flash,LLM_PARSE1752189662
|
87 |
+
grocery_bill,0.9258353708231459,0.9868702347954403,0.9106145251396648,0.9476744186046512,0.9588235294117647,0.9532163742690059,gemini-2.5-pro,LLM_PARSE1752190444
|
88 |
+
medical_invoice_sample1,0.42026117926394935,0.947978241231951,0.8121546961325967,0.8121546961325967,1.0,0.8963414634146342,gemini-1.5-flash,LLM_PARSE1752192667
|
89 |
+
medical_invoice_sample1,0.3044982698961938,0.45924767143506656,0.3021978021978022,0.30386740331491713,0.9821428571428571,0.46413502109704646,gemini-1.5-pro,LLM_PARSE1752192832
|
90 |
+
medical_invoice_sample1,0.6664298401420959,0.9969468443298555,0.9945054945054945,1.0,0.9945054945054945,0.9972451790633609,gemini-2.0-flash,LLM_PARSE1752191577
|
91 |
+
medical_invoice_sample1,0.681592039800995,1.0000000000000004,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189695
|
92 |
+
medical_invoice_sample1,0.584140653031934,0.9969468443298555,0.9285714285714286,0.9337016574585635,0.9941176470588236,0.962962962962963,gemini-2.5-pro,LLM_PARSE1752190462
|
93 |
+
medical_travel_request_OWCP_957,0.9393939393939394,0.9918327708656376,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-1.5-flash,LLM_PARSE1752192682
|
94 |
+
medical_travel_request_OWCP_957,0.9836212699124972,0.9928651210459996,0.9655172413793104,0.9751243781094527,0.98989898989899,0.9824561403508771,gemini-1.5-pro,LLM_PARSE1752192915
|
95 |
+
medical_travel_request_OWCP_957,0.6677108972924591,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-2.0-flash,LLM_PARSE1752191590
|
96 |
+
medical_travel_request_OWCP_957,0.6695086380973749,0.9928651210459996,0.9655172413793104,0.9751243781094527,0.98989898989899,0.9824561403508771,gemini-2.5-flash,LLM_PARSE1752189778
|
97 |
+
medical_travel_request_OWCP_957,0.9809800850302081,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-2.5-pro,LLM_PARSE1752190495
|
98 |
+
test_1,0.9879154078549849,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-flash,LLM_PARSE1752192697
|
99 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-1.5-pro,LLM_PARSE1752192932
|
100 |
+
test_1,0.9701492537313433,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191603
|
101 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189806
|
102 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190516
|
103 |
+
test_2,0.8776978417266187,0.9333128812745526,0.8955223880597015,0.9375,0.9523809523809523,0.9448818897637795,gemini-1.5-flash,LLM_PARSE1752192706
|
104 |
+
test_2,0.09340659340659341,0.36201693721224343,0.24770642201834864,0.421875,0.375,0.39705882352941174,gemini-1.5-pro,LLM_PARSE1752192940
|
105 |
+
test_2,0.8158192090395481,0.956789666727399,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191606
|
106 |
+
test_2,0.6518636003172086,0.8971698769455133,0.735632183908046,1.0,0.735632183908046,0.847682119205298,gemini-2.5-flash,LLM_PARSE1752189834
|
107 |
+
test_2,0.6294339622641509,0.8215120986648818,0.6808510638297872,1.0,0.6808510638297872,0.810126582278481,gemini-2.5-pro,LLM_PARSE1752190529
|
108 |
+
test_3,0.9952516619183286,0.9972565576469495,1.0,1.0,1.0,1.0,gemini-1.5-flash,LLM_PARSE1752192715
|
109 |
+
test_3,0.3889315910837817,0.523970690224494,0.3287671232876712,0.3287671232876712,1.0,0.49484536082474223,gemini-1.5-pro,LLM_PARSE1752193025
|
110 |
+
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191632
|
111 |
+
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189870
|
112 |
+
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190545
|
113 |
+
test_4,0.5350140056022409,0.9383854946113873,0.9523809523809523,0.967741935483871,0.9836065573770492,0.975609756097561,gemini-1.5-flash,LLM_PARSE1752192733
|
114 |
+
test_4,0.9415204678362573,0.8866095469254991,0.8888888888888888,0.9032258064516129,0.9824561403508771,0.9411764705882352,gemini-1.5-pro,LLM_PARSE1752193042
|
115 |
+
test_4,0.9789621318373072,0.882281367849817,0.8636363636363636,0.9193548387096774,0.9344262295081968,0.9268292682926829,gemini-2.0-flash,LLM_PARSE1752191644
|
116 |
+
test_4,0.9846582984658299,0.9736442407200391,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gemini-2.5-flash,LLM_PARSE1752189940
|
117 |
+
test_4,0.9874125874125874,0.9736442407200391,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gemini-2.5-pro,LLM_PARSE1752190585
|
118 |
+
test_5,0.9666666666666667,0.98240551387501,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,gemini-1.5-flash,LLM_PARSE1752192748
|
119 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-1.5-pro,LLM_PARSE1752193050
|
120 |
+
test_5,0.8923076923076924,0.9122764299654935,0.8,1.0,0.8,0.888888888888889,gemini-2.0-flash,LLM_PARSE1752191650
|
121 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189960
|
122 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190596
|
123 |
+
benchmark,0.9008844159804819,0.9369832924227369,0.7980769230769231,0.8097560975609757,0.9822485207100592,0.8877005347593584,gpt-4.1,LLM_PARSE1752192631
|
124 |
+
benchmark,0.9289386150589658,0.9471961007993572,0.8454106280193237,0.8536585365853658,0.9887005649717514,0.9162303664921466,gpt-4.1-mini,LLM_PARSE1752193012
|
125 |
+
benchmark,0.9965850882185544,0.9976483450234231,0.9757281553398058,0.9804878048780488,0.995049504950495,0.9877149877149877,gpt-4o,LLM_PARSE1752193261
|
126 |
+
benchmark,0.9931662870159453,0.9941282806901699,0.9565217391304348,0.9658536585365853,0.99,0.9777777777777777,gpt-4o-mini,LLM_PARSE1752193572
|
127 |
+
costco_bill,0.788550323176362,0.7932010681164532,0.6451612903225806,0.6818181818181818,0.9230769230769231,0.7843137254901961,gpt-4.1,LLM_PARSE1752192743
|
128 |
+
costco_bill,0.9126378286683631,0.9097971354270784,0.8426966292134831,0.8522727272727273,0.9868421052631579,0.9146341463414636,gpt-4.1-mini,LLM_PARSE1752193080
|
129 |
+
costco_bill,0.9147410358565737,0.9137892432215373,0.8297872340425532,0.8863636363636364,0.9285714285714286,0.9069767441860465,gpt-4o,LLM_PARSE1752193339
|
130 |
+
costco_bill,0.7649687220732797,0.843704268935445,0.673469387755102,0.75,0.868421052631579,0.8048780487804879,gpt-4o-mini,LLM_PARSE1752193613
|
131 |
+
cvs_coupon,0.9623059866962306,0.9118604542100428,0.8717948717948718,0.918918918918919,0.9444444444444444,0.9315068493150684,gpt-4.1,LLM_PARSE1752192766
|
132 |
+
cvs_coupon,0.9715536105032823,0.9293902157051029,0.8974358974358975,0.9459459459459459,0.9459459459459459,0.9459459459459459,gpt-4.1-mini,LLM_PARSE1752193103
|
133 |
+
cvs_coupon,0.9668874172185431,0.9462438241241992,0.9210526315789473,0.9459459459459459,0.9722222222222222,0.9589041095890412,gpt-4o,LLM_PARSE1752193375
|
134 |
+
cvs_coupon,0.9490022172949002,0.8561732638838288,0.7560975609756098,0.8378378378378378,0.8857142857142857,0.8611111111111112,gpt-4o-mini,LLM_PARSE1752193631
|
135 |
+
grocery_bill,0.364741641337386,0.5646872951556197,0.3333333333333333,0.3430232558139535,0.921875,0.5,gpt-4.1,LLM_PARSE1752192788
|
136 |
+
grocery_bill,0.9288835915772745,0.9584150871707698,0.8518518518518519,0.936046511627907,0.9044943820224719,0.9199999999999999,gpt-4.1-mini,LLM_PARSE1752193112
|
137 |
+
grocery_bill,0.9075144508670521,0.972007104683558,0.8681318681318682,0.9186046511627907,0.9404761904761905,0.9294117647058823,gpt-4o,LLM_PARSE1752193388
|
138 |
+
grocery_bill,0.31939605110336816,0.653408018573298,0.3743016759776536,0.38953488372093026,0.9054054054054054,0.5447154471544715,gpt-4o-mini,LLM_PARSE1752193642
|
139 |
+
medical_invoice_sample1,0.27628865979381445,0.6525948468218983,0.49171270718232046,0.49171270718232046,1.0,0.6592592592592593,gpt-4.1,LLM_PARSE1752192825
|
140 |
+
medical_invoice_sample1,0.39835306227483275,0.5365263411641703,0.4308510638297872,0.44751381215469616,0.9204545454545454,0.6022304832713755,gpt-4.1-mini,LLM_PARSE1752193133
|
141 |
+
medical_invoice_sample1,0.3889409559512652,0.7301804605852915,0.580110497237569,0.580110497237569,1.0,0.7342657342657343,gpt-4o,LLM_PARSE1752193418
|
142 |
+
medical_invoice_sample1,0.5210384959713519,0.7731409228849677,0.5837837837837838,0.5966850828729282,0.9642857142857143,0.7372013651877134,gpt-4o-mini,LLM_PARSE1752193669
|
143 |
+
medical_travel_request_OWCP_957,0.7609702916325974,0.887973703662565,0.7205882352941176,0.7313432835820896,0.98,0.8376068376068375,gpt-4.1,LLM_PARSE1752192856
|
144 |
+
medical_travel_request_OWCP_957,0.7388641425389755,0.8658437531963121,0.6650246305418719,0.6716417910447762,0.9854014598540146,0.7988165680473374,gpt-4.1-mini,LLM_PARSE1752193149
|
145 |
+
medical_travel_request_OWCP_957,0.7442988840368753,0.8485150292937816,0.803921568627451,0.8159203980099502,0.9820359281437125,0.8913043478260868,gpt-4o,LLM_PARSE1752193453
|
146 |
+
medical_travel_request_OWCP_957,0.7607785168760779,0.8874992931050839,0.8177339901477833,0.8258706467661692,0.9880952380952381,0.8997289972899729,gpt-4o-mini,LLM_PARSE1752193685
|
147 |
+
test_1,0.8538587848932676,0.8499829522942023,0.8085106382978723,0.8636363636363636,0.926829268292683,0.8941176470588236,gpt-4.1,LLM_PARSE1752192879
|
148 |
+
test_1,0.7073170731707317,0.5692872229094355,0.5576923076923077,0.6590909090909091,0.7837837837837838,0.7160493827160493,gpt-4.1-mini,LLM_PARSE1752193171
|
149 |
+
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gpt-4o,LLM_PARSE1752193475
|
150 |
+
test_1,0.9879154078549849,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gpt-4o-mini,LLM_PARSE1752193702
|
151 |
+
test_2,0.10275229357798166,0.5885438108408162,0.4019607843137255,0.640625,0.5189873417721519,0.5734265734265733,gpt-4.1,LLM_PARSE1752192906
|
152 |
+
test_2,0.768361581920904,0.9016179598417589,0.7878787878787878,0.8125,0.9629629629629629,0.8813559322033898,gpt-4.1-mini,LLM_PARSE1752193187
|
153 |
+
test_2,0.3020408163265306,0.8289104158435472,0.61,0.953125,0.6288659793814433,0.7577639751552796,gpt-4o,LLM_PARSE1752193500
|
154 |
+
test_2,0.45464725643896975,0.8527165377904645,0.8507462686567164,0.890625,0.95,0.9193548387096774,gpt-4o-mini,LLM_PARSE1752193711
|
155 |
+
test_3,0.3261848027535081,0.4359934422925263,0.29692832764505117,0.2979452054794521,0.9886363636363636,0.45789473684210524,gpt-4.1,LLM_PARSE1752192933
|
156 |
+
test_3,0.24040066777963273,0.4232113544305079,0.24232081911262798,0.24315068493150685,0.9861111111111112,0.3901098901098901,gpt-4.1-mini,LLM_PARSE1752193207
|
157 |
+
test_3,0.5386939344643272,0.6623639230615441,0.47440273037542663,0.476027397260274,0.9928571428571429,0.6435185185185186,gpt-4o,LLM_PARSE1752193512
|
158 |
+
test_3,0.49667300380228135,0.6304139746314001,0.46598639455782315,0.4691780821917808,0.9856115107913669,0.6357308584686774,gpt-4o-mini,LLM_PARSE1752193726
|
159 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4.1,LLM_PARSE1752192971
|
160 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4.1-mini,LLM_PARSE1752193234
|
161 |
+
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4o,LLM_PARSE1752193544
|
162 |
+
test_4,0.7440559440559441,0.6695411051373749,0.8507462686567164,0.9193548387096774,0.9193548387096774,0.9193548387096774,gpt-4o-mini,LLM_PARSE1752193744
|
163 |
+
test_5,0.5103448275862069,1.0,0.5769230769230769,0.75,0.7142857142857143,0.7317073170731706,gpt-4.1,LLM_PARSE1752192990
|
164 |
+
test_5,0.847457627118644,0.8542113981255111,0.68,0.85,0.7727272727272727,0.8095238095238095,gpt-4.1-mini,LLM_PARSE1752193247
|
165 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gpt-4o,LLM_PARSE1752193555
|
166 |
+
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gpt-4o-mini,LLM_PARSE1752193752
|
167 |
+
benchmark,0.8646162690428284,0.9833706893475125,0.9182692307692307,0.9317073170731708,0.9845360824742269,0.9573934837092734,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193557
|
168 |
+
benchmark,0.8039322774440196,0.9381793603264821,0.8591549295774648,0.8926829268292683,0.9581151832460733,0.9242424242424242,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193636
|
169 |
+
benchmark,0.8646162690428284,0.9833706893475125,0.9182692307692307,0.9317073170731708,0.9845360824742269,0.9573934837092734,meta-llama/Llama-Vision-Free,LLM_PARSE1752193941
|
170 |
+
costco_bill,0.38760806916426516,0.6662878518816022,0.456,0.6477272727272727,0.6063829787234043,0.6263736263736264,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193573
|
171 |
+
costco_bill,0.3740685543964232,0.5988965870697539,0.4596774193548387,0.6477272727272727,0.6129032258064516,0.6298342541436464,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193716
|
172 |
+
costco_bill,0.38760806916426516,0.6662878518816022,0.456,0.6477272727272727,0.6063829787234043,0.6263736263736264,meta-llama/Llama-Vision-Free,LLM_PARSE1752193976
|
173 |
+
cvs_coupon,0.8583509513742071,0.8493550473732521,0.6888888888888889,0.8378378378378378,0.7948717948717948,0.8157894736842105,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193580
|
174 |
+
cvs_coupon,0.598079561042524,0.8734566254000021,0.75,0.8918918918918919,0.825,0.8571428571428571,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193733
|
175 |
+
cvs_coupon,0.9094736842105263,0.8759877585881911,0.7674418604651163,0.8918918918918919,0.8461538461538461,0.868421052631579,meta-llama/Llama-Vision-Free,LLM_PARSE1752193982
|
176 |
+
grocery_bill,0.5517522412387939,0.81470846566113,0.6097560975609756,0.7267441860465116,0.7911392405063291,0.7575757575757576,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193585
|
177 |
+
grocery_bill,0.4329576369996114,0.6752012250444275,0.5107296137339056,0.6918604651162791,0.6611111111111111,0.6761363636363636,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193747
|
178 |
+
grocery_bill,0.6,0.8182133341234487,0.6116504854368932,0.7325581395348837,0.7875,0.7590361445783131,meta-llama/Llama-Vision-Free,LLM_PARSE1752193987
|
179 |
+
medical_invoice_sample1,0.681783243658724,0.9472935340823311,0.7845303867403315,0.7845303867403315,1.0,0.8792569659442725,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193596
|
180 |
+
medical_invoice_sample1,0.3680649526387009,0.7954352821899798,0.5469613259668509,0.5469613259668509,1.0,0.7071428571428572,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193771
|
181 |
+
medical_invoice_sample1,0.6357254290171607,0.8977921331555682,0.7417582417582418,0.7458563535911602,0.9926470588235294,0.8517350157728707,meta-llama/Llama-Vision-Free,LLM_PARSE1752194006
|
182 |
+
medical_travel_request_OWCP_957,0.949358541525996,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193601
|
183 |
+
medical_travel_request_OWCP_957,0.9331532748143146,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193799
|
184 |
+
medical_travel_request_OWCP_957,0.949358541525996,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-Vision-Free,LLM_PARSE1752194013
|
185 |
+
test_1,0.8388214904679376,0.5907352142521325,0.7727272727272727,0.7727272727272727,1.0,0.8717948717948718,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193607
|
186 |
+
test_1,0.9196141479099679,0.9134550238575389,0.8409090909090909,0.8409090909090909,1.0,0.9135802469135803,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193827
|
187 |
+
test_1,0.8099467140319716,0.5752160041929604,0.75,0.75,1.0,0.8571428571428571,meta-llama/Llama-Vision-Free,LLM_PARSE1752194021
|
188 |
+
test_2,0.4387755102040816,0.7944661768602863,0.8461538461538461,0.859375,0.9821428571428571,0.9166666666666665,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193610
|
189 |
+
test_2,0.33691164327002476,0.7878664683463091,0.5125,0.640625,0.7192982456140351,0.6776859504132231,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193844
|
190 |
+
test_2,0.4387755102040816,0.7944661768602863,0.8461538461538461,0.859375,0.9821428571428571,0.9166666666666665,meta-llama/Llama-Vision-Free,LLM_PARSE1752194027
|
191 |
+
test_3,0.3956175298804781,0.7941857105413662,0.6462585034013606,0.6506849315068494,0.9895833333333334,0.7851239669421488,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193616
|
192 |
+
test_3,0.6038095238095238,0.8853783887562813,0.7781569965870307,0.7808219178082192,0.9956331877729258,0.8752399232245681,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193858
|
193 |
+
test_3,0.42868525896414345,0.7954742905549371,0.6518771331058021,0.6541095890410958,0.9947916666666666,0.7892561983471074,meta-llama/Llama-Vision-Free,LLM_PARSE1752194033
|
194 |
+
test_4,0.9597701149425287,0.9428738047322169,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193627
|
195 |
+
test_4,0.4896907216494845,0.8594297517138777,0.9104477611940298,0.9838709677419355,0.9242424242424242,0.9531249999999999,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193905
|
196 |
+
test_4,0.9597701149425287,0.9428738047322169,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,meta-llama/Llama-Vision-Free,LLM_PARSE1752194065
|
197 |
+
test_5,0.5168067226890757,0.9785654006271267,1.0,1.0,1.0,1.0,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193630
|
198 |
+
test_5,0.29411764705882354,0.7184721906586644,0.43478260869565216,1.0,0.43478260869565216,0.6060606060606061,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193917
|
199 |
+
test_5,0.5168067226890757,0.9785654006271267,1.0,1.0,1.0,1.0,meta-llama/Llama-Vision-Free,LLM_PARSE1752194071
|
200 |
+
benchmark,0.9696622781911849,0.9847441394559939,0.9375,0.9512195121951219,0.9848484848484849,0.967741935483871,mistral-ocr-latest,LLM_PARSE1752338656
|
201 |
+
costco_bill,0.9398907103825137,0.9688803177143563,0.9130434782608695,0.9545454545454546,0.9545454545454546,0.9545454545454546,mistral-ocr-latest,LLM_PARSE1752338675
|
202 |
+
cvs_coupon,0.8979591836734694,0.8140511424435039,0.7560975609756098,0.8378378378378378,0.8857142857142857,0.8611111111111112,mistral-ocr-latest,LLM_PARSE1752338681
|
203 |
+
grocery_bill,0.7866388308977036,0.9479507228644266,0.8260869565217391,0.8837209302325582,0.926829268292683,0.9047619047619047,mistral-ocr-latest,LLM_PARSE1752338687
|
204 |
+
medical_invoice_sample1,0.6483983018139714,0.93463637677369,0.7692307692307693,0.7734806629834254,0.9929078014184397,0.8695652173913044,mistral-ocr-latest,LLM_PARSE1752338694
|
205 |
+
medical_travel_request_OWCP_957,0.9250785105428443,0.9916745936489192,0.9420289855072463,0.9701492537313433,0.9701492537313433,0.9701492537313433,mistral-ocr-latest,LLM_PARSE1752338697
|
206 |
+
test_1,0.89937106918239,0.6904920269308475,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,mistral-ocr-latest,LLM_PARSE1752338700
|
207 |
+
test_2,0.8858757062146893,0.956789666727399,1.0,1.0,1.0,1.0,mistral-ocr-latest,LLM_PARSE1752338703
|
208 |
+
test_3,0.9852185089974294,0.9961134622761473,0.9863013698630136,0.9863013698630136,1.0,0.993103448275862,mistral-ocr-latest,LLM_PARSE1752338708
|
209 |
+
test_4,0.9670014347202296,0.9484458356080679,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,mistral-ocr-latest,LLM_PARSE1752338715
|
210 |
+
test_5,0.8896551724137931,1.0,1.0,1.0,1.0,1.0,mistral-ocr-latest,LLM_PARSE1752338717
|
211 |
+
benchmark,0.937950937950938,0.9931595734254032,0.975609756097561,0.975609756097561,1.0,0.9876543209876543,google/gemma-3-27b-it,LLM_PARSE1752193189
|
212 |
+
benchmark,0.8642487046632125,0.89213958158887,0.9245283018867925,0.9560975609756097,0.9655172413793104,0.9607843137254901,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193459
|
213 |
+
benchmark,0.21546134663341646,0.29284676308689694,0.15458937198067632,0.15609756097560976,0.9411764705882353,0.2677824267782427,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193044
|
214 |
+
costco_bill,0.852589641434263,0.9561488518737534,0.9333333333333333,0.9545454545454546,0.9767441860465116,0.9655172413793104,google/gemma-3-27b-it,LLM_PARSE1752193225
|
215 |
+
costco_bill,0.7669753086419753,0.9422227301386843,0.8817204301075269,0.9318181818181818,0.9425287356321839,0.9371428571428572,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193495
|
216 |
+
costco_bill,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193072
|
217 |
+
cvs_coupon,0.12922173274596183,0.4246115008733564,0.3333333333333333,0.40540540540540543,0.6521739130434783,0.5,google/gemma-3-27b-it,LLM_PARSE1752193239
|
218 |
+
cvs_coupon,0.676595744680851,0.825943861885887,0.7857142857142857,0.8918918918918919,0.868421052631579,0.88,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193505
|
219 |
+
cvs_coupon,0.5125,0.915390834704615,0.8,0.8648648648648649,0.9142857142857143,0.888888888888889,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193081
|
220 |
+
grocery_bill,0.8597014925373134,0.968767180260828,0.88268156424581,0.9186046511627907,0.9575757575757575,0.9376854599406528,google/gemma-3-27b-it,LLM_PARSE1752193279
|
221 |
+
grocery_bill,0.5961538461538461,0.895542566523866,0.7055837563451777,0.8081395348837209,0.8475609756097561,0.8273809523809523,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193510
|
222 |
+
grocery_bill,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193095
|
223 |
+
medical_invoice_sample1,0.5514157973174366,0.9438375682019277,0.8369565217391305,0.850828729281768,0.9808917197452229,0.9112426035502958,google/gemma-3-27b-it,LLM_PARSE1752193296
|
224 |
+
medical_invoice_sample1,0.5238095238095238,0.8654821601615998,0.6885245901639344,0.6961325966850829,0.984375,0.8155339805825242,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193526
|
225 |
+
medical_invoice_sample1,0.5923984272608126,0.9454713915672728,0.8115183246073299,0.856353591160221,0.9393939393939394,0.8959537572254335,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193104
|
226 |
+
medical_travel_request_OWCP_957,0.8744313011828936,0.9823788973625543,0.893719806763285,0.9203980099502488,0.9685863874345549,0.9438775510204082,google/gemma-3-27b-it,LLM_PARSE1752193319
|
227 |
+
medical_travel_request_OWCP_957,0.9333633498424133,0.9904869530574749,0.9227053140096618,0.9502487562189055,0.9695431472081218,0.9597989949748743,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193533
|
228 |
+
medical_travel_request_OWCP_957,0.921727395411606,0.9650691379973317,0.8701923076923077,0.900497512437811,0.9627659574468085,0.9305912596401028,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193125
|
229 |
+
test_1,0.9577039274924471,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,google/gemma-3-27b-it,LLM_PARSE1752193363
|
230 |
+
test_1,0.8682432432432432,0.8513135168957464,0.7272727272727273,0.7272727272727273,1.0,0.8421052631578948,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193544
|
231 |
+
test_1,0.7553551296505073,0.9299846825779932,0.9565217391304348,1.0,0.9565217391304348,0.9777777777777777,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193132
|
232 |
+
test_2,0.09429280397022333,0.2303955584794412,0.1527777777777778,0.171875,0.5789473684210527,0.2650602409638554,google/gemma-3-27b-it,LLM_PARSE1752193372
|
233 |
+
test_2,0.09375,0.2358455734776353,0.2153846153846154,0.21875,0.9333333333333333,0.35443037974683544,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193551
|
234 |
+
test_2,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193148
|
235 |
+
test_3,0.5540571428571428,0.6965109155704817,0.5136986301369864,0.5136986301369864,1.0,0.6787330316742082,google/gemma-3-27b-it,LLM_PARSE1752193402
|
236 |
+
test_3,0.5178280121183874,0.6601984018137722,0.4828767123287671,0.4828767123287671,1.0,0.6512702078521939,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193561
|
237 |
+
test_3,0.6811800610376398,0.8244587258416712,0.7064846416382252,0.708904109589041,0.9951923076923077,0.8279999999999998,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193154
|
238 |
+
test_4,0.9404934687953556,0.9209651369162134,0.9047619047619048,0.9193548387096774,0.9827586206896551,0.95,google/gemma-3-27b-it,LLM_PARSE1752193426
|
239 |
+
test_4,0.49577464788732395,0.6577918775316766,0.835820895522388,0.9032258064516129,0.9180327868852459,0.9105691056910569,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193570
|
240 |
+
test_4,0.509915014164306,0.9384222336078193,0.90625,0.9354838709677419,0.9666666666666667,0.9508196721311476,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193179
|
241 |
+
test_5,0.11688311688311688,0.145217121515546,0.1,0.1,1.0,0.18181818181818182,google/gemma-3-27b-it,LLM_PARSE1752193431
|
242 |
+
test_5,0.9830508474576272,0.9804331969030868,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193575
|
243 |
+
test_5,0.9698996655518395,0.9804331969030868,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193187
|
244 |
+
benchmark,0.3131591013296653,0.6959250370277419,0.3510204081632653,0.8390243902439024,0.37636761487964987,0.5196374622356495,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327183
|
245 |
+
costco_bill,0.20055710306406685,0.20747572194890912,0.1590909090909091,0.1590909090909091,1.0,0.2745098039215686,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327423
|
246 |
+
cvs_coupon,0.9384965831435079,0.9380089897463492,0.8048780487804879,0.8918918918918919,0.8918918918918919,0.8918918918918919,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327533
|
247 |
+
grocery_bill,0.05804416403785489,0.068531396277478,0.06857142857142857,0.06976744186046512,0.8,0.1283422459893048,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327568
|
248 |
+
medical_invoice_sample1,0.4715447154471545,0.5420295004635525,0.3879781420765027,0.39226519337016574,0.9726027397260274,0.5590551181102362,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327679
|
249 |
+
medical_travel_request_OWCP_957,0.9116704805491991,0.9846151720521983,0.9,0.9402985074626866,0.9545454545454546,0.9473684210526316,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327801
|
250 |
+
test_1,0.9142857142857143,0.9045941094942251,0.8409090909090909,0.8409090909090909,1.0,0.9135802469135803,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327887
|
251 |
+
test_2,0.07761828814460393,0.07491099737364178,0.07368421052631578,0.21875,0.1,0.13725490196078433,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327917
|
252 |
+
test_3,0.9631433674297776,0.9852545986269438,0.9692832764505119,0.9726027397260274,0.9964912280701754,0.9844020797227038,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328039
|
253 |
+
test_4,0.42105263157894735,0.41443971976512034,0.30158730158730157,0.3064516129032258,0.95,0.4634146341463415,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328144
|
254 |
+
test_5,0.08142116950407106,0.595844223293273,0.75,0.75,1.0,0.8571428571428571,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328266
|
leaderboard.csv
CHANGED
@@ -1,17 +1,25 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,sequence_matcher,cosine,jaccard,precision,recall,f1_score,Time (s),Cost ($)
|
2 |
+
AUTO,0.905 (±0.111),0.967 (±0.051),0.944 (±0.069),0.976 (±0.031),0.966 (±0.061),0.970 (±0.038),10.312432592565363,0.0006797545454545454
|
3 |
+
gemini-1.5-flash,0.868 (±0.198),0.965 (±0.041),0.925 (±0.064),0.948 (±0.056),0.974 (±0.029),0.960 (±0.035),17.19163768941706,0.0004351772727272727
|
4 |
+
gemini-1.5-pro,0.782 (±0.341),0.833 (±0.252),0.769 (±0.309),0.793 (±0.287),0.932 (±0.185),0.831 (±0.245),27.126133116808806,0.01274568181818182
|
5 |
+
gemini-2.0-flash,0.900 (±0.127),0.971 (±0.040),0.946 (±0.067),0.976 (±0.031),0.967 (±0.060),0.971 (±0.037),12.433907595547764,0.0008061636363636364
|
6 |
+
gemini-2.5-flash,0.902 (±0.151),0.984 (±0.030),0.956 (±0.078),0.986 (±0.019),0.969 (±0.078),0.976 (±0.045),48.67426510290666,0.010509618181818182
|
7 |
+
gemini-2.5-pro,0.907 (±0.151),0.973 (±0.053),0.937 (±0.091),0.982 (±0.023),0.954 (±0.092),0.965 (±0.054),22.231554529883645,0.023052272727272727
|
8 |
+
claude-opus-4-20250514,0.798 (±0.230),0.878 (±0.159),0.809 (±0.238),0.823 (±0.238),0.972 (±0.038),0.873 (±0.173),21.011434186588634,0.09233181818181818
|
9 |
+
claude-sonnet-4-20250514,0.814 (±0.197),0.903 (±0.150),0.843 (±0.220),0.862 (±0.219),0.973 (±0.045),0.898 (±0.156),21.98797264966098,0.020450454545454546
|
10 |
+
claude-3-7-sonnet-20250219,0.634 (±0.395),0.752 (±0.298),0.667 (±0.338),0.739 (±0.266),0.795 (±0.323),0.748 (±0.286),70.10332116213712,0.017747727272727273
|
11 |
+
claude-3-5-sonnet-20241022,0.873 (±0.195),0.937 (±0.095),0.872 (±0.161),0.891 (±0.160),0.974 (±0.030),0.923 (±0.108),16.859260472384367,0.017785909090909092
|
12 |
+
qwen/qwen-2.5-vl-7b-instruct,0.469 (±0.364),0.617 (±0.441),0.560 (±0.421),0.584 (±0.441),0.693 (±0.446),0.610 (±0.439),13.234280304475265,0.0005954181818181818
|
13 |
+
google/gemma-3-27b-it,0.624 (±0.357),0.750 (±0.327),0.682 (±0.341),0.701 (±0.338),0.918 (±0.151),0.755 (±0.304),24.50513126633384,0.00019525454545454547
|
14 |
+
microsoft/phi-4-multimodal-instruct,0.665 (±0.258),0.800 (±0.217),0.738 (±0.221),0.779 (±0.239),0.944 (±0.050),0.829 (±0.183),10.961827516555786,0.0004915272727272727
|
15 |
+
accounts/fireworks/models/llama4-maverick-instruct-basic,0.792 (±0.206),0.914 (±0.128),0.843 (±0.201),0.854 (±0.200),0.981 (±0.025),0.901 (±0.141),10.71379793773998,0.0014935999999999999
|
16 |
+
accounts/fireworks/models/llama4-scout-instruct-basic,0.804 (±0.242),0.931 (±0.067),0.881 (±0.099),0.916 (±0.076),0.959 (±0.078),0.934 (±0.058),9.759119467301803,0.0008719636363636363
|
17 |
+
gpt-4.1,0.622 (±0.314),0.782 (±0.191),0.628 (±0.227),0.683 (±0.224),0.899 (±0.149),0.749 (±0.180),34.65604066848755,0.014605454545454545
|
18 |
+
gpt-4.1-mini,0.767 (±0.243),0.807 (±0.197),0.706 (±0.221),0.751 (±0.229),0.929 (±0.080),0.807 (±0.178),22.64150684530085,0.003515527272727273
|
19 |
+
gpt-4o,0.796 (±0.264),0.898 (±0.117),0.821 (±0.186),0.867 (±0.178),0.948 (±0.108),0.890 (±0.123),28.233586398037996,0.014729545454545455
|
20 |
+
gpt-4o-mini,0.727 (±0.245),0.832 (±0.136),0.755 (±0.209),0.784 (±0.210),0.951 (±0.048),0.844 (±0.150),17.197155345569957,0.006503372727272727
|
21 |
+
meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,0.677 (±0.226),0.850 (±0.134),0.780 (±0.166),0.827 (±0.122),0.919 (±0.130),0.867 (±0.112),7.225299813530662,0.0001541290909090909
|
22 |
+
meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,0.559 (±0.233),0.822 (±0.119),0.685 (±0.193),0.807 (±0.155),0.828 (±0.194),0.799 (±0.139),27.7422512661327,0.011019381818181817
|
23 |
+
meta-llama/Llama-Vision-Free,0.682 (±0.223),0.847 (±0.135),0.781 (±0.163),0.828 (±0.126),0.923 (±0.126),0.868 (±0.111),12.31139588356018,0.0
|
24 |
+
ds4sd/SmolDocling-256M-preview,0.486 (±0.378),0.583 (±0.355),0.510 (±0.348),0.580 (±0.350),0.822 (±0.301),0.607 (±0.330),108.91359732367776,0.0
|
25 |
+
mistral-ocr-latest,0.890 (±0.097),0.930 (±0.095),0.912 (±0.089),0.934 (±0.073),0.973 (±0.037),0.952 (±0.051),5.688163432207975,0.0012727272727272728
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
lexoid==0.1.13
|
2 |
-
matplotlib==3.10.1
|
|
|
|
1 |
lexoid==0.1.13
|
2 |
+
matplotlib==3.10.1
|
3 |
+
plotly==6.2.0
|