mbrack committed on
Commit
8af2b1b
·
verified ·
1 Parent(s): 26dcb36

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +12 -5
index.html CHANGED
@@ -15,6 +15,7 @@
15
  .hero.is-primary { background-color: #f9d5e5; }
16
  .subtitle img { max-width: 100%; height: auto; }
17
  .section-title { margin-top: 2em; }
 
18
  </style>
19
  </head>
20
  <body>
@@ -62,7 +63,12 @@
62
  <span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
63
  <span class="author-block"><sup>6</sup>AI Sweden</span>
64
  </div>
65
-
 
 
 
 
 
66
  <div class="column has-text-centered">
67
  <span class="link-block">
68
  <a href="https://arxiv.org/abs/2505.22232" target="_blank"
@@ -154,6 +160,7 @@
154
  <li>Benchmark performance improvement over FineWeb2</li>
155
  <li>Higher document retention vs. FineWeb2 heuristic filter</li>
156
  <li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
 
157
  </ul>
158
  </li>
159
  <li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
@@ -169,14 +176,14 @@
169
  <li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
170
  <li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
171
  <ul>
172
- <li>JQL-Gemma</li>
173
- <li>JQL-Mistral</li>
174
- <li>JQL-Llama</li>
175
  </ul>
176
  </li>
177
  <li>🛠️ Training & inference scripts</li>
178
  <ul>
179
- <li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">Web Corpus Annotation</a></li>
180
  <li>More coming soon</li>
181
  </ul>
182
  <li>🗄️ Large-scale dataset coming soon</li>
 
15
  .hero.is-primary { background-color: #f9d5e5; }
16
  .subtitle img { max-width: 100%; height: auto; }
17
  .section-title { margin-top: 2em; }
18
+ .contact-info { margin-top: 1em; } /* Added style for contact info */
19
  </style>
20
  </head>
21
  <body>
 
63
  <span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
64
  <span class="author-block"><sup>6</sup>AI Sweden</span>
65
  </div>
66
+ <div class="is-size-5 contact-info has-text-centered">
67
+ <span class="icon">
68
+ <i class="fas fa-envelope"></i>
69
+ </span>
70
+ <span>Contact: mehdi.ali@iais.fraunhofer.de, brack@cs.tu-darmstadt.de</span>
71
+ </div>
72
  <div class="column has-text-centered">
73
  <span class="link-block">
74
  <a href="https://arxiv.org/abs/2505.22232" target="_blank"
 
160
  <li>Benchmark performance improvement over FineWeb2</li>
161
  <li>Higher document retention vs. FineWeb2 heuristic filter</li>
162
  <li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
163
+ <li>Generalizes to unseen languages</li>
164
  </ul>
165
  </li>
166
  <li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
 
176
  <li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
177
  <li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
178
  <ul>
179
+ <li>JQL-Edu-Gemma</li>
180
+ <li>JQL-Edu-Mistral</li>
181
+ <li>JQL-Edu-Llama</li>
182
  </ul>
183
  </li>
184
  <li>🛠️ Training & inference scripts</li>
185
  <ul>
186
+ <li><a href="https://github.com/JQL-AI/JQL-Annotation-Pipeline" target="_blank">Web Corpus Annotation</a></li>
187
  <li>More coming soon</li>
188
  </ul>
189
  <li>🗄️ Large-scale dataset coming soon</li>