Update index.html
Browse files- index.html +12 -5
index.html
CHANGED
@@ -15,6 +15,7 @@
|
|
15 |
.hero.is-primary { background-color: #f9d5e5; }
|
16 |
.subtitle img { max-width: 100%; height: auto; }
|
17 |
.section-title { margin-top: 2em; }
|
|
|
18 |
</style>
|
19 |
</head>
|
20 |
<body>
|
@@ -62,7 +63,12 @@
|
|
62 |
<span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
|
63 |
<span class="author-block"><sup>6</sup>AI Sweden</span>
|
64 |
</div>
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
66 |
<div class="column has-text-centered">
|
67 |
<span class="link-block">
|
68 |
<a href="https://arxiv.org/abs/2505.22232" target="_blank"
|
@@ -154,6 +160,7 @@
|
|
154 |
<li>Benchmark performance improvement over FineWeb2</li>
|
155 |
<li>Higher document retention vs. FineWeb2 heuristic filter</li>
|
156 |
<li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
|
|
|
157 |
</ul>
|
158 |
</li>
|
159 |
<li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
|
@@ -169,14 +176,14 @@
|
|
169 |
<li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
|
170 |
<li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
|
171 |
<ul>
|
172 |
-
<li>JQL-Gemma</li>
|
173 |
-
<li>JQL-Mistral</li>
|
174 |
-
<li>JQL-Llama</li>
|
175 |
</ul>
|
176 |
</li>
|
177 |
<li>🛠️ Training & inference scripts</li>
|
178 |
<ul>
|
179 |
-
<li><a href="https://
|
180 |
<li>More coming soon</li>
|
181 |
</ul>
|
182 |
<li>🗄️ Large-scale dataset coming soon</li>
|
|
|
15 |
.hero.is-primary { background-color: #f9d5e5; }
|
16 |
.subtitle img { max-width: 100%; height: auto; }
|
17 |
.section-title { margin-top: 2em; }
|
18 |
+
.contact-info { margin-top: 1em; } /* Added style for contact info */
|
19 |
</style>
|
20 |
</head>
|
21 |
<body>
|
|
|
63 |
<span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
|
64 |
<span class="author-block"><sup>6</sup>AI Sweden</span>
|
65 |
</div>
|
66 |
+
<div class="is-size-5 contact-info has-text-centered">
|
67 |
+
<span class="icon">
|
68 |
+
<i class="fas fa-envelope"></i>
|
69 |
+
</span>
|
70 |
+
<span>Contact: mehdi.ali@iais.fraunhofer.de, brack@cs.tu-darmstadt.de</span>
|
71 |
+
</div>
|
72 |
<div class="column has-text-centered">
|
73 |
<span class="link-block">
|
74 |
<a href="https://arxiv.org/abs/2505.22232" target="_blank"
|
|
|
160 |
<li>Benchmark performance improvement over FineWeb2</li>
|
161 |
<li>Higher document retention vs. FineWeb2 heuristic filter</li>
|
162 |
<li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
|
163 |
+
<li>Generalizes to unseen languages</li>
|
164 |
</ul>
|
165 |
</li>
|
166 |
<li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
|
|
|
176 |
<li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
|
177 |
<li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
|
178 |
<ul>
|
179 |
+
<li>JQL-Edu-Gemma</li>
|
180 |
+
<li>JQL-Edu-Mistral</li>
|
181 |
+
<li>JQL-Edu-Llama</li>
|
182 |
</ul>
|
183 |
</li>
|
184 |
<li>🛠️ Training & inference scripts</li>
|
185 |
<ul>
|
186 |
+
<li><a href="https://github.com/JQL-AI/JQL-Annotation-Pipeline" target="_blank">Web Corpus Annotation</a></li>
|
187 |
<li>More coming soon</li>
|
188 |
</ul>
|
189 |
<li>🗄️ Large-scale dataset coming soon</li>
|