add-hub-search
#3
by
burtenshaw
HF Staff
- opened
- app.py +12 -3
- pyproject.toml +1 -0
- requirements.txt +87 -8
- uv.lock +14 -0
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
from datasets import load_dataset, Dataset
|
3 |
from difflib import ndiff
|
4 |
import pandas as pd
|
|
|
5 |
|
6 |
from semhash import SemHash
|
7 |
from semhash.datamodels import DeduplicationResult
|
@@ -323,7 +324,12 @@ with gr.Blocks(
|
|
323 |
)
|
324 |
|
325 |
with gr.Row():
|
326 |
-
dataset1_name =
|
|
|
|
|
|
|
|
|
|
|
327 |
dataset1_split = gr.Textbox(
|
328 |
value=default_dataset1_split, label="Dataset 1 Split"
|
329 |
)
|
@@ -334,8 +340,11 @@ with gr.Blocks(
|
|
334 |
dataset2_inputs = gr.Column(visible=True)
|
335 |
with dataset2_inputs:
|
336 |
with gr.Row():
|
337 |
-
dataset2_name =
|
338 |
-
|
|
|
|
|
|
|
339 |
)
|
340 |
dataset2_split = gr.Textbox(
|
341 |
value=default_dataset2_split, label="Dataset 2 Split"
|
|
|
2 |
from datasets import load_dataset, Dataset
|
3 |
from difflib import ndiff
|
4 |
import pandas as pd
|
5 |
+
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
6 |
|
7 |
from semhash import SemHash
|
8 |
from semhash.datamodels import DeduplicationResult
|
|
|
324 |
)
|
325 |
|
326 |
with gr.Row():
|
327 |
+
dataset1_name = HuggingfaceHubSearch(
|
328 |
+
label="Dataset 1 Name",
|
329 |
+
placeholder="Search for datasets on HuggingFace Hub",
|
330 |
+
search_type="dataset",
|
331 |
+
value=default_dataset_name,
|
332 |
+
)
|
333 |
dataset1_split = gr.Textbox(
|
334 |
value=default_dataset1_split, label="Dataset 1 Split"
|
335 |
)
|
|
|
340 |
dataset2_inputs = gr.Column(visible=True)
|
341 |
with dataset2_inputs:
|
342 |
with gr.Row():
|
343 |
+
dataset2_name = HuggingfaceHubSearch(
|
344 |
+
label="Dataset 2 Name",
|
345 |
+
placeholder="Search for datasets on HuggingFace Hub",
|
346 |
+
search_type="dataset",
|
347 |
+
value=default_dataset_name,
|
348 |
)
|
349 |
dataset2_split = gr.Textbox(
|
350 |
value=default_dataset2_split, label="Dataset 2 Split"
|
pyproject.toml
CHANGED
@@ -6,6 +6,7 @@ readme = "README.md"
|
|
6 |
requires-python = ">=3.11"
|
7 |
dependencies = [
|
8 |
"datasets>=3.6.0",
|
|
|
9 |
"gradio[oauth]>=5.32.1",
|
10 |
"huggingface-hub>=0.32.3",
|
11 |
"model2vec>=0.5.0",
|
|
|
6 |
requires-python = ">=3.11"
|
7 |
dependencies = [
|
8 |
"datasets>=3.6.0",
|
9 |
+
"gradio-huggingfacehub-search>=0.0.12",
|
10 |
"gradio[oauth]>=5.32.1",
|
11 |
"huggingface-hub>=0.32.3",
|
12 |
"model2vec>=0.5.0",
|
requirements.txt
CHANGED
@@ -1,8 +1,87 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv export --format requirements-txt --no-hashes
|
3 |
+
aiofiles==24.1.0
|
4 |
+
aiohappyeyeballs==2.6.1
|
5 |
+
aiohttp==3.12.7
|
6 |
+
aiosignal==1.3.2
|
7 |
+
annotated-types==0.7.0
|
8 |
+
anyio==4.9.0
|
9 |
+
attrs==25.3.0
|
10 |
+
audioop-lts==0.2.1 ; python_full_version >= '3.13'
|
11 |
+
authlib==1.6.0
|
12 |
+
certifi==2025.4.26
|
13 |
+
cffi==1.17.1 ; platform_python_implementation != 'PyPy'
|
14 |
+
charset-normalizer==3.4.2
|
15 |
+
click==8.2.1 ; sys_platform != 'emscripten'
|
16 |
+
colorama==0.4.6 ; platform_system == 'Windows'
|
17 |
+
cryptography==45.0.3
|
18 |
+
datasets==3.6.0
|
19 |
+
dill==0.3.8
|
20 |
+
fastapi==0.115.12
|
21 |
+
ffmpy==0.6.0
|
22 |
+
filelock==3.18.0
|
23 |
+
frozendict==2.4.6
|
24 |
+
frozenlist==1.6.0
|
25 |
+
fsspec==2025.3.0
|
26 |
+
gradio==5.32.1
|
27 |
+
gradio-client==1.10.2
|
28 |
+
gradio-huggingfacehub-search==0.0.12
|
29 |
+
groovy==0.1.2
|
30 |
+
h11==0.16.0
|
31 |
+
hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
|
32 |
+
httpcore==1.0.9
|
33 |
+
httpx==0.28.1
|
34 |
+
huggingface-hub==0.32.3
|
35 |
+
idna==3.10
|
36 |
+
itsdangerous==2.2.0
|
37 |
+
jinja2==3.1.6
|
38 |
+
joblib==1.5.1
|
39 |
+
markdown-it-py==3.0.0
|
40 |
+
markupsafe==3.0.2
|
41 |
+
mdurl==0.1.2
|
42 |
+
model2vec==0.5.0
|
43 |
+
multidict==6.4.4
|
44 |
+
multiprocess==0.70.16
|
45 |
+
numpy==2.2.6
|
46 |
+
orjson==3.10.18
|
47 |
+
packaging==25.0
|
48 |
+
pandas==2.2.3
|
49 |
+
pillow==11.2.1
|
50 |
+
propcache==0.3.1
|
51 |
+
pyarrow==20.0.0
|
52 |
+
pycparser==2.22 ; platform_python_implementation != 'PyPy'
|
53 |
+
pydantic==2.11.5
|
54 |
+
pydantic-core==2.33.2
|
55 |
+
pydub==0.25.1
|
56 |
+
pygments==2.19.1
|
57 |
+
python-dateutil==2.9.0.post0
|
58 |
+
python-multipart==0.0.20
|
59 |
+
pytz==2025.2
|
60 |
+
pyyaml==6.0.2
|
61 |
+
requests==2.32.3
|
62 |
+
rich==14.0.0
|
63 |
+
ruff==0.11.12 ; sys_platform != 'emscripten'
|
64 |
+
safehttpx==0.1.6
|
65 |
+
safetensors==0.5.3
|
66 |
+
semantic-version==2.10.0
|
67 |
+
semhash==0.3.0
|
68 |
+
setuptools==80.9.0
|
69 |
+
shellingham==1.5.4 ; sys_platform != 'emscripten'
|
70 |
+
simsimd==6.4.7
|
71 |
+
six==1.17.0
|
72 |
+
sniffio==1.3.1
|
73 |
+
starlette==0.46.2
|
74 |
+
tokenizers==0.21.1
|
75 |
+
tomlkit==0.13.2
|
76 |
+
tqdm==4.67.1
|
77 |
+
typer==0.16.0 ; sys_platform != 'emscripten'
|
78 |
+
typing-extensions==4.14.0
|
79 |
+
typing-inspection==0.4.1
|
80 |
+
tzdata==2025.2
|
81 |
+
urllib3==2.4.0
|
82 |
+
usearch==2.17.8
|
83 |
+
uvicorn==0.34.3 ; sys_platform != 'emscripten'
|
84 |
+
vicinity==0.4.1
|
85 |
+
websockets==15.0.1
|
86 |
+
xxhash==3.5.0
|
87 |
+
yarl==1.20.0
|
uv.lock
CHANGED
@@ -582,6 +582,18 @@ wheels = [
|
|
582 |
{ url = "https://files.pythonhosted.org/packages/9b/1b/b372308c263379ae3ebc440512432979458330113bdee26cef86c89bf48e/gradio_client-1.10.2-py3-none-any.whl", hash = "sha256:6de67b6224123d264c7887caa0586b2a9e2c369ec32ca38927cf8a841694edcd", size = 323311 },
|
583 |
]
|
584 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
585 |
[[package]]
|
586 |
name = "groovy"
|
587 |
version = "0.1.2"
|
@@ -1475,6 +1487,7 @@ source = { virtual = "." }
|
|
1475 |
dependencies = [
|
1476 |
{ name = "datasets" },
|
1477 |
{ name = "gradio", extra = ["oauth"] },
|
|
|
1478 |
{ name = "huggingface-hub" },
|
1479 |
{ name = "model2vec" },
|
1480 |
{ name = "numpy" },
|
@@ -1486,6 +1499,7 @@ dependencies = [
|
|
1486 |
requires-dist = [
|
1487 |
{ name = "datasets", specifier = ">=3.6.0" },
|
1488 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.32.1" },
|
|
|
1489 |
{ name = "huggingface-hub", specifier = ">=0.32.3" },
|
1490 |
{ name = "model2vec", specifier = ">=0.5.0" },
|
1491 |
{ name = "numpy", specifier = ">=2.2.6" },
|
|
|
582 |
{ url = "https://files.pythonhosted.org/packages/9b/1b/b372308c263379ae3ebc440512432979458330113bdee26cef86c89bf48e/gradio_client-1.10.2-py3-none-any.whl", hash = "sha256:6de67b6224123d264c7887caa0586b2a9e2c369ec32ca38927cf8a841694edcd", size = 323311 },
|
583 |
]
|
584 |
|
585 |
+
[[package]]
|
586 |
+
name = "gradio-huggingfacehub-search"
|
587 |
+
version = "0.0.12"
|
588 |
+
source = { registry = "https://pypi.org/simple" }
|
589 |
+
dependencies = [
|
590 |
+
{ name = "gradio" },
|
591 |
+
]
|
592 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c2/aa/dbe96e59a31fbeacce39eb0a7828109dc3b8d0005a967f30acb48f3395b2/gradio_huggingfacehub_search-0.0.12.tar.gz", hash = "sha256:9da6b7b7c97f2ff8fcfad8d5789a68c79e995e0d39163bccfded422135df7288", size = 1199884 }
|
593 |
+
wheels = [
|
594 |
+
{ url = "https://files.pythonhosted.org/packages/fc/f6/5aeb2b22a28e7961c2b396e4847c82c634cf139c1a80be697b824ec5e525/gradio_huggingfacehub_search-0.0.12-py3-none-any.whl", hash = "sha256:41d8870fa6b2b715848fcadb5e773eab4da0b9fe431c463a7cce2d616ad8f743", size = 1128435 },
|
595 |
+
]
|
596 |
+
|
597 |
[[package]]
|
598 |
name = "groovy"
|
599 |
version = "0.1.2"
|
|
|
1487 |
dependencies = [
|
1488 |
{ name = "datasets" },
|
1489 |
{ name = "gradio", extra = ["oauth"] },
|
1490 |
+
{ name = "gradio-huggingfacehub-search" },
|
1491 |
{ name = "huggingface-hub" },
|
1492 |
{ name = "model2vec" },
|
1493 |
{ name = "numpy" },
|
|
|
1499 |
requires-dist = [
|
1500 |
{ name = "datasets", specifier = ">=3.6.0" },
|
1501 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.32.1" },
|
1502 |
+
{ name = "gradio-huggingfacehub-search", specifier = ">=0.0.12" },
|
1503 |
{ name = "huggingface-hub", specifier = ">=0.32.3" },
|
1504 |
{ name = "model2vec", specifier = ">=0.5.0" },
|
1505 |
{ name = "numpy", specifier = ">=2.2.6" },
|