GotoUsuke's picture
Upload folder using huggingface_hub
db4a26f verified
raw
history blame
3.08 kB
from __future__ import annotations
import re
from decimal import Decimal
from html import unescape
from math import atan2, cos, radians, sin, sqrt
from typing import Any
from urllib.parse import unquote
from .exceptions import DuckDuckGoSearchException
# Use the third-party orjson package when it can be imported; otherwise fall
# back to the standard-library json module. HAS_ORJSON records which backend
# the JSON helpers in this module should call.
try:
    import orjson
except ImportError:
    import json

    HAS_ORJSON = False
else:
    HAS_ORJSON = True

# Non-greedy match of "<", any run of characters, then ">".
REGEX_STRIP_TAGS = re.compile("<.*?>")
def json_dumps(obj: Any) -> str:
    """Serialize ``obj`` to a JSON string indented by two spaces.

    orjson is used when ``HAS_ORJSON`` is true; otherwise the standard
    ``json`` module is used with ``ensure_ascii=False``.

    Raises:
        DuckDuckGoSearchException: wraps any exception raised while
            serializing; the original exception is chained as the cause.
    """
    try:
        if HAS_ORJSON:
            # orjson returns bytes, so decode to match the declared str return.
            return orjson.dumps(obj, option=orjson.OPT_INDENT_2).decode()
        return json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
def json_loads(obj: str | bytes) -> Any:
    """Parse a JSON document given as ``str`` or ``bytes``.

    The parser is orjson when ``HAS_ORJSON`` is true, else the standard
    ``json`` module.

    Raises:
        DuckDuckGoSearchException: wraps any exception raised while parsing;
            the original exception is chained as the cause.
    """
    try:
        # Only the selected backend's name is evaluated, so the missing one
        # is never touched.
        parse = orjson.loads if HAS_ORJSON else json.loads
        return parse(obj)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
def _extract_vqd(html_bytes: bytes, keywords: str) -> str:
"""Extract vqd from html bytes."""
for c1, c1_len, c2 in (
(b'vqd="', 5, b'"'),
(b"vqd=", 4, b"&"),
(b"vqd='", 5, b"'"),
):
try:
start = html_bytes.index(c1) + c1_len
end = html_bytes.index(c2, start)
return html_bytes[start:end].decode()
except ValueError:
pass
raise DuckDuckGoSearchException(f"_extract_vqd() {keywords=} Could not extract vqd.")
def _text_extract_json(html_bytes: bytes, keywords: str) -> list[dict[str, str]]:
    """text(backend="api") -> extract json from html.

    Takes the bytes between ``DDG.pageLayout.load('d',`` and the following
    ``);DDG.duckbar.load(`` and parses them with ``json_loads``.

    Args:
        html_bytes: Raw response body containing the embedded JSON payload.
        keywords: Only used to build the error message.

    Returns:
        The parsed JSON payload.

    Raises:
        DuckDuckGoSearchException: if either marker is missing or the
            payload cannot be parsed; the original exception is chained.
    """
    opening = b"DDG.pageLayout.load('d',"
    try:
        # The payload begins right after the opening marker; use its length
        # rather than a hard-coded offset.
        start = html_bytes.index(opening) + len(opening)
        end = html_bytes.index(b");DDG.duckbar.load(", start)
        result: list[dict[str, str]] = json_loads(html_bytes[start:end])
    except Exception as ex:
        raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} {type(ex).__name__}: {ex}") from ex
    return result
def _normalize(raw_html: str) -> str:
"""Strip HTML tags from the raw_html string."""
return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else ""
def _normalize_url(url: str) -> str:
"""Unquote URL and replace spaces with '+'."""
return unquote(url).replace(" ", "+") if url else ""
def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decimal) -> float:
"""Calculate distance between two points in km. Haversine formula."""
R = 6371.0087714 # Earth's radius in km
rlat1, rlon1, rlat2, rlon2 = map(radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
dlon, dlat = rlon2 - rlon1, rlat2 - rlat1
a = sin(dlat / 2) ** 2 + cos(rlat1) * cos(rlat2) * sin(dlon / 2) ** 2
c = 2 * atan2(sqrt(a), sqrt(1 - a))
return R * c
def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
"""Expand "tb" to a full proxy URL if applicable."""
return "socks5://127.0.0.1:9150" if proxy == "tb" else proxy