|
from __future__ import annotations |
|
|
|
import re |
|
from decimal import Decimal |
|
from html import unescape |
|
from math import atan2, cos, radians, sin, sqrt |
|
from typing import Any |
|
from urllib.parse import unquote |
|
|
|
from .exceptions import DuckDuckGoSearchException |
|
|
|
try: |
|
HAS_ORJSON = True |
|
import orjson |
|
except ImportError: |
|
HAS_ORJSON = False |
|
import json |
|
|
|
# Non-greedy match of one "<...>" span; compiled once at import time and
# reused by _normalize() to remove markup from text.
REGEX_STRIP_TAGS = re.compile(r"<.*?>")
|
|
|
|
|
def json_dumps(obj: Any) -> str:
    """Serialize ``obj`` to a JSON string indented by two spaces.

    orjson is used when ``HAS_ORJSON`` is true; otherwise the standard
    ``json`` module is used with ``ensure_ascii=False``.

    Raises:
        DuckDuckGoSearchException: wraps any exception raised while
            serializing; the original exception is chained as the cause.
    """
    try:
        if HAS_ORJSON:
            # orjson.dumps() yields an encoded payload, so decode it to str.
            encoded = orjson.dumps(obj, option=orjson.OPT_INDENT_2)
            return encoded.decode()
        return json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
|
|
|
|
|
def json_loads(obj: str | bytes) -> Any:
    """Parse a JSON document given as ``str`` or ``bytes``.

    orjson is used when ``HAS_ORJSON`` is true; otherwise the standard
    ``json`` module is used.

    Raises:
        DuckDuckGoSearchException: wraps any exception raised while
            parsing; the original exception is chained as the cause.
    """
    try:
        # Pick the decoder inside the try so every failure is wrapped alike.
        decode = orjson.loads if HAS_ORJSON else json.loads
        return decode(obj)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
|
|
|
|
|
def _extract_vqd(html_bytes: bytes, keywords: str) -> str: |
|
"""Extract vqd from html bytes.""" |
|
for c1, c1_len, c2 in ( |
|
(b'vqd="', 5, b'"'), |
|
(b"vqd=", 4, b"&"), |
|
(b"vqd='", 5, b"'"), |
|
): |
|
try: |
|
start = html_bytes.index(c1) + c1_len |
|
end = html_bytes.index(c2, start) |
|
return html_bytes[start:end].decode() |
|
except ValueError: |
|
pass |
|
raise DuckDuckGoSearchException(f"_extract_vqd() {keywords=} Could not extract vqd.") |
|
|
|
|
|
def _text_extract_json(html_bytes: bytes, keywords: str) -> list[dict[str, str]]:
    """text(backend="api") -> extract json from html.

    Takes the bytes between ``DDG.pageLayout.load('d',`` and the following
    ``);DDG.duckbar.load(`` and parses them with ``json_loads``.

    Args:
        html_bytes: Raw response body containing the embedded JSON.
        keywords: Search keywords, used only in the error message.

    Returns:
        The parsed JSON payload.

    Raises:
        DuckDuckGoSearchException: if either marker is missing or the
            payload cannot be parsed; the original exception is chained.
    """
    marker = b"DDG.pageLayout.load('d',"
    try:
        # Payload starts right after the marker (len(marker) == 24).
        start = html_bytes.index(marker) + len(marker)
        end = html_bytes.index(b");DDG.duckbar.load(", start)
        result: list[dict[str, str]] = json_loads(html_bytes[start:end])
        return result
    except Exception as ex:
        raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} {type(ex).__name__}: {ex}") from ex
|
|
|
|
|
def _normalize(raw_html: str) -> str: |
|
"""Strip HTML tags from the raw_html string.""" |
|
return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else "" |
|
|
|
|
|
def _normalize_url(url: str) -> str: |
|
"""Unquote URL and replace spaces with '+'.""" |
|
return unquote(url).replace(" ", "+") if url else "" |
|
|
|
|
|
def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decimal) -> float: |
|
"""Calculate distance between two points in km. Haversine formula.""" |
|
R = 6371.0087714 |
|
rlat1, rlon1, rlat2, rlon2 = map(radians, [float(lat1), float(lon1), float(lat2), float(lon2)]) |
|
dlon, dlat = rlon2 - rlon1, rlat2 - rlat1 |
|
a = sin(dlat / 2) ** 2 + cos(rlat1) * cos(rlat2) * sin(dlon / 2) ** 2 |
|
c = 2 * atan2(sqrt(a), sqrt(1 - a)) |
|
return R * c |
|
|
|
|
|
def _expand_proxy_tb_alias(proxy: str | None) -> str | None: |
|
"""Expand "tb" to a full proxy URL if applicable.""" |
|
return "socks5://127.0.0.1:9150" if proxy == "tb" else proxy |
|
|