nidhal baccouri
added class properties for reusability
a0092fa
raw
history blame
4.04 kB
"""
Google Translate wrapper: requests a translation over HTTP and extracts it from the returned HTML
"""
from typing import List, Optional
import requests
from bs4 import BeautifulSoup
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS
from deep_translator.exceptions import (
RequestError,
TooManyRequests,
TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid
class GoogleTranslator(BaseTranslator):
    """
    class that wraps functions, which use Google Translate under the hood to translate text(s)
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping handed to requests.get on every request
        @param kwargs: forwarded unchanged to BaseTranslator.__init__
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("GOOGLE_TRANSLATE"),
            source=source,
            target=target,
            element_tag="div",
            element_query={"class": "t0"},
            payload_key="q",  # key of text in the url
            **kwargs
        )
        # fallback selector, tried when the primary element_query matches nothing
        self._alt_element_query = {"class": "result-container"}

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: accepted for interface compatibility; only forwarded to the retry call
        @return: str: translated text (the stripped input itself when source and
            target are the same, when the input is empty, or when the service
            echoes the input back)
        @raise TooManyRequests: when the response status code is 429
        @raise RequestError: when the response status code is anything else but 200
        @raise TranslationNotFound: when neither result element is present in the response
        """
        if not is_input_valid(text):
            # NOTE(review): is_input_valid lives in deep_translator.validate and is
            # not visible here; the original fell through to an implicit None for a
            # falsy result, so that behaviour is kept as-is.
            return None

        text = text.strip()
        if self._same_source_target() or is_empty(text):
            return text

        self._url_params["tl"] = self._target
        self._url_params["sl"] = self._source
        if self.payload_key:
            self._url_params[self.payload_key] = text

        # NOTE(review): no timeout is passed, so requests waits indefinitely on an
        # unresponsive server.
        response = requests.get(
            self._base_url, params=self._url_params, proxies=self.proxies
        )
        if response.status_code == 429:
            raise TooManyRequests()
        if response.status_code != 200:
            raise RequestError()

        soup = BeautifulSoup(response.text, "html.parser")
        element = soup.find(self._element_tag, self._element_query)
        if not element:
            element = soup.find(self._element_tag, self._alt_element_query)
        if not element:
            raise TranslationNotFound(text)

        translated = element.get_text(strip=True)
        if translated != text:
            return translated

        # The result is identical to the (already stripped) input. Both strings
        # therefore contain the same alphanumeric characters, so the only open
        # question is whether there are any at all.
        if any(ch.isalnum() for ch in text):
            self._url_params["tl"] = self._target
            if "hl" not in self._url_params:
                return text
            # retry without the "hl" url parameter
            del self._url_params["hl"]
            return self.translate(text, **kwargs)

        # Input without any alphanumeric character (e.g. only punctuation): the
        # original code reached the end of the function here and returned None.
        return translated

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args, forwarded to self._translate_file
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to self._translate_batch
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
if __name__ == "__main__":
    # Manual smoke run: translate one word, then switch the target on the same
    # instance and translate it again.
    trans = GoogleTranslator(target="de", source="auto")
    first_result = trans.translate("cute")
    print("translation: ", first_result)

    trans.target = "fr"
    print(f"changed target to: {trans.target} => translation changed: {trans.translate('cute')}")