Spaces:

pmkhanh7890
/

handwriting-recognition

Sleeping

File size: 2,591 Bytes
import argparse
import os

import cv2
import requests
from requests import exceptions

# Command-line interface: two mandatory options, registered from a small
# table of (short flag, long flag, help text).
ap = argparse.ArgumentParser()
for short_flag, long_flag, description in (
    ("-q", "--query", "search query to search Bing Image API for"),
    ("-o", "--output", "path to output directory of images"),
):
    ap.add_argument(short_flag, long_flag, required=True, help=description)
# vars() turns the parsed namespace into a plain dict with the keys
# "query" and "output".
args = vars(ap.parse_args())
# Bing subscription key. The BING_API_KEY environment variable takes
# precedence so the credential does not have to live in source control.
# SECURITY(review): the literal fallback below is a secret committed to the
# repository. It is kept only so existing invocations keep working; it should
# be rotated and then removed.
API_KEY = os.environ.get("BING_API_KEY") or "d8982f9e69a4437fa6e10715d1ed691d"
# Upper bound on how many results are processed for one query.
MAX_RESULTS = 500
# Number of results requested per API call (sent as the "count" parameter).
GROUP_SIZE = 50
# Image search endpoint that every search request is sent to.
URL = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"
# Exception classes for which the download loop skips the current image:
# built-in file/IO errors combined with the error classes exposed by
# requests.exceptions.
EXCEPTIONS = {IOError, FileNotFoundError} | {
    exceptions.RequestException,
    exceptions.HTTPError,
    exceptions.ConnectionError,
    exceptions.Timeout,
}
# Search term supplied on the command line.
term = args["query"]
# The subscription key travels in the Ocp-Apim-Subscription-Key header.
headers = {"Ocp-Apim-Subscription-Key": API_KEY}
# Query parameters: "count" is the page size, "offset" the index of the first
# result of the requested page.
params = {"q": term, "offset": 0, "count": GROUP_SIZE}
print(f"[INFO] searching Bing API for '{term}'")
# Initial request, used here only to read the estimated number of matches.
# The timeout keeps the script from blocking forever on an unresponsive
# server; it matches the 30 s limit used for the individual image fetches.
search = requests.get(URL, headers=headers, params=params, timeout=30)
# Abort on any HTTP error status (bad key, quota exceeded, ...).
search.raise_for_status()
results = search.json()
# Never process more than MAX_RESULTS, whatever the API estimates.
estNumResults = min(results["totalEstimatedMatches"], MAX_RESULTS)
print(f"[INFO] {estNumResults} total results for '{term}'")
# Exception classes that mean "give up on this image and move on".
# They are matched with normal `except` semantics, so subclasses such as a
# read timeout or an SSL error are skipped too. An exact `type(e) in ...`
# lookup misses those, and execution would then continue with the path of a
# previously saved image (or with no path at all on the first image).
skippable_errors = tuple(EXCEPTIONS)

# Create the target directory up front; otherwise every single download
# would fail on open() and be skipped.
os.makedirs(args["output"], exist_ok=True)

# Number of images kept so far; also used to build the next file name.
total = 0
for offset in range(0, estNumResults, GROUP_SIZE):
    print(
        f"[INFO] making request for group {offset}-{offset + GROUP_SIZE} "
        f"of {estNumResults}...",
    )
    # Request the page of results that starts at this offset.
    params["offset"] = offset
    search = requests.get(URL, headers=headers, params=params, timeout=30)
    search.raise_for_status()
    results = search.json()
    print(
        f"[INFO] saving images for group {offset}-{offset + GROUP_SIZE} "
        f"of {estNumResults}...",
    )
    for v in results["value"]:
        url = v["contentUrl"]
        try:
            print(f"[INFO] fetching: {url}")
            r = requests.get(url, timeout=30)
            # NOTE: the suffix is everything from the last "." in the URL, so
            # a query string that follows the extension ends up in the name.
            ext = url[url.rfind(".") :]
            p = os.path.join(args["output"], f"{total:08d}{ext}")
            # The context manager closes the file even if the write fails.
            with open(p, "wb") as f:
                f.write(r.content)
        except skippable_errors:
            print(f"[INFO] skipping: {url}")
            continue
        # Anything OpenCV cannot decode (imread returns None) is discarded.
        image = cv2.imread(p)
        if image is None:
            print(f"[INFO] deleting: {p}")
            os.remove(p)
            continue
        # Only successfully decoded images advance the counter, so kept files
        # are numbered consecutively.
        total += 1