File size: 2,131 Bytes
fcd327a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import json
import os

from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; its encode() output is stored next to each
# listing's description further down in this script.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Where the OpenSearch node is expected to listen.
host, port = "localhost", 9200

# The admin password comes from the environment; the literal below is only the
# fallback used when OPENSEARCH_ADMIN_PASSWORD is unset.
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "yw7L5u9nLs3a")
auth = ("admin", OPENSEARCH_ADMIN_PASSWORD)

# TLS is enabled, but certificate verification, hostname assertion and the
# related SSL warnings are all switched off.
# NOTE(review): presumably meant for a local node with a self-signed
# certificate -- confirm before pointing this at a shared cluster.
client = OpenSearch(
    hosts=[dict(host=host, port=port)],
    http_auth=auth,
    http_compress=True,  # gzip-compress request bodies
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

def _latest_description(descriptions):
    """Return the text of the description entry with the greatest ``dateSeen``.

    Raises:
        ValueError: if ``descriptions`` is empty.
        KeyError, TypeError: if an entry lacks ``dateSeen``/``value`` or
            ``descriptions`` is not a sequence of dicts.
    """
    # max() returns the first maximal entry, which is the same element a
    # stable descending sort would place at index 0.
    return max(descriptions, key=lambda entry: entry["dateSeen"])["value"]


def _build_row(listing):
    """Map one raw listing record to the document that gets indexed.

    Args:
        listing: Dict parsed from one line of the input file.

    Returns:
        The document dict without the ``publicDescriptionKnn`` embedding;
        the caller adds that field once the record is known to be valid.

    Raises:
        KeyError, TypeError, ValueError: if a required field is missing or
            has an unusable value.
    """
    description = _latest_description(listing["descriptions"])
    if not isinstance(description, str):
        # Fail here with a data error instead of deep inside the encoder.
        raise TypeError("description value is not a string")

    full_address = ", ".join(
        [
            listing["address"],
            listing["city"],
            listing["province"],
            listing["postalCode"][:5],  # keep only the first five characters
        ]
    )
    return {
        "bathrooms": int(listing["numBathroom"]),
        "bedrooms": listing["numBedroom"],
        "listingPrice": listing["mostRecentPriceAmount"],
        "squareFootage": listing["floorSizeValue"],
        "address": full_address,
        "publicDescription": description,
    }


# Alternating action/document entries, as expected by the bulk API.
bulk_body = []
skipped = 0

# The input holds one JSON object per line; JSON text is read as UTF-8
# regardless of the platform's default encoding.
with open("datafiniti_properties_sunnyvale_400.json", "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        if not line.strip():
            continue  # tolerate blank lines, e.g. a trailing newline
        try:
            listing = json.loads(line)
            row = _build_row(listing)
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            # Best effort: drop records with missing or malformed fields
            # (json.JSONDecodeError is a ValueError), but say so instead of
            # hiding the problem.
            skipped += 1
            print(f"skipping line {line_no}: {exc!r}")
            continue

        print(f'indexing {listing["address"]}')
        # Embedding failures are not data errors, so they are allowed to
        # propagate instead of being swallowed.
        row["publicDescriptionKnn"] = model.encode(row["publicDescription"]).tolist()

        # "create" refuses to overwrite an existing document with this _id.
        bulk_body.append(
            {"create": {"_index": "datafiniti_props", "_id": row["address"]}}
        )
        bulk_body.append(row)

if skipped:
    print(f"skipped {skipped} record(s) with missing or malformed fields")

if bulk_body:
    response = client.bulk(
        index="datafiniti_props",
        body=bulk_body,
    )
    # A bulk call can succeed as a whole while individual items fail; the
    # response's "errors" flag and per-item "error" entries report that.
    if response.get("errors"):
        failed = [
            item
            for item in response.get("items", [])
            if "error" in item.get("create", {})
        ]
        print(f"bulk request completed with {len(failed)} failed item(s)")
else:
    # Do not send a bulk request with an empty body.
    print("no valid records found; nothing to index")