# File size: 4,308 Bytes
# f0aeabd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
from openai import OpenAI
import logging
from typing import List, Dict, Any
import json
import time
from pydantic import BaseModel
import time
from qdrant_client import QdrantClient, models
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range, MatchAny


# Shared Qdrant client used by every search in this module.
#
# The endpoint and API key can be supplied through the QDRANT_URL and
# QDRANT_API_KEY environment variables. The literals below are kept only as
# fallbacks so existing deployments keep working unchanged.
#
# SECURITY NOTE(review): the fallback API key is a live credential committed
# to source control. Rotate it and remove the fallback once the environment
# variables are configured everywhere this module runs.
qdrant_client = QdrantClient(
    url=os.environ.get(
        "QDRANT_URL",
        "https://00e40cf4-6976-43c1-aa08-be895735804b.europe-west3-0.gcp.cloud.qdrant.io:6333",
    ),
    api_key=os.environ.get(
        "QDRANT_API_KEY",
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.lfvuJEBzsmB7vez0nhv7HBbUlW77eUAT8raSazqYXHA",
    ),
)

# Select the dense and sparse embedding models on the client.
# NOTE(review): presumably these must match the models used when the
# collection was populated — confirm against the indexing code.
qdrant_client.set_model("sentence-transformers/all-MiniLM-L6-v2")
qdrant_client.set_sparse_model("prithivida/Splade_PP_en_v1")

# Relative weights of the three signals combined into the matching score.
# NOTE(review): the follower weight is five times the similarity weight, so
# audience size dominates the ranking — confirm this is intentional.
_SIMILARITY_WEIGHT = 0.5
_FOLLOWER_WEIGHT = 2.5
_ENGAGEMENT_WEIGHT = 0.25


def _range_condition(key, lower, upper, cast):
    """Build an inclusive range condition on *key*, or None if both bounds are unset.

    Each bound that is not None is converted with *cast* (e.g. ``int`` or
    ``float``) before being handed to Qdrant.
    """
    if lower is None and upper is None:
        return None
    bounds = {}
    if lower is not None:
        bounds["gte"] = cast(lower)
    if upper is not None:
        bounds["lte"] = cast(upper)
    return FieldCondition(key=key, range=Range(**bounds))


def _numeric(metadata, key):
    """Return ``metadata[key]``, treating a missing or None value as 0."""
    return metadata.get(key) or 0


def recommend_talent_tool(search_query: str, verticals=None, min_age=None, max_age=None, gender=None,
                          min_followers=None, max_followers=None,
                          min_overall_engagement=None, max_overall_engagement=None):
    """Search the ``social_media_profiles`` collection and rank the hits.

    The text query is sent to Qdrant together with any filters built from the
    optional arguments. The (at most 10) hits are then re-ranked by a weighted
    sum of the Qdrant score, the follower count and the overall engagement,
    the latter two normalised by the largest value among the returned hits.

    Args:
        search_query: Free-text query passed to Qdrant.
        verticals: A single vertical or a list of verticals; a profile matches
            if its ``verticals`` field matches any of them.
        min_age, max_age: Inclusive bounds on ``age``; converted with ``int``.
        gender: Exact value required for the ``gender`` field.
        min_followers, max_followers: Inclusive bounds on ``follower_count``;
            converted with ``int``.
        min_overall_engagement, max_overall_engagement: Inclusive bounds on
            ``overall_engagement``; converted with ``float``.

    Returns:
        The metadata dicts of the hits, best matching score first. Hits with
        equal scores keep the order in which Qdrant returned them.

    Raises:
        ValueError, TypeError: If a numeric bound cannot be converted by
            ``int`` / ``float``.
    """
    conditions = []

    if verticals:
        if isinstance(verticals, str):
            verticals = [verticals]
        conditions.append(
            FieldCondition(key="verticals", match=MatchAny(any=verticals))
        )

    age_condition = _range_condition("age", min_age, max_age, int)
    if age_condition is not None:
        conditions.append(age_condition)

    if gender:
        conditions.append(
            FieldCondition(key="gender", match=MatchValue(value=gender))
        )

    follower_condition = _range_condition(
        "follower_count", min_followers, max_followers, int
    )
    if follower_condition is not None:
        conditions.append(follower_condition)

    engagement_condition = _range_condition(
        "overall_engagement", min_overall_engagement, max_overall_engagement, float
    )
    if engagement_condition is not None:
        conditions.append(engagement_condition)

    query_filter = Filter(must=conditions) if conditions else None

    search_result = qdrant_client.query(
        collection_name="social_media_profiles",
        query_text=search_query,
        query_filter=query_filter,
        limit=10
    )

    # Normalisation denominators: the largest value among the returned hits.
    # Missing/None values count as 0 here *and* below; a placeholder of 1
    # would distort the scale whenever the real values are below 1, and a
    # None value would make max() raise. Distinct local names are used so the
    # ``max_followers`` filter argument is not shadowed.
    top_followers = max(
        (_numeric(hit.metadata, "follower_count") for hit in search_result),
        default=0,
    )
    top_engagement = max(
        (_numeric(hit.metadata, "overall_engagement") for hit in search_result),
        default=0,
    )

    scored = []
    for hit in search_result:
        metadata = hit.metadata
        followers = _numeric(metadata, "follower_count")
        engagement = _numeric(metadata, "overall_engagement")

        # Guard against division by zero when every hit has a zero value.
        normalized_followers = followers / top_followers if top_followers > 0 else 0
        normalized_engagement = engagement / top_engagement if top_engagement > 0 else 0

        matching_score = (
            _SIMILARITY_WEIGHT * hit.score
            + _FOLLOWER_WEIGHT * normalized_followers
            + _ENGAGEMENT_WEIGHT * normalized_engagement
        )
        scored.append((matching_score, metadata))

    # list.sort is stable, so ties keep Qdrant's original order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    return [metadata for _, metadata in scored]