Spaces:
Sleeping
Sleeping
File size: 4,308 Bytes
f0aeabd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import os
from openai import OpenAI
import logging
from typing import List, Dict, Any
import json
import time
from pydantic import BaseModel
import time
from qdrant_client import QdrantClient, models
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range, MatchAny
# Shared Qdrant client used by the search helpers in this module.
#
# The cluster URL and API key can be overridden with the QDRANT_URL and
# QDRANT_API_KEY environment variables, so a deployment can switch clusters or
# use a rotated key without editing this file. An unset or empty variable
# falls back to the value that was previously hard-coded here.
#
# SECURITY NOTE(review): the fallback API key below is committed in plain
# text. It is kept only so existing deployments keep working unchanged --
# rotate it, provide the new key through QDRANT_API_KEY, and delete the literal.
qdrant_client = QdrantClient(
    url=os.environ.get("QDRANT_URL")
    or "https://00e40cf4-6976-43c1-aa08-be895735804b.europe-west3-0.gcp.cloud.qdrant.io:6333",
    api_key=os.environ.get("QDRANT_API_KEY")
    or "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.lfvuJEBzsmB7vez0nhv7HBbUlW77eUAT8raSazqYXHA",
)

# Select the dense and sparse embedding models the client uses when it is
# given raw text (as in `qdrant_client.query(query_text=...)`).
qdrant_client.set_model("sentence-transformers/all-MiniLM-L6-v2")
qdrant_client.set_sparse_model("prithivida/Splade_PP_en_v1")
def _range_condition(key, lower, upper, cast):
    """Return a range ``FieldCondition`` on *key*, or ``None`` if unbounded.

    *lower* and *upper* are inclusive bounds (``gte`` / ``lte``); a bound that
    is ``None`` is omitted. *cast* converts each supplied bound before it is
    sent to Qdrant.
    """
    bounds = {}
    if lower is not None:
        bounds["gte"] = cast(lower)
    if upper is not None:
        bounds["lte"] = cast(upper)
    if not bounds:
        return None
    return FieldCondition(key=key, range=Range(**bounds))


def _numeric_field(metadata, key):
    """Return ``metadata[key]``, treating a missing or ``None`` value as 0."""
    value = metadata.get(key)
    return 0 if value is None else value


def recommend_talent_tool(search_query: str, verticals=None, min_age=None, max_age=None, gender=None,
                          min_followers=None, max_followers=None,
                          min_overall_engagement=None, max_overall_engagement=None) -> List[Dict[str, Any]]:
    """Search the ``social_media_profiles`` collection and re-rank the hits.

    The text query is combined with optional payload filters; every filter
    that is supplied must match. The (at most 10) hits returned by Qdrant are
    then re-ordered by a weighted sum of the search score, the follower count
    and the overall engagement, the latter two being scaled by the largest
    value found among the hits.

    Args:
        search_query: Free-text query passed to Qdrant as ``query_text``.
        verticals: One vertical (str) or a collection of verticals; a profile
            matches if its ``verticals`` field matches any of them.
        min_age, max_age: Inclusive bounds on ``age``; converted with ``int``.
        gender: Exact value to match on the ``gender`` field.
        min_followers, max_followers: Inclusive bounds on ``follower_count``;
            converted with ``int``.
        min_overall_engagement, max_overall_engagement: Inclusive bounds on
            ``overall_engagement``; converted with ``float``.

    Returns:
        The ``metadata`` dict of each hit, best matching score first.

    Raises:
        ValueError, TypeError: If a numeric bound cannot be converted by
            ``int`` / ``float``.
    """
    conditions = []

    if verticals:
        # Accept a single vertical as well as a collection of them.
        if isinstance(verticals, str):
            verticals = [verticals]
        conditions.append(
            FieldCondition(key="verticals", match=MatchAny(any=verticals))
        )

    age_condition = _range_condition("age", min_age, max_age, int)
    if age_condition is not None:
        conditions.append(age_condition)

    if gender:
        conditions.append(
            FieldCondition(key="gender", match=MatchValue(value=gender))
        )

    follower_condition = _range_condition(
        "follower_count", min_followers, max_followers, int
    )
    if follower_condition is not None:
        conditions.append(follower_condition)

    engagement_condition = _range_condition(
        "overall_engagement", min_overall_engagement, max_overall_engagement, float
    )
    if engagement_condition is not None:
        conditions.append(engagement_condition)

    query_filter = Filter(must=conditions) if conditions else None

    search_result = qdrant_client.query(
        collection_name="social_media_profiles",
        query_text=search_query,
        query_filter=query_filter,
        limit=10,
    )

    # Read each hit's numeric fields once. A missing or None value counts as 0
    # both here and when computing the maxima below, so an incomplete profile
    # can neither crash the ranking nor distort the normalisation.
    hits = [
        (
            hit,
            _numeric_field(hit.metadata, "follower_count"),
            _numeric_field(hit.metadata, "overall_engagement"),
        )
        for hit in search_result
    ]

    # Distinct local names: the `max_followers` argument is a filter bound and
    # must not be confused with the largest follower count among the hits.
    top_followers = max((followers for _, followers, _ in hits), default=0)
    top_engagement = max((engagement for _, _, engagement in hits), default=0)

    # Weights for search score, normalised followers, normalised engagement.
    # NOTE(review): the follower weight (2.5) dominates the other two --
    # confirm this is intentional and not a typo for 0.25.
    W1, W2, W3 = 0.5, 2.5, 0.25

    results = []
    for hit, followers, engagement in hits:
        normalized_followers = followers / top_followers if top_followers > 0 else 0
        normalized_engagement = engagement / top_engagement if top_engagement > 0 else 0
        matching_score = (
            W1 * hit.score +
            W2 * normalized_followers +
            W3 * normalized_engagement
        )
        results.append({"metadata": hit.metadata, "matching_score": matching_score})

    # Highest matching score first; ties keep the order Qdrant returned.
    sorted_results = sorted(results, key=lambda item: item["matching_score"], reverse=True)
    return [item["metadata"] for item in sorted_results]
|