# app.py
"""Gradio front-end that asks Gemini to recommend SHL assessments.

At import time the module configures the Gemini client from the
``GEMINI_API_KEY`` environment variable, loads ``data/assessments.csv`` and
normalises it into the ``assessments`` DataFrame that is embedded in every
prompt.
"""

import json
import os
import re
from typing import Any, Dict, List, Optional

import google.generativeai as genai
import gradio as gr
import pandas as pd

# Configure Gemini
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-pro")

# Load and clean CSV
df_raw = pd.read_csv("data/assessments.csv")

# Matches a reply that is entirely wrapped in one Markdown code fence,
# e.g. "```json\n{...}\n```"; group 1 is the fenced payload.
_CODE_FENCE = re.compile(r"^```[A-Za-z0-9_-]*[ \t]*\n(.*?)\n?```$", re.DOTALL)


def _strip_code_fence(text: str) -> str:
    """Return *text* without surrounding whitespace or a wrapping code fence."""
    stripped = text.strip()
    match = _CODE_FENCE.match(stripped)
    return match.group(1) if match else stripped


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Build the cleaned assessment table from the raw CSV frame.

    Columns are addressed by position (2, 3, 4, 5, 6 and 9), so *df* must
    have at least ten columns in the layout of ``data/assessments.csv``.

    Args:
        df: Raw DataFrame as returned by ``pd.read_csv``.

    Returns:
        A DataFrame with the columns ``url``, ``remote_support``,
        ``adaptive_support``, ``test_type``, ``description`` and ``duration``.
    """

    def clean_duration(text: Any) -> Optional[int]:
        """Parse the integer after the last ``=``; ``None`` if not parseable."""
        try:
            return int(text.split('=')[-1].strip())
        except (AttributeError, ValueError):
            # AttributeError: non-string cell (e.g. NaN for a missing value).
            # ValueError: the trailing text is not an integer.
            return None

    def clean_support(val: Any) -> str:
        """Map the flag ``'T'`` to ``"Yes"`` and anything else to ``"No"``."""
        return "Yes" if val == 'T' else "No"

    def clean_test_type(val: Any) -> List[str]:
        """Split a newline-separated cell into its non-empty, trimmed parts."""
        return [x.strip() for x in str(val).split('\n') if x.strip()]

    df_cleaned = pd.DataFrame({
        "url": "https://www.shl.com" + df.iloc[:, 2].astype(str),
        "remote_support": df.iloc[:, 3].apply(clean_support),
        "adaptive_support": df.iloc[:, 4].apply(clean_support),
        "test_type": df.iloc[:, 5].apply(clean_test_type),
        "description": df.iloc[:, 6],
        "duration": df.iloc[:, 9].apply(clean_duration),
    })
    return df_cleaned


assessments = preprocess_data(df_raw)


def recommend_assessments(query: str, top_k: int = 10) -> Dict[str, Any]:
    """Ask Gemini for the *top_k* assessments most relevant to *query*.

    Args:
        query: Job description text entered by the user.
        top_k: Number of recommendations requested in the prompt.

    Returns:
        The parsed JSON object from the model's reply, or a dict with a
        single ``"error"`` key when the reply has no readable text or is not
        valid JSON.
    """
    # Serialise the catalogue as real JSON: the Python repr of the records
    # uses single quotes and ``nan``/``None``, which the model may echo back
    # into a reply that then fails to parse. ``to_json`` writes ``null``.
    catalog_json = assessments.to_json(orient="records", force_ascii=False)

    prompt = f"""
Given this job description: "{query}", recommend the top {top_k} relevant SHL assessments from the following list.
Return the result as JSON with this format:
{{
  "recommended_assessments": [
    {{
      "url": ...,
      "adaptive_support": ...,
      "remote_support": ...,
      "description": ...,
      "duration": ...,
      "test_type": [...]
    }},
    ...
  ]
}}
Data: {catalog_json}
"""

    response = model.generate_content(prompt)

    # Read the text exactly once. The accessor itself can fail (presumably
    # when the reply was blocked or is empty - confirm against the SDK), and
    # re-reading it inside an error handler would raise a second time.
    try:
        raw_text = response.text
    except Exception as exc:  # UI boundary: report the failure, do not crash
        return {"error": f"Failed to read response text: {exc}"}

    try:
        # Models frequently wrap JSON in a Markdown code fence; unwrap first.
        return json.loads(_strip_code_fence(raw_text))
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        return {"error": f"Failed to parse response: {exc}\n{raw_text}"}


def predict(query: str) -> Dict[str, Any]:
    """Gradio callback: recommend assessments with the default ``top_k``."""
    return recommend_assessments(query)


iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter Job Description", lines=4),
    outputs="json",
    title="SHL Assessment Recommender (Gemini-powered)",
    description="Paste a job description and get the most relevant SHL assessments.",
)

if __name__ == "__main__":
    iface.launch()