block not found or malformed in response.")
response_lower = intent_decision_part.lower()
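            # Default to "Maybe" (0.1); an explicit "Intent: Yes" in the model
            # output raises it to 0.3, "Intent: No" drops it to 0.0. The UI later
            # maps these numbers back to Yes/Maybe/No labels.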
intent_score = 0.1
if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
intent_score = 0.3
elif 'intent: no' in response_lower or 'intent:no' in response_lower:
intent_score = 0.0
print(f"[analyze_intent] Parsed Intent: {intent_score}, Decision part: {intent_decision_part[:100]}...")
return intent_score
except Exception as e:
st.warning(f"Error analyzing intent with Qwen3-1.7B: {str(e)}")
print(f"[analyze_intent] EXCEPTION: {str(e)}")
return 0.1
def calculate_final_scores(self, results_with_all_scores):
"""Stage 5: Calculate final combined scores"""
print("[calculate_final_scores] Method started.")
st.text("Final Ranking: Calculating combined scores...")
try:
final_results = []
for idx, cross_score, bm25_score, intent_score in results_with_all_scores:
# Normalize cross-encoder score to 0-1 range
normalized_cross = max(0, min(1, cross_score))
# Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)
final_score = normalized_cross + bm25_score + intent_score
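                # Worked example: cross = 0.85, bm25 = 0.15, intent = 0.30
                #   -> final = 0.85 + 0.15 + 0.30 = 1.30 ("Excellent", >= 1.2)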
final_results.append({
'index': idx,
'cross_encoder_score': normalized_cross,
'bm25_score': bm25_score,
'intent_score': intent_score,
'final_score': final_score
})
# Sort by final score
final_results.sort(key=lambda x: x['final_score'], reverse=True)
print("[calculate_final_scores] Final scores calculated and sorted.")
st.text("Final Ranking: Complete.")
return final_results
except Exception as e:
st.error(f"Error calculating final scores: {str(e)}")
return []
def extract_skills(self, text, job_description):
"""Extract skills from resume based on job description"""
if not text:
return []
# Common tech skills
common_skills = [
"python", "java", "javascript", "react", "angular", "vue", "node.js",
"express", "django", "flask", "spring", "sql", "nosql", "html", "css",
"aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "git", "github",
"agile", "scrum", "jira", "ci/cd", "devops", "microservices", "rest", "api",
"machine learning", "deep learning", "data science", "artificial intelligence",
"tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy",
"matplotlib", "seaborn", "jupyter", "r", "sas", "spss", "tableau", "powerbi",
"excel", "mysql", "postgresql", "mongodb", "redis", "elasticsearch",
"kafka", "rabbitmq", "spark", "hadoop", "hive", "airflow", "linux", "unix"
]
# Extract potential skills from job description
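        # NOTE: word_tokenize requires NLTK's "punkt" tokenizer data; this assumes
        # nltk.download('punkt') already ran during app setup (not shown in this section).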
job_words = set(word.lower() for word in word_tokenize(job_description) if len(word) > 2)
# Find matching skills
found_skills = []
text_lower = text.lower()
        # Check common skills that appear in both resume and job description.
        # Match against the full job-description string (not the token set) so
        # multi-word skills like "machine learning" can still be found.
        job_desc_lower = job_description.lower()
        for skill in common_skills:
            if skill in text_lower and skill in job_desc_lower:
                found_skills.append(skill)
# Check for skills mentioned in job description
for word in job_words:
if len(word) > 3 and word in text_lower and word not in found_skills:
# Basic filter to avoid common words
if word not in ['with', 'have', 'that', 'this', 'from', 'what', 'when', 'where']:
found_skills.append(word)
        # Deduplicate while preserving discovery order, then keep the first 15.
        return list(dict.fromkeys(found_skills))[:15]
def create_download_link(df, filename="resume_screening_results.csv"):
"""Create download link for results"""
csv = df.to_csv(index=False)
b64 = base64.b64encode(csv.encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="{filename}">📥 Download Results CSV</a>'
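# Usage (see the results tab below):
#   st.markdown(create_download_link(download_df), unsafe_allow_html=True)
# renders the base64 data-URI anchor as a clickable download link.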
# Main App Interface
st.title("๐ฏ AI-Powered Resume Screener")
st.markdown("*Find the perfect candidates using BAAI/bge-large-en-v1.5 embeddings and Qwen3-1.7B for intent analysis*")
st.markdown("---")
# Initialize screener
screener = ResumeScreener()
# Job Description Input
st.header("๐ Step 1: Enter Job Description")
job_description = st.text_area(
"Enter the complete job description or requirements:",
height=150,
placeholder="Paste the job description here, including required skills, experience, and qualifications..."
)
# Resume Input Options
st.header("๐ Step 2: Upload Resumes")
# Show loaded resumes indicator
if st.session_state.resume_texts:
col1, col2 = st.columns([3, 1])
with col1:
st.info(f"๐ {len(st.session_state.resume_texts)} resumes loaded and ready for analysis")
with col2:
if st.button("๐๏ธ Clear Resumes", type="secondary", help="Clear all loaded resumes to start fresh"):
st.session_state.resume_texts = []
st.session_state.file_names = []
st.session_state.results = []
st.session_state.current_job_description = ""
st.rerun()
input_method = st.radio(
    "Choose input method:",
    ["📁 Upload Files", "🗂️ Load from CSV Dataset", "🌐 Load from Hugging Face Dataset"]
)
if input_method == "๐ Upload Files":
uploaded_files = st.file_uploader(
"Upload resume files",
type=["pdf", "docx", "txt"],
accept_multiple_files=True,
help="Supported formats: PDF, DOCX, TXT"
)
if uploaded_files:
with st.spinner(f"๐ Processing {len(uploaded_files)} files..."):
resume_texts = []
file_names = []
for file in uploaded_files:
file_type = file.name.split('.')[-1].lower()
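                # Write the upload to a real temp file: extract_text_from_file reads
                # from a filesystem path. delete=False keeps the file past the
                # with-block; it is removed explicitly with os.unlink afterwards.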
with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_type}') as tmp_file:
tmp_file.write(file.getvalue())
tmp_path = tmp_file.name
text = screener.extract_text_from_file(tmp_path, file_type)
if text.strip():
resume_texts.append(text)
file_names.append(file.name)
os.unlink(tmp_path)
st.session_state.resume_texts = resume_texts
st.session_state.file_names = file_names
if resume_texts:
st.success(f"โ
Successfully processed {len(resume_texts)} resumes")
elif input_method == "๐๏ธ Load from CSV Dataset":
csv_file = st.file_uploader("Upload CSV file with resume data", type=["csv"])
if csv_file:
try:
df = pd.read_csv(csv_file)
st.write("**CSV Preview:**")
st.dataframe(df.head())
text_column = st.selectbox(
"Select column containing resume text:",
df.columns.tolist()
)
name_column = st.selectbox(
"Select column for candidate names/IDs (optional):",
["Use Index"] + df.columns.tolist()
)
if st.button("๐ Process CSV Data"):
with st.spinner("๐ Processing CSV data..."):
resume_texts = []
file_names = []
for idx, row in df.iterrows():
text = str(row[text_column])
if text and text.strip() and text.lower() != 'nan':
resume_texts.append(text)
if name_column == "Use Index":
file_names.append(f"Resume_{idx}")
else:
file_names.append(str(row[name_column]))
st.session_state.resume_texts = resume_texts
st.session_state.file_names = file_names
if resume_texts:
st.success(f"โ
Successfully loaded {len(resume_texts)} resumes from CSV")
except Exception as e:
st.error(f"โ Error processing CSV: {str(e)}")
elif input_method == "๐ Load from Hugging Face Dataset":
st.markdown("**Popular Resume Datasets:**")
st.markdown("- `ahmedheakl/resume-atlas`")
st.markdown("- `InferenceFly/Resume-Dataset`")
col1, col2 = st.columns([2, 1])
with col1:
dataset_name = st.text_input(
"Dataset name:",
value="ahmedheakl/resume-atlas",
help="Enter Hugging Face dataset name"
)
with col2:
dataset_split = st.selectbox("Split:", ["train", "test", "validation"], index=0)
if st.button("๐ Load from Hugging Face"):
try:
with st.spinner(f"๐ Loading {dataset_name}..."):
dataset = load_dataset(dataset_name, split=dataset_split)
st.success(f"โ
Loaded dataset with {len(dataset)} entries")
st.write("**Dataset Preview:**")
preview_df = pd.DataFrame(dataset[:5])
st.dataframe(preview_df)
text_column = st.selectbox(
"Select column with resume text:",
dataset.column_names,
index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
)
category_column = None
if 'category' in dataset.column_names:
categories = list(set(dataset['category']))
category_column = st.selectbox(
"Filter by category (optional):",
["All"] + categories
)
max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
if st.button("๐ Process Dataset"):
with st.spinner("๐ Processing dataset..."):
resume_texts = []
file_names = []
filtered_dataset = dataset
if category_column and category_column != "All":
filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
sample_indices = list(range(min(max_samples, len(filtered_dataset))))
for idx in sample_indices:
item = filtered_dataset[idx]
text = str(item[text_column])
if text and text.strip() and text.lower() != 'nan':
resume_texts.append(text)
if 'id' in item:
file_names.append(f"Resume_{item['id']}")
else:
file_names.append(f"Resume_{idx}")
st.session_state.resume_texts = resume_texts
st.session_state.file_names = file_names
if resume_texts:
st.success(f"โ
Successfully loaded {len(resume_texts)} resumes")
except Exception as e:
st.error(f"โ Error loading dataset: {str(e)}")
# Processing and Results
st.header("๐ Step 3: Analyze Resumes")
# First button: Find top K candidates (fast ranking)
col1, col2 = st.columns([1, 1])
with col1:
if st.button("๐ Advanced Pipeline Analysis",
disabled=not (job_description and st.session_state.resume_texts),
type="primary",
help="Run the complete 5-stage advanced pipeline"):
print("--- Advanced Pipeline Analysis Button Clicked ---")
if len(st.session_state.resume_texts) == 0:
st.error("โ Please upload resumes first!")
elif not job_description.strip():
st.error("โ Please enter a job description!")
else:
print("[UI Button] Pre-checks passed. Starting spinner and pipeline.")
with st.spinner("๐ Running Advanced Pipeline Analysis..."):
st.text("Pipeline Initiated: Starting advanced analysis...")
try:
# Run the advanced pipeline
pipeline_results = screener.advanced_pipeline_ranking(
st.session_state.resume_texts, job_description
)
# Prepare results for display
results = []
for rank, result_data in enumerate(pipeline_results, 1):
idx = result_data['index']
name = st.session_state.file_names[idx]
text = st.session_state.resume_texts[idx]
# Extract skills
skills = screener.extract_skills(text, job_description)
results.append({
'rank': rank,
'name': name,
'final_score': result_data['final_score'],
'cross_encoder_score': result_data['cross_encoder_score'],
'bm25_score': result_data['bm25_score'],
'intent_score': result_data['intent_score'],
'skills': skills,
'text': text,
'text_preview': text[:500] + "..." if len(text) > 500 else text
})
# Store in session state
st.session_state.results = results
st.session_state.current_job_description = job_description
st.success(f"๐ Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
st.text("Displaying Top Candidates...")
except Exception as e:
st.error(f"โ Error during analysis: {str(e)}")
# Display Results
if st.session_state.results:
st.header("๐ Top Candidates")
# Create tabs for different views
tab1, tab2, tab3 = st.tabs(["๐ Summary", "๐ Detailed Analysis", "๐ Visualizations"])
with tab1:
# Create summary dataframe with new scoring system
summary_data = []
for result in st.session_state.results:
# Map intent score to text
intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
summary_data.append({
"Rank": result['rank'],
"Candidate": result['name'],
"Final Score": f"{result['final_score']:.2f}",
"Cross-Encoder": f"{result['cross_encoder_score']:.2f}",
"BM25": f"{result['bm25_score']:.2f}",
"Intent": f"{intent_text} ({result['intent_score']:.1f})",
"Top Skills": ", ".join(result['skills'][:5])
})
summary_df = pd.DataFrame(summary_data)
# Style the dataframe
        # Color-code score cells: green = strong, yellow = middling, red = weak.
        # Cell values are strings, so parse the number back out; anything >= 1.0
        # can only come from the Final Score column (components are capped at 1.0).
        def color_scores(val):
            if isinstance(val, str) and any(char.isdigit() for char in val):
                try:
                    numeric_val = float(''.join(c for c in val if c.isdigit() or c == '.'))
                except ValueError:
                    return ''
                if numeric_val >= 1.2:
                    return 'background-color: #d4edda'  # green (Final Score, Excellent)
                elif numeric_val >= 1.0:
                    return 'background-color: #fff3cd'  # yellow (Final Score, Good)
                elif numeric_val >= 0.7:
                    return 'background-color: #d4edda'  # green (strong component score)
                elif numeric_val >= 0.5:
                    return 'background-color: #fff3cd'  # yellow (middling component score)
                else:
                    return 'background-color: #f8d7da'  # red
            return ''
        # Styler.applymap was renamed Styler.map in pandas 2.1; applymap still works
        # (with a FutureWarning on newer versions).
        styled_df = summary_df.style.applymap(color_scores, subset=['Final Score', 'Cross-Encoder', 'BM25'])
st.dataframe(styled_df, use_container_width=True)
# Download link
detailed_data = []
for result in st.session_state.results:
intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
detailed_data.append({
"Rank": result['rank'],
"Candidate": result['name'],
"Final_Score": result['final_score'],
"Cross_Encoder_Score": result['cross_encoder_score'],
"BM25_Score": result['bm25_score'],
"Intent_Score": result['intent_score'],
"Intent_Analysis": intent_text,
"Skills": "; ".join(result['skills']),
"Resume_Preview": result['text_preview']
})
download_df = pd.DataFrame(detailed_data)
st.markdown(create_download_link(download_df), unsafe_allow_html=True)
with tab2:
# Detailed results with new scoring breakdown
for result in st.session_state.results:
intent_text = "Yes" if result['intent_score'] == 0.3 else "Maybe" if result['intent_score'] == 0.1 else "No"
with st.expander(f"#{result['rank']}: {result['name']} (Final Score: {result['final_score']:.2f})"):
col1, col2 = st.columns([1, 2])
with col1:
st.metric("๐ Final Score", f"{result['final_score']:.2f}")
st.write("**๐ Score Breakdown:**")
st.metric("๐ฏ Cross-Encoder", f"{result['cross_encoder_score']:.2f}", help="Semantic relevance (0-1)")
st.metric("๐ค BM25 Keywords", f"{result['bm25_score']:.2f}", help="Keyword matching (0.1-0.2)")
st.metric("๐ค Intent Analysis", f"{intent_text} ({result['intent_score']:.1f})", help="Job seeking likelihood (0-0.3)")
st.write("**๐ฏ Matching Skills:**")
                    # Lay the top 10 skills out across two columns.
                    skill_cols = st.columns(2)
                    for idx, skill in enumerate(result['skills'][:10]):
                        with skill_cols[idx % 2]:
                            st.write(f"• {skill}")
with col2:
st.write("**๐ Resume Preview:**")
st.text_area("", result['text_preview'], height=200, disabled=True, key=f"preview_{result['rank']}")
with tab3:
# Score visualization
if len(st.session_state.results) > 1:
# Bar chart
st.subheader("Score Comparison")
chart_data = pd.DataFrame({
'Candidate': [r['name'][:20] + '...' if len(r['name']) > 20 else r['name']
for r in st.session_state.results],
'Final Score': [r['final_score'] for r in st.session_state.results],
'Cross-Encoder': [r['cross_encoder_score'] for r in st.session_state.results],
'BM25': [r['bm25_score'] for r in st.session_state.results],
'Intent': [r['intent_score'] for r in st.session_state.results]
})
st.bar_chart(chart_data.set_index('Candidate'))
# Score distribution
col1, col2 = st.columns(2)
with col1:
st.subheader("Score Distribution")
score_ranges = {
                    'Excellent (≥1.2)': sum(1 for r in st.session_state.results if r['final_score'] >= 1.2),
'Good (1.0-1.2)': sum(1 for r in st.session_state.results if 1.0 <= r['final_score'] < 1.2),
'Fair (0.8-1.0)': sum(1 for r in st.session_state.results if 0.8 <= r['final_score'] < 1.0),
'Poor (<0.8)': sum(1 for r in st.session_state.results if r['final_score'] < 0.8),
}
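                # These buckets align with the summary-table coloring: >= 1.2 green
                # ("Excellent"), 1.0-1.2 yellow; below 1.0 (red) is split into Fair/Poor.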
dist_df = pd.DataFrame({
'Range': score_ranges.keys(),
'Count': score_ranges.values()
})
st.bar_chart(dist_df.set_index('Range'))
with col2:
st.subheader("Average Scores")
avg_final = np.mean([r['final_score'] for r in st.session_state.results])
avg_cross = np.mean([r['cross_encoder_score'] for r in st.session_state.results])
avg_bm25 = np.mean([r['bm25_score'] for r in st.session_state.results])
avg_intent = np.mean([r['intent_score'] for r in st.session_state.results])
st.metric("Average Final Score", f"{avg_final:.2f}")
st.metric("Average Cross-Encoder", f"{avg_cross:.2f}")
st.metric("Average BM25", f"{avg_bm25:.2f}")
st.metric("Average Intent", f"{avg_intent:.2f}")
# Memory cleanup
st.markdown("---")
st.subheader("๐งน Reset Application")
col1, col2, col3 = st.columns([1, 1, 3])
with col1:
if st.button("๐๏ธ Clear Resumes Only", type="secondary", help="Clear only the loaded resumes"):
st.session_state.resume_texts = []
st.session_state.file_names = []
st.session_state.results = []
st.session_state.current_job_description = ""
st.success("โ
Resumes cleared!")
st.rerun()
with col2:
if st.button("๐งน Clear Everything", type="primary", help="Clear all data and free memory"):
st.session_state.resume_texts = []
st.session_state.file_names = []
st.session_state.results = []
st.session_state.current_job_description = ""
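        # empty_cache() releases unused cached GPU memory back to the driver;
        # gc.collect() then forces a garbage-collection pass for Python-side objects.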
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
st.success("โ
Everything cleared!")
st.rerun()
# Footer
st.markdown("---")
st.markdown(
"""
    🚀 Powered by BAAI/bge-large-en-v1.5 & Qwen3-1.7B | Built with Streamlit
""",
unsafe_allow_html=True
)