root committed on
Commit 19f7d68 · 1 Parent(s): da61f37
Files changed (1)
  1. app.py +60 -206
app.py CHANGED
@@ -59,10 +59,7 @@ with st.sidebar:
 
     # LLM Settings
     st.subheader("LLM Settings")
-    use_llm_explanations = st.checkbox("Generate AI Explanations", value=True)
-    if use_llm_explanations:
-        hf_token = st.text_input("Hugging Face Token (optional)", type="password",
-                                 help="Enter your HF token for better rate limits")
+    st.info("💡 Intent analysis using Qwen3-1.7B is always enabled")
 
     st.markdown("---")
     st.markdown("### 🤖 Advanced Pipeline")
@@ -74,7 +71,6 @@ with st.sidebar:
     st.markdown("### 📊 Models Used")
     st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
     st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
-    st.markdown("- **LLM Explanations**: Qwen/Qwen3-14B")
     st.markdown("- **Intent Analysis**: Qwen/Qwen3-1.7B")
     st.markdown("### 📈 Scoring Formula")
     st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
@@ -90,24 +86,10 @@ if 'resume_texts' not in st.session_state:
     st.session_state.resume_texts = []
 if 'file_names' not in st.session_state:
     st.session_state.file_names = []
-if 'explanations_generated' not in st.session_state:
-    st.session_state.explanations_generated = False
+
 if 'current_job_description' not in st.session_state:
     st.session_state.current_job_description = ""
-# Load Qwen models with error handling
-try:
-    if 'qwen3_tokenizer' not in st.session_state:
-        st.session_state.qwen3_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-14B")
-    if 'qwen3_model' not in st.session_state:
-        st.session_state.qwen3_model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen3-14B",
-            torch_dtype="auto",
-            device_map="auto"
-        )
-except Exception as e:
-    st.warning(f"⚠️ Could not load Qwen3-14B: {str(e)}")
-    st.session_state.qwen3_tokenizer = None
-    st.session_state.qwen3_model = None
+# No need for Qwen3-14B model since we're not generating explanations
 
 # Separate smaller model for intent analysis
 try:
@@ -566,76 +548,7 @@ Reason: [Brief justification]"""
 
         return explanation
 
-    def generate_llm_explanation(self, resume_text, job_description, score, skills, max_retries=3):
-        """Generate detailed explanation using Qwen3-14B"""
-        if not st.session_state.qwen3_model:
-            return self.generate_simple_explanation(score, score, score, skills)
-
-        # Truncate texts to manage token limits
-        resume_snippet = resume_text[:2000] if len(resume_text) > 2000 else resume_text
-        job_snippet = job_description[:1000] if len(job_description) > 1000 else job_description
-
-        prompt = f"""You are an expert HR analyst. Analyze this individual candidate's resume against the job requirements and write EXACTLY 150 words explaining why this specific candidate is suitable for the position.
-
-Structure your 150-word analysis as follows:
-1. Experience alignment (40-50 words)
-2. Key strengths and skills match (40-50 words)
-3. Unique value proposition (40-50 words)
-4. Overall recommendation (10-20 words)
-
-Job Requirements:
-{job_snippet}
-
-Candidate's Resume:
-{resume_snippet}
-
-Identified Matching Skills: {', '.join(skills[:10])}
-Compatibility Score: {score:.1%}
-
-Write a professional, detailed 150-word analysis for THIS INDIVIDUAL CANDIDATE:"""
-
-        for attempt in range(max_retries):
-            try:
-                response = generate_qwen3_response(
-                    prompt,
-                    st.session_state.qwen3_tokenizer,
-                    st.session_state.qwen3_model,
-                    max_new_tokens=200
-                )
-
-                # Extract the response and ensure it's about 150 words
-                explanation = response.strip()
-                word_count = len(explanation.split())
-
-                # If response is close to 150 words (130-170), accept it
-                if 130 <= word_count <= 170:
-                    return explanation
-
-                # If response is too short or too long, try again with adjusted prompt
-                if word_count < 130:
-                    # Response too short, try again
-                    continue
-                elif word_count > 170:
-                    # Response too long, truncate to approximately 150 words
-                    words = explanation.split()
-                    truncated = ' '.join(words[:150])
-                    # Add proper ending if truncated
-                    if not truncated.endswith('.'):
-                        truncated += '.'
-                    return truncated
-
-                return explanation
-
-            except Exception as e:
-                if attempt < max_retries - 1:
-                    time.sleep(2)  # Wait before retry
-                    continue
-                else:
-                    # Fallback to simple explanation
-                    return self.generate_simple_explanation(score, score, score, skills)
-
-        # If all retries failed, use simple explanation
-        return self.generate_simple_explanation(score, score, score, skills)
+
 
 def create_download_link(df, filename="resume_screening_results.csv"):
     """Create download link for results"""
@@ -672,7 +585,6 @@ if st.session_state.resume_texts:
         st.session_state.resume_texts = []
         st.session_state.file_names = []
        st.session_state.results = []
-        st.session_state.explanations_generated = False
         st.session_state.current_job_description = ""
         st.rerun()
 
@@ -835,120 +747,64 @@ elif input_method == "🔗 Load from Hugging Face Dataset":
 # Processing and Results
 st.header("🔍 Step 3: Analyze Resumes")
 
-# First button: Find top K candidates (fast ranking)
-col1, col2 = st.columns([1, 1])
-
-with col1:
-    if st.button("🚀 Advanced Pipeline Analysis",
-                 disabled=not (job_description and st.session_state.resume_texts),
-                 type="primary",
-                 help="Run the complete 5-stage advanced pipeline"):
-        if len(st.session_state.resume_texts) == 0:
-            st.error("❌ Please upload resumes first!")
-        elif not job_description.strip():
-            st.error("❌ Please enter a job description!")
-        else:
-            with st.spinner("🚀 Running Advanced Pipeline Analysis..."):
-                try:
-                    # Run the advanced pipeline
-                    pipeline_results = screener.advanced_pipeline_ranking(
-                        st.session_state.resume_texts, job_description, final_top_k=top_k
-                    )
-
-                    # Prepare results for display
-                    results = []
-
-                    for rank, result_data in enumerate(pipeline_results, 1):
-                        idx = result_data['index']
-                        name = st.session_state.file_names[idx]
-                        text = st.session_state.resume_texts[idx]
-
-                        # Extract skills
-                        skills = screener.extract_skills(text, job_description)
-
-                        results.append({
-                            'rank': rank,
-                            'name': name,
-                            'final_score': result_data['final_score'],
-                            'cross_encoder_score': result_data['cross_encoder_score'],
-                            'bm25_score': result_data['bm25_score'],
-                            'intent_score': result_data['intent_score'],
-                            'skills': skills,
-                            'text': text,
-                            'text_preview': text[:500] + "..." if len(text) > 500 else text,
-                            'explanation': None  # No detailed explanation yet
-                        })
-
-                    # Add simple explanations for now
-                    for result in results:
-                        result['explanation'] = screener.generate_simple_explanation(
-                            result['final_score'],
-                            result['cross_encoder_score'],
-                            result['bm25_score'],
-                            result['skills']
-                        )
-
-                    # Store in session state
-                    st.session_state.results = results
-                    st.session_state.explanations_generated = False
-                    st.session_state.current_job_description = job_description
-
-                    st.success(f"🚀 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
-
-                except Exception as e:
-                    st.error(f"❌ Error during analysis: {str(e)}")
-
-# Second button: Generate AI explanations (slower, optional)
-with col2:
-    # Show this button only if we have results and LLM is enabled
-    show_explanation_button = (
-        st.session_state.results and
-        use_llm_explanations and
-        st.session_state.qwen3_model and
-        not st.session_state.explanations_generated
-    )
-
-    if show_explanation_button:
-        if st.button("🤖 Generate AI Explanations",
-                     type="secondary",
-                     help="Generate detailed 150-word explanations using Qwen3-14B (takes longer)"):
-            with st.spinner("🤖 Generating detailed AI explanations..."):
-                try:
-                    explanation_progress = st.progress(0)
-                    explanation_text = st.empty()
-
-                    for i, result in enumerate(st.session_state.results):
-                        explanation_text.text(f"🤖 Generating AI explanation for candidate {i+1}/{len(st.session_state.results)}...")
-
-                        llm_explanation = screener.generate_llm_explanation(
-                            result['text'],
-                            st.session_state.current_job_description,
-                            result['final_score'],
-                            result['skills']
-                        )
-                        result['explanation'] = llm_explanation
-
-                        explanation_progress.progress((i + 1) / len(st.session_state.results))
-
-                    explanation_progress.empty()
-                    explanation_text.empty()
-
-                    # Mark explanations as generated
-                    st.session_state.explanations_generated = True
-
-                    st.success(f"🤖 AI explanations generated for all {len(st.session_state.results)} candidates!")
-
-                except Exception as e:
-                    st.error(f"❌ Error generating explanations: {str(e)}")
-
-    elif st.session_state.results and st.session_state.explanations_generated:
-        st.info("✅ AI explanations already generated!")
-
-    elif st.session_state.results and not use_llm_explanations:
-        st.info("💡 Enable 'Generate AI Explanations' in sidebar to use this feature")
-
-    elif st.session_state.results and not st.session_state.qwen3_model:
-        st.warning("⚠️ LLM model not available. Check your Hugging Face token.")
+# Run Advanced Pipeline Analysis
+if st.button("🚀 Advanced Pipeline Analysis",
+             disabled=not (job_description and st.session_state.resume_texts),
+             type="primary",
+             help="Run the complete 5-stage advanced pipeline"):
+    if len(st.session_state.resume_texts) == 0:
+        st.error("❌ Please upload resumes first!")
+    elif not job_description.strip():
+        st.error("❌ Please enter a job description!")
+    else:
+        with st.spinner("🚀 Running Advanced Pipeline Analysis..."):
+            try:
+                # Run the advanced pipeline
+                pipeline_results = screener.advanced_pipeline_ranking(
+                    st.session_state.resume_texts, job_description, final_top_k=top_k
+                )
+
+                # Prepare results for display
+                results = []
+
+                for rank, result_data in enumerate(pipeline_results, 1):
+                    idx = result_data['index']
+                    name = st.session_state.file_names[idx]
+                    text = st.session_state.resume_texts[idx]
+
+                    # Extract skills
+                    skills = screener.extract_skills(text, job_description)
+
+                    results.append({
+                        'rank': rank,
+                        'name': name,
+                        'final_score': result_data['final_score'],
+                        'cross_encoder_score': result_data['cross_encoder_score'],
+                        'bm25_score': result_data['bm25_score'],
+                        'intent_score': result_data['intent_score'],
+                        'skills': skills,
+                        'text': text,
+                        'text_preview': text[:500] + "..." if len(text) > 500 else text,
+                        'explanation': None  # Will be filled with simple explanation
+                    })
+
+                # Add simple explanations
+                for result in results:
+                    result['explanation'] = screener.generate_simple_explanation(
+                        result['final_score'],
+                        result['cross_encoder_score'],
+                        result['bm25_score'],
+                        result['skills']
+                    )
+
+                # Store in session state
+                st.session_state.results = results
+                st.session_state.current_job_description = job_description
+
+                st.success(f"🚀 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
+
+            except Exception as e:
+                st.error(f"❌ Error during analysis: {str(e)}")
 
 # Display Results
 if st.session_state.results:
@@ -1110,7 +966,6 @@ with col1:
         st.session_state.resume_texts = []
         st.session_state.file_names = []
         st.session_state.results = []
-        st.session_state.explanations_generated = False
         st.session_state.current_job_description = ""
         st.success("✅ Resumes cleared!")
         st.rerun()
@@ -1120,7 +975,6 @@ with col2:
        st.session_state.resume_texts = []
         st.session_state.file_names = []
         st.session_state.results = []
-        st.session_state.explanations_generated = False
         st.session_state.current_job_description = ""
 
         if torch.cuda.is_available():
@@ -1134,7 +988,7 @@ st.markdown("---")
 st.markdown(
     """
     <div style='text-align: center; color: #666;'>
-    🚀 Powered by BAAI/bge-large-en-v1.5 & Qwen3-14B | Built with Streamlit
+    🚀 Powered by BAAI/bge-large-en-v1.5 & Qwen3-1.7B | Built with Streamlit
     </div>
     """,
     unsafe_allow_html=True
 
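
Note on the scoring path this commit leaves untouched: the sidebar formula above (Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)) still drives the ranking; only the Qwen3-14B explanation layer was removed. A minimal sketch of that combination, assuming the pipeline has already scaled each signal to the stated range; the helper name combine_scores is illustrative and does not exist in app.py:

def combine_scores(cross_encoder_score: float,
                   bm25_score: float,
                   intent_score: float) -> float:
    """Sum the three pre-scaled ranking signals into the final score.

    Assumes cross_encoder_score is in [0, 1], bm25_score in [0.1, 0.2],
    and intent_score in [0, 0.3], as listed in the sidebar.
    """
    return cross_encoder_score + bm25_score + intent_score

# Example: a strong cross-encoder match with mid-range bonuses
# 0.85 + 0.15 + 0.20 = 1.20
print(combine_scores(0.85, 0.15, 0.20))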