Commit d5266d0 · root committed
1 parent: 3adcf09
Commit message: ss

Files changed:
- app.py (+72, -103)
- requirements.txt (+6, -6)

app.py
```diff
@@ -99,7 +99,7 @@ if st.session_state.cross_encoder is None and st.session_state.cross_encoder_error is None:
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.cross_encoder_error = error_msg
 
-# Load Qwen3-4B Tokenizer
+# Load Qwen3-4B Tokenizer and Model with 4-bit quantization (QwQ-32B style)
 if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tokenizer_error is None:
     print("[Global Init] Loading Qwen3-4B Tokenizer...")
     try:
@@ -110,43 +110,28 @@ if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tokenizer_error is None:
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.qwen3_4b_tokenizer_error = error_msg
 
-# Load Qwen3-4B Model with 4-bit quantization
 if st.session_state.qwen3_4b_model is None and st.session_state.qwen3_4b_model_error is None:
     print("[Global Init] Loading Qwen3-4B Model with 4-bit quantization...")
     try:
-        # Configure 4-bit quantization for better performance and memory efficiency
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.float16,
             bnb_4bit_use_double_quant=True
         )
-
         st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen3-4B",
+            "Qwen/Qwen3-4B",
             quantization_config=quantization_config,
             device_map="auto",
             trust_remote_code=True,
             torch_dtype=torch.float16,
             use_cache=True
         )
-        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization
-    except Exception as
-
-        print(f"
-
-        try:
-            st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-                "Qwen/Qwen3-4B",
-                torch_dtype="auto",
-                device_map="auto",
-                trust_remote_code=True
-            )
-            print("[Global Init] Qwen3-4B Model Loaded without quantization.")
-        except Exception as e_fallback:
-            error_msg = f"Failed to load Qwen3-4B Model (fallback): {str(e_fallback) if e_fallback else 'Unknown error'}"
-            print(f"❌ [Global Init] {error_msg}")
-            st.session_state.qwen3_4b_model_error = error_msg
+        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization.")
+    except Exception as e:
+        error_msg = f"Failed to load Qwen3-4B Model: {str(e) if e else 'Unknown error'}"
+        print(f"❌ [Global Init] {error_msg}")
+        st.session_state.qwen3_4b_model_error = error_msg
 
 # --- End of Global Model Loading Section ---
 
```
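The hunk above drops the unquantized fallback path in favor of a single NF4 try/except. For exercising that loading path outside Streamlit, here is a minimal standalone sketch under the same settings: the model id and `BitsAndBytesConfig` values come from the diff, while the `load_qwen3_4b` wrapper is illustrative only, and a CUDA-capable environment with `bitsandbytes` installed is assumed.

```python
# Minimal standalone sketch of the 4-bit (NF4) loading pattern in the hunk above.
# Model id and quantization settings mirror the diff; the wrapper is illustrative.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_qwen3_4b():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",             # NormalFloat4 weight quantization
        bnb_4bit_compute_dtype=torch.float16,  # matmuls run in fp16
        bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    )
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True,
    )
    return tokenizer, model
```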
```diff
@@ -193,7 +178,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
     # calculate_bm25_scores, advanced_pipeline_ranking, faiss_recall, cross_encoder_rerank,
     # add_bm25_scores, add_intent_scores, analyze_intent, calculate_final_scores, extract_skills)
     # Make sure all methods are correctly indented within the class
-
+
     def extract_text_from_file(self, file_path, file_type):
         # ... (implementation)
         try:
@@ -223,7 +208,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error extracting text from {file_path}: {str(e)}")
             return ""
-
+
     def get_embedding(self, text):
         if self.embedding_model is None:
             st.error("Embedding model is not available!")
@@ -237,7 +222,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
             return np.zeros(1024)
-
+
     def calculate_bm25_scores(self, resume_texts, job_description):
         try:
             job_tokens = word_tokenize(job_description.lower())
@@ -250,7 +235,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error calculating BM25 scores: {str(e)}")
             return [0.0] * len(resume_texts)
-
+
     def advanced_pipeline_ranking(self, resume_texts, job_description):
         print("[Pipeline] Advanced Pipeline Ranking started.")
         if not resume_texts:
```
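The `calculate_bm25_scores` context above shows NLTK tokenization going in and one score per resume coming out. A hedged sketch of that shape, using the `rank-bm25` package pinned in requirements.txt; the function body is a plausible reconstruction, not the repo's exact method:

```python
# Hedged sketch of BM25 keyword scoring against a job description, assuming
# NLTK tokenization (requires nltk.download("punkt")) and rank-bm25==0.2.2.
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi

def bm25_scores(resume_texts, job_description):
    job_tokens = word_tokenize(job_description.lower())
    corpus = [word_tokenize(text.lower()) for text in resume_texts]
    bm25 = BM25Okapi(corpus)
    return bm25.get_scores(job_tokens)  # one relevance score per resume
```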
```diff
@@ -416,7 +401,7 @@ Respond with exactly one of these formats:
                 intent_score = 0.0
             elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
                 intent_score = 0.1
-
+
             return intent_score
         except Exception as e:
             st.warning(f"Error analyzing intent with Qwen3-4B: {str(e)}")
```
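The intent hunk above only shows the "maybe" (0.1) and fall-through (0.0) branches. A hedged sketch of the full parser; the 0.3 value for "yes" is an assumption inferred from the sidebar's 0-0.3 intent range, not visible in the diff:

```python
# Hedged sketch of parsing the LLM's "Intent: yes/maybe/no" reply.
def parse_intent_score(response: str) -> float:
    response_lower = response.lower()
    if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
        return 0.3  # assumed: top of the 0-0.3 range shown in the sidebar
    if 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
        return 0.1  # taken directly from the diff
    return 0.0      # "no" or unparseable reply
```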
```diff
@@ -457,35 +442,19 @@ def create_download_link(df, filename="resume_screening_results.csv"):
     return f'<a href="data:file/csv;base64,{b64}" download="{filename}" class="download-btn">📥 Download Results CSV</a>'
 
 # --- Sidebar Configuration (Must be after global model loading and class defs if it uses them) ---
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # hf_token = st.text_input("Hugging Face Token (optional)", type="password",
-    #                          help="Enter your HF token for better rate limits")
-
-    st.markdown("---")
-    st.markdown("### 🤖 Advanced Pipeline")
-    st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
-    st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
-    st.markdown("- **Stage 3**: BM25 Keyword Matching")
-    st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
-    st.markdown("- **Final**: Combined Scoring") # Updated this line
-    st.markdown("### 📚 Models Used")
-    st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
-    st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
-    st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
-    st.markdown("### 📊 Scoring Formula")
-    st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
+st.markdown("---")
+st.markdown("### 🤖 Advanced Pipeline")
+st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
+st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
+st.markdown("- **Stage 3**: BM25 Keyword Matching")
+st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
+st.markdown("- **Final**: Combined Scoring") # Updated this line
+st.markdown("### 📚 Models Used")
+st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
+st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
+st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
+st.markdown("### 📊 Scoring Formula")
+st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
 
 # --- Main App Interface (Must be after global model loading and class defs) ---
 st.title("🎯 AI-Powered Resume Screener")
```
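Since the sidebar states the scoring formula outright, a tiny worked example helps sanity-check the ranges; the component values below are invented for illustration:

```python
# Worked example of the sidebar's formula:
# Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3).
cross_encoder = 0.82  # normalized re-ranker score, 0-1
bm25 = 0.17           # keyword score scaled into 0.1-0.2
intent = 0.3          # LLM answered "Intent: yes"

final_score = cross_encoder + bm25 + intent
print(f"{final_score:.2f}")  # 1.29, on the 0.1-1.5 scale implied by the ranges
```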
```diff
@@ -722,52 +691,52 @@ with col1:
                  type="primary",
                  help="Run the complete 5-stage advanced pipeline"):
         print("--- Advanced Pipeline Analysis Button Clicked ---")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if len(st.session_state.resume_texts) == 0:
+            st.error("❌ Please upload resumes first!")
+        elif not job_description.strip():
+            st.error("❌ Please enter a job description!")
+        else:
+            print("[UI Button] Pre-checks passed. Starting spinner and pipeline.")
+            with st.spinner("🔄 Running Advanced Pipeline Analysis..."):
+                st.text("Pipeline Initiated: Starting advanced analysis...")
+                try:
+                    # Run the advanced pipeline
+                    pipeline_results = screener.advanced_pipeline_ranking(
+                        st.session_state.resume_texts, job_description
+                    )
+
+                    # Prepare results for display
+                    results = []
+
+                    for rank, result_data in enumerate(pipeline_results, 1):
+                        idx = result_data['index']
+                        name = st.session_state.file_names[idx]
+                        text = st.session_state.resume_texts[idx]
+
+                        # Extract skills
+                        skills = screener.extract_skills(text, job_description)
+
+                        results.append({
+                            'rank': rank,
+                            'name': name,
+                            'final_score': result_data['final_score'],
+                            'cross_encoder_score': result_data['cross_encoder_score'],
+                            'bm25_score': result_data['bm25_score'],
+                            'intent_score': result_data['intent_score'],
+                            'skills': skills,
+                            'text': text,
+                            'text_preview': text[:500] + "..." if len(text) > 500 else text
+                        })
+
+                    # Store in session state
+                    st.session_state.results = results
+                    st.session_state.current_job_description = job_description
+
+                    st.success(f"🎉 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
+                    st.text("Displaying Top Candidates...")
+
+                except Exception as e:
+                    st.error(f"❌ Error during analysis: {str(e)}")
 
 # Display Results
 if st.session_state.results:
@@ -793,7 +762,7 @@ if st.session_state.results:
             "Top Skills": ", ".join(result['skills'][:5])
         })
 
-
+    summary_df = pd.DataFrame(summary_data)
 
     # Style the dataframe
     def color_scores(val):
```
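One design note on the button hunk: assembling `results` locally and then writing it to `st.session_state.results` is what lets the results table survive Streamlit's script reruns, so it stays visible while the user interacts with other widgets.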
requirements.txt
```diff
@@ -1,6 +1,6 @@
 streamlit==1.31.0
-transformers>=4.
-torch
+transformers>=4.55.0
+torch>=2.3.0
 pdfplumber==0.10.1
 PyPDF2==3.0.1
 python-docx==1.0.1
@@ -10,9 +10,9 @@ rank-bm25==0.2.2
 pandas==2.1.3
 numpy==1.24.3
 tqdm==4.66.1
-huggingface-hub
-bitsandbytes
-accelerate
+huggingface-hub>=0.27.0
+bitsandbytes>=0.44.1
+accelerate>=0.27.2
 datasets==2.18.0
 sentence-transformers==2.7.0
-einops
+einops
```
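A quick way to confirm an environment already satisfies the new version floors, using only the packages named above (a convenience sketch, not part of the commit):

```python
# Print installed versions next to the floors pinned in requirements.txt.
import accelerate, bitsandbytes, huggingface_hub, torch, transformers

print("transformers", transformers.__version__)        # needs >= 4.55.0
print("torch", torch.__version__)                      # needs >= 2.3.0
print("bitsandbytes", bitsandbytes.__version__)        # needs >= 0.44.1
print("accelerate", accelerate.__version__)            # needs >= 0.27.2
print("huggingface_hub", huggingface_hub.__version__)  # needs >= 0.27.0
```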