Commit d5266d0 · root committed · 1 parent: 3adcf09

Files changed (2):
  1. app.py +72 -103
  2. requirements.txt +6 -6
app.py CHANGED
@@ -99,7 +99,7 @@ if st.session_state.cross_encoder is None and st.session_state.cross_encoder_err
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.cross_encoder_error = error_msg
 
-# Load Qwen3-4B Tokenizer
+# Load Qwen3-4B Tokenizer and Model with 4-bit quantization (QwQ-32B style)
 if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tokenizer_error is None:
     print("[Global Init] Loading Qwen3-4B Tokenizer...")
     try:
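For reference, a minimal standalone version of the tokenizer load this hunk wraps. The exact kwargs are elided in the diff, so treat this as a plausible reconstruction rather than the app's verbatim code:

import torch
from transformers import AutoTokenizer

try:
    # Qwen3 support requires a recent transformers release (see requirements.txt)
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True)
    print("[Global Init] Qwen3-4B Tokenizer Loaded.")
except Exception as e:
    print(f"❌ [Global Init] Failed to load Qwen3-4B Tokenizer: {e}")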
@@ -110,43 +110,28 @@ if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tok
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.qwen3_4b_tokenizer_error = error_msg
 
-# Load Qwen3-4B Model with 4-bit quantization
 if st.session_state.qwen3_4b_model is None and st.session_state.qwen3_4b_model_error is None:
     print("[Global Init] Loading Qwen3-4B Model with 4-bit quantization...")
     try:
-        # Configure 4-bit quantization for better performance and memory efficiency
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.float16,
             bnb_4bit_use_double_quant=True
         )
-
         st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen3-4B",
+            "Qwen/Qwen3-4B",
             quantization_config=quantization_config,
             device_map="auto",
             trust_remote_code=True,
             torch_dtype=torch.float16,
             use_cache=True
         )
-        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization and device_map='auto'.")
-    except Exception as e_quant:
-        error_str = str(e_quant) if e_quant else ""
-        print(f"⚠️ [Global Init] Failed to load Qwen3-4B with 4-bit quantization: {error_str}")
-        print("[Global Init] Retrying Qwen3-4B load without quantization...")
-        try:
-            st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-                "Qwen/Qwen3-4B",
-                torch_dtype="auto",
-                device_map="auto",
-                trust_remote_code=True
-            )
-            print("[Global Init] Qwen3-4B Model Loaded without quantization.")
-        except Exception as e_fallback:
-            error_msg = f"Failed to load Qwen3-4B Model (fallback): {str(e_fallback) if e_fallback else 'Unknown error'}"
-            print(f"❌ [Global Init] {error_msg}")
-            st.session_state.qwen3_4b_model_error = error_msg
+        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization.")
+    except Exception as e:
+        error_msg = f"Failed to load Qwen3-4B Model: {str(e) if e else 'Unknown error'}"
+        print(f"❌ [Global Init] {error_msg}")
+        st.session_state.qwen3_4b_model_error = error_msg
 
 # --- End of Global Model Loading Section ---
 
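This hunk replaces the two-step load (4-bit attempt, then an unquantized retry) with a single 4-bit attempt that fails hard into session state. A standalone sketch of that simplified path, runnable outside Streamlit, assuming a CUDA GPU (bitsandbytes 4-bit loading is not supported on CPU-only hosts):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 weights, fp16 compute, nested quantization -- same config as in the hunk
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# torch_dtype is omitted here: bnb_4bit_compute_dtype already fixes the compute dtype
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B",
    quantization_config=quantization_config,
    device_map="auto",        # requires accelerate
    trust_remote_code=True,
)
print(f"device: {model.device}, footprint: {model.get_memory_footprint() / 1e9:.1f} GB")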
@@ -193,7 +178,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
     # calculate_bm25_scores, advanced_pipeline_ranking, faiss_recall, cross_encoder_rerank,
     # add_bm25_scores, add_intent_scores, analyze_intent, calculate_final_scores, extract_skills)
     # Make sure all methods are correctly indented within the class
-
+
     def extract_text_from_file(self, file_path, file_type):
         # ... (implementation)
         try:
@@ -223,7 +208,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error extracting text from {file_path}: {str(e)}")
             return ""
-
+
     def get_embedding(self, text):
         if self.embedding_model is None:
             st.error("Embedding model is not available!")
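Only get_embedding's guard clauses are visible here. A hedged sketch of the likely happy path, assuming the BAAI/bge-large-en-v1.5 embedder named in the sidebar text; its 1024-dim output matches the np.zeros(1024) fallback in the next hunk:

import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("BAAI/bge-large-en-v1.5")  # 1024-dim vectors

def get_embedding(text: str) -> np.ndarray:
    if embedding_model is None:
        return np.zeros(1024)  # same shape as the app's error fallback
    # normalize so dot product equals cosine similarity for the FAISS recall stage
    return embedding_model.encode(text, normalize_embeddings=True)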
@@ -237,7 +222,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
             return np.zeros(1024)
-
+
     def calculate_bm25_scores(self, resume_texts, job_description):
         try:
             job_tokens = word_tokenize(job_description.lower())
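calculate_bm25_scores is cut off right after the tokenization line; with rank-bm25 pinned in requirements.txt, the body is presumably along these lines (a sketch, not the verbatim implementation):

from nltk.tokenize import word_tokenize  # needs a one-time nltk.download("punkt")
from rank_bm25 import BM25Okapi

def calculate_bm25_scores(resume_texts, job_description):
    job_tokens = word_tokenize(job_description.lower())
    corpus = [word_tokenize(text.lower()) for text in resume_texts]
    bm25 = BM25Okapi(corpus)
    return bm25.get_scores(job_tokens).tolist()  # one raw score per resume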
@@ -250,7 +235,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error calculating BM25 scores: {str(e)}")
             return [0.0] * len(resume_texts)
-
+
     def advanced_pipeline_ranking(self, resume_texts, job_description):
         print("[Pipeline] Advanced Pipeline Ranking started.")
         if not resume_texts:
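advanced_pipeline_ranking's body is elided in this diff. The stage order comes from the sidebar text further down, and the helper names below all appear in the class comment above; their signatures, however, are guesses:

def advanced_pipeline_ranking(self, resume_texts, job_description):
    # Stage 1: FAISS recall narrows the pool to ~50 candidates
    candidates = self.faiss_recall(resume_texts, job_description)
    # Stage 2: cross-encoder re-ranking keeps the top ~20
    candidates = self.cross_encoder_rerank(candidates, job_description)
    # Stages 3-4: attach BM25 keyword and LLM intent scores
    candidates = self.add_bm25_scores(candidates, job_description)
    candidates = self.add_intent_scores(candidates, job_description)
    # Final: combine the components into one ranked list
    return self.calculate_final_scores(candidates)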
@@ -416,7 +401,7 @@ Respond with exactly one of these formats:
                intent_score = 0.0
            elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
                intent_score = 0.1
-
+
            return intent_score
        except Exception as e:
            st.warning(f"Error analyzing intent with Qwen3-4B: {str(e)}")
@@ -457,35 +442,19 @@ def create_download_link(df, filename="resume_screening_results.csv"):
     return f'<a href="data:file/csv;base64,{b64}" download="{filename}" class="download-btn">📥 Download Results CSV</a>'
 
 # --- Sidebar Configuration (Must be after global model loading and class defs if it uses them) ---
-with st.sidebar:
-    st.title("⚙️ Configuration")
-    # Advanced options
-    st.subheader("Advanced Options")
-    # Ensure top_k is in session_state if it's used by advanced_pipeline_ranking before button press
-    if 'top_k' not in st.session_state:
-        st.session_state.top_k = 5  # Default value
-    st.session_state.top_k = st.selectbox("Number of results to display", [1,2,3,4,5], index=st.session_state.top_k-1, key="top_k_selector")
-
-    # LLM Settings
-    st.subheader("LLM Settings")
-    # use_llm_explanations = st.checkbox("Generate AI Explanations", value=True) # This was removed earlier
-    # if use_llm_explanations:
-    #     hf_token = st.text_input("Hugging Face Token (optional)", type="password",
-    #                              help="Enter your HF token for better rate limits")
-
-    st.markdown("---")
-    st.markdown("### 🤖 Advanced Pipeline")
-    st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
-    st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
-    st.markdown("- **Stage 3**: BM25 Keyword Matching")
-    st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
-    st.markdown("- **Final**: Combined Scoring")  # Updated this line
-    st.markdown("### 📊 Models Used")
-    st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
-    st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
-    st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
-    st.markdown("### 📈 Scoring Formula")
-    st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
+st.markdown("---")
+st.markdown("### 🤖 Advanced Pipeline")
+st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
+st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
+st.markdown("- **Stage 3**: BM25 Keyword Matching")
+st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
+st.markdown("- **Final**: Combined Scoring")  # Updated this line
+st.markdown("### 📊 Models Used")
+st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
+st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
+st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
+st.markdown("### 📈 Scoring Formula")
+st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
 
 # --- Main App Interface (Must be after global model loading and class defs) ---
 st.title("🎯 AI-Powered Resume Screener")
@@ -722,52 +691,52 @@ with col1:
                  type="primary",
                  help="Run the complete 5-stage advanced pipeline"):
        print("--- Advanced Pipeline Analysis Button Clicked ---")
-        if len(st.session_state.resume_texts) == 0:
-            st.error("❌ Please upload resumes first!")
-        elif not job_description.strip():
-            st.error("❌ Please enter a job description!")
-        else:
-            print("[UI Button] Pre-checks passed. Starting spinner and pipeline.")
-            with st.spinner("🚀 Running Advanced Pipeline Analysis..."):
-                st.text("Pipeline Initiated: Starting advanced analysis...")
-                try:
-                    # Run the advanced pipeline
-                    pipeline_results = screener.advanced_pipeline_ranking(
-                        st.session_state.resume_texts, job_description
-                    )
-
-                    # Prepare results for display
-                    results = []
-
-                    for rank, result_data in enumerate(pipeline_results, 1):
-                        idx = result_data['index']
-                        name = st.session_state.file_names[idx]
-                        text = st.session_state.resume_texts[idx]
-
-                        # Extract skills
-                        skills = screener.extract_skills(text, job_description)
-
-                        results.append({
-                            'rank': rank,
-                            'name': name,
-                            'final_score': result_data['final_score'],
-                            'cross_encoder_score': result_data['cross_encoder_score'],
-                            'bm25_score': result_data['bm25_score'],
-                            'intent_score': result_data['intent_score'],
-                            'skills': skills,
-                            'text': text,
-                            'text_preview': text[:500] + "..." if len(text) > 500 else text
-                        })
-
-                    # Store in session state
-                    st.session_state.results = results
-                    st.session_state.current_job_description = job_description
-
-                    st.success(f"🚀 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
-                    st.text("Displaying Top Candidates...")
-
-                except Exception as e:
-                    st.error(f"❌ Error during analysis: {str(e)}")
+        if len(st.session_state.resume_texts) == 0:
+            st.error("❌ Please upload resumes first!")
+        elif not job_description.strip():
+            st.error("❌ Please enter a job description!")
+        else:
+            print("[UI Button] Pre-checks passed. Starting spinner and pipeline.")
+            with st.spinner("🚀 Running Advanced Pipeline Analysis..."):
+                st.text("Pipeline Initiated: Starting advanced analysis...")
+                try:
+                    # Run the advanced pipeline
+                    pipeline_results = screener.advanced_pipeline_ranking(
+                        st.session_state.resume_texts, job_description
+                    )
+
+                    # Prepare results for display
+                    results = []
+
+                    for rank, result_data in enumerate(pipeline_results, 1):
+                        idx = result_data['index']
+                        name = st.session_state.file_names[idx]
+                        text = st.session_state.resume_texts[idx]
+
+                        # Extract skills
+                        skills = screener.extract_skills(text, job_description)
+
+                        results.append({
+                            'rank': rank,
+                            'name': name,
+                            'final_score': result_data['final_score'],
+                            'cross_encoder_score': result_data['cross_encoder_score'],
+                            'bm25_score': result_data['bm25_score'],
+                            'intent_score': result_data['intent_score'],
+                            'skills': skills,
+                            'text': text,
+                            'text_preview': text[:500] + "..." if len(text) > 500 else text
+                        })
+
+                    # Store in session state
+                    st.session_state.results = results
+                    st.session_state.current_job_description = job_description
+
+                    st.success(f"🚀 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
+                    st.text("Displaying Top Candidates...")
+
+                except Exception as e:
+                    st.error(f"❌ Error during analysis: {str(e)}")
 
 # Display Results
 if st.session_state.results:
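extract_skills is called for every ranked resume above but never defined in the visible hunks. A hypothetical stand-in that favors skills the job description also mentions; the real implementation may differ entirely:

import re

SKILL_VOCAB = {"python", "sql", "docker", "kubernetes", "pytorch", "aws"}  # illustrative list only

def extract_skills(resume_text, job_description):
    resume_words = set(re.findall(r"[a-z+#]+", resume_text.lower()))
    jd_words = set(re.findall(r"[a-z+#]+", job_description.lower()))
    found = SKILL_VOCAB & resume_words
    # skills also named in the job description sort first
    return sorted(found, key=lambda skill: (skill not in jd_words, skill))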
@@ -793,7 +762,7 @@ if st.session_state.results:
             "Top Skills": ", ".join(result['skills'][:5])
         })
 
-    summary_df = pd.DataFrame(summary_data)
+    summary_df = pd.DataFrame(summary_data)
 
     # Style the dataframe
     def color_scores(val):
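color_scores' body starts exactly where the hunk ends. A plausible completion with invented thresholds, plus the Styler hookup the surrounding comments imply ("Final Score" as the subset column is an assumption):

def color_scores(val):
    try:
        score = float(val)
    except (TypeError, ValueError):
        return ""
    if score >= 1.0:
        return "background-color: #c6efce"  # strong candidate
    if score >= 0.5:
        return "background-color: #ffeb9c"  # moderate
    return "background-color: #ffc7ce"      # weak

# styled = summary_df.style.map(color_scores, subset=["Final Score"])  # pandas >= 2.1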
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 streamlit==1.31.0
-transformers>=4.51.0
-torch==2.3.0
+transformers>=4.55.0
+torch>=2.3.0
 pdfplumber==0.10.1
 PyPDF2==3.0.1
 python-docx==1.0.1
@@ -10,9 +10,9 @@ rank-bm25==0.2.2
 pandas==2.1.3
 numpy==1.24.3
 tqdm==4.66.1
-huggingface-hub==0.30.0
-bitsandbytes==0.44.1
-accelerate==0.27.2
+huggingface-hub>=0.27.0
+bitsandbytes>=0.44.1
+accelerate>=0.27.2
 datasets==2.18.0
 sentence-transformers==2.7.0
-einops
+einops
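The `==` pins on the quantization stack are relaxed to `>=`, letting pip resolve mutually compatible builds of transformers, bitsandbytes, and accelerate. A quick preflight check along these lines can fail fast before any model download starts (minimums copied from the new pins):

import importlib.metadata as md

for pkg, minimum in [("transformers", "4.55.0"), ("bitsandbytes", "0.44.1"), ("accelerate", "0.27.2")]:
    try:
        print(f"{pkg}=={md.version(pkg)} (want >= {minimum})")
    except md.PackageNotFoundError:
        print(f"{pkg} is missing -- run: pip install -r requirements.txt")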
 