Commit d5266d0 · root committed
1 parent: 3adcf09
Commit message: ss

Files changed:
- app.py (+72, -103)
- requirements.txt (+6, -6)

app.py
```diff
@@ -99,7 +99,7 @@ if st.session_state.cross_encoder is None and st.session_state.cross_encoder_error is None:
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.cross_encoder_error = error_msg
 
-# Load Qwen3-4B Tokenizer
+# Load Qwen3-4B Tokenizer and Model with 4-bit quantization (QwQ-32B style)
 if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tokenizer_error is None:
     print("[Global Init] Loading Qwen3-4B Tokenizer...")
     try:
@@ -110,43 +110,28 @@ if st.session_state.qwen3_4b_tokenizer is None and st.session_state.qwen3_4b_tokenizer_error is None:
         print(f"❌ [Global Init] {error_msg}")
         st.session_state.qwen3_4b_tokenizer_error = error_msg
 
-# Load Qwen3-4B Model with 4-bit quantization
 if st.session_state.qwen3_4b_model is None and st.session_state.qwen3_4b_model_error is None:
     print("[Global Init] Loading Qwen3-4B Model with 4-bit quantization...")
     try:
-        # Configure 4-bit quantization for better performance and memory efficiency
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.float16,
             bnb_4bit_use_double_quant=True
         )
-
         st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen3-4B",
+            "Qwen/Qwen3-4B",
             quantization_config=quantization_config,
             device_map="auto",
             trust_remote_code=True,
             torch_dtype=torch.float16,
             use_cache=True
         )
-        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization
-    except Exception as
-
-        print(f"
-
-        try:
-            st.session_state.qwen3_4b_model = AutoModelForCausalLM.from_pretrained(
-                "Qwen/Qwen3-4B",
-                torch_dtype="auto",
-                device_map="auto",
-                trust_remote_code=True
-            )
-            print("[Global Init] Qwen3-4B Model Loaded without quantization.")
-        except Exception as e_fallback:
-            error_msg = f"Failed to load Qwen3-4B Model (fallback): {str(e_fallback) if e_fallback else 'Unknown error'}"
-            print(f"❌ [Global Init] {error_msg}")
-            st.session_state.qwen3_4b_model_error = error_msg
+        print("[Global Init] Qwen3-4B Model Loaded with 4-bit quantization.")
+    except Exception as e:
+        error_msg = f"Failed to load Qwen3-4B Model: {str(e) if e else 'Unknown error'}"
+        print(f"❌ [Global Init] {error_msg}")
+        st.session_state.qwen3_4b_model_error = error_msg
 
 # --- End of Global Model Loading Section ---
 
```
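The hunk above drops the unquantized fallback path in favor of a single NF4 try/except. For exercising that loading path outside Streamlit, here is a minimal standalone sketch under the same settings: the model id and `BitsAndBytesConfig` values come from the diff, while the `load_qwen3_4b` wrapper is illustrative only, and a CUDA-capable environment with `bitsandbytes` installed is assumed.

```python
# Minimal standalone sketch of the 4-bit (NF4) loading pattern in the hunk above.
# Model id and quantization settings mirror the diff; the wrapper is illustrative.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_qwen3_4b():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",             # NormalFloat4 weight quantization
        bnb_4bit_compute_dtype=torch.float16,  # matmuls run in fp16
        bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    )
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen3-4B",
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True,
    )
    return tokenizer, model
```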
```diff
@@ -193,7 +178,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
     # calculate_bm25_scores, advanced_pipeline_ranking, faiss_recall, cross_encoder_rerank,
     # add_bm25_scores, add_intent_scores, analyze_intent, calculate_final_scores, extract_skills)
     # Make sure all methods are correctly indented within the class
-
+
     def extract_text_from_file(self, file_path, file_type):
         # ... (implementation)
         try:
@@ -223,7 +208,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error extracting text from {file_path}: {str(e)}")
             return ""
-
+
     def get_embedding(self, text):
         if self.embedding_model is None:
             st.error("Embedding model is not available!")
@@ -237,7 +222,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error generating embedding: {str(e)}")
             return np.zeros(1024)
-
+
     def calculate_bm25_scores(self, resume_texts, job_description):
         try:
             job_tokens = word_tokenize(job_description.lower())
@@ -250,7 +235,7 @@ class ResumeScreener: # Ensure this class definition is BEFORE it's instantiated
         except Exception as e:
             st.error(f"Error calculating BM25 scores: {str(e)}")
             return [0.0] * len(resume_texts)
-
+
     def advanced_pipeline_ranking(self, resume_texts, job_description):
         print("[Pipeline] Advanced Pipeline Ranking started.")
         if not resume_texts:
```
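The `calculate_bm25_scores` context above shows NLTK tokenization going in and one score per resume coming out. A hedged sketch of that shape, using the `rank-bm25` package pinned in requirements.txt; the function body is a plausible reconstruction, not the repo's exact method:

```python
# Hedged sketch of BM25 keyword scoring against a job description, assuming
# NLTK tokenization (requires nltk.download("punkt")) and rank-bm25==0.2.2.
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi

def bm25_scores(resume_texts, job_description):
    job_tokens = word_tokenize(job_description.lower())
    corpus = [word_tokenize(text.lower()) for text in resume_texts]
    bm25 = BM25Okapi(corpus)
    return bm25.get_scores(job_tokens)  # one relevance score per resume
```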
```diff
@@ -416,7 +401,7 @@ Respond with exactly one of these formats:
                 intent_score = 0.0
             elif 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
                 intent_score = 0.1
-
+
             return intent_score
         except Exception as e:
             st.warning(f"Error analyzing intent with Qwen3-4B: {str(e)}")
```
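The intent hunk above only shows the "maybe" (0.1) and fall-through (0.0) branches. A hedged sketch of the full parser; the 0.3 value for "yes" is an assumption inferred from the sidebar's 0-0.3 intent range, not visible in the diff:

```python
# Hedged sketch of parsing the LLM's "Intent: yes/maybe/no" reply.
def parse_intent_score(response: str) -> float:
    response_lower = response.lower()
    if 'intent: yes' in response_lower or 'intent:yes' in response_lower:
        return 0.3  # assumed: top of the 0-0.3 range shown in the sidebar
    if 'intent: maybe' in response_lower or 'intent:maybe' in response_lower:
        return 0.1  # taken directly from the diff
    return 0.0      # "no" or unparseable reply
```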
```diff
@@ -457,35 +442,19 @@ def create_download_link(df, filename="resume_screening_results.csv"):
     return f'<a href="data:file/csv;base64,{b64}" download="{filename}" class="download-btn">📥 Download Results CSV</a>'
 
 # --- Sidebar Configuration (Must be after global model loading and class defs if it uses them) ---
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # hf_token = st.text_input("Hugging Face Token (optional)", type="password",
-    #                          help="Enter your HF token for better rate limits")
-
-    st.markdown("---")
-    st.markdown("### 🤖 Advanced Pipeline")
-    st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
-    st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
-    st.markdown("- **Stage 3**: BM25 Keyword Matching")
-    st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
-    st.markdown("- **Final**: Combined Scoring") # Updated this line
-    st.markdown("### 📚 Models Used")
-    st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
-    st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
-    st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
-    st.markdown("### 📊 Scoring Formula")
-    st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
+st.markdown("---")
+st.markdown("### 🤖 Advanced Pipeline")
+st.markdown("- **Stage 1**: FAISS Recall (Top 50)")
+st.markdown("- **Stage 2**: Cross-Encoder Re-ranking (Top 20)")
+st.markdown("- **Stage 3**: BM25 Keyword Matching")
+st.markdown("- **Stage 4**: LLM Intent Analysis (Qwen3-4B)")
+st.markdown("- **Final**: Combined Scoring") # Updated this line
+st.markdown("### 📚 Models Used")
+st.markdown("- **Embedding**: BAAI/bge-large-en-v1.5")
+st.markdown("- **Cross-Encoder**: ms-marco-MiniLM-L6-v2")
+st.markdown("- **LLM**: Qwen/Qwen3-4B (4-bit quantized)")
+st.markdown("### 📊 Scoring Formula")
+st.markdown("**Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3)**")
 
 # --- Main App Interface (Must be after global model loading and class defs) ---
 st.title("🎯 AI-Powered Resume Screener")
```
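Since the sidebar states the scoring formula outright, a tiny worked example helps sanity-check the ranges; the component values below are invented for illustration:

```python
# Worked example of the sidebar's formula:
# Final Score = Cross-Encoder (0-1) + BM25 (0.1-0.2) + Intent (0-0.3).
cross_encoder = 0.82  # normalized re-ranker score, 0-1
bm25 = 0.17           # keyword score scaled into 0.1-0.2
intent = 0.3          # LLM answered "Intent: yes"

final_score = cross_encoder + bm25 + intent
print(f"{final_score:.2f}")  # 1.29, on the 0.1-1.5 scale implied by the ranges
```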
```diff
@@ -722,52 +691,52 @@ with col1:
                  type="primary",
                  help="Run the complete 5-stage advanced pipeline"):
         print("--- Advanced Pipeline Analysis Button Clicked ---")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if len(st.session_state.resume_texts) == 0:
+            st.error("❌ Please upload resumes first!")
+        elif not job_description.strip():
+            st.error("❌ Please enter a job description!")
+        else:
+            print("[UI Button] Pre-checks passed. Starting spinner and pipeline.")
+            with st.spinner("🔄 Running Advanced Pipeline Analysis..."):
+                st.text("Pipeline Initiated: Starting advanced analysis...")
+                try:
+                    # Run the advanced pipeline
+                    pipeline_results = screener.advanced_pipeline_ranking(
+                        st.session_state.resume_texts, job_description
+                    )
+
+                    # Prepare results for display
+                    results = []
+
+                    for rank, result_data in enumerate(pipeline_results, 1):
+                        idx = result_data['index']
+                        name = st.session_state.file_names[idx]
+                        text = st.session_state.resume_texts[idx]
+
+                        # Extract skills
+                        skills = screener.extract_skills(text, job_description)
+
+                        results.append({
+                            'rank': rank,
+                            'name': name,
+                            'final_score': result_data['final_score'],
+                            'cross_encoder_score': result_data['cross_encoder_score'],
+                            'bm25_score': result_data['bm25_score'],
+                            'intent_score': result_data['intent_score'],
+                            'skills': skills,
+                            'text': text,
+                            'text_preview': text[:500] + "..." if len(text) > 500 else text
+                        })
+
+                    # Store in session state
+                    st.session_state.results = results
+                    st.session_state.current_job_description = job_description
+
+                    st.success(f"🎉 Advanced pipeline complete! Found top {len(st.session_state.results)} candidates.")
+                    st.text("Displaying Top Candidates...")
+
+                except Exception as e:
+                    st.error(f"❌ Error during analysis: {str(e)}")
 
 # Display Results
 if st.session_state.results:
@@ -793,7 +762,7 @@ if st.session_state.results:
             "Top Skills": ", ".join(result['skills'][:5])
         })
 
-
+    summary_df = pd.DataFrame(summary_data)
 
     # Style the dataframe
     def color_scores(val):
```
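One design note on the button hunk: assembling `results` locally and then writing it to `st.session_state.results` is what lets the results table survive Streamlit's script reruns, so it stays visible while the user interacts with other widgets.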
requirements.txt
```diff
@@ -1,6 +1,6 @@
 streamlit==1.31.0
-transformers>=4.
-torch
+transformers>=4.55.0
+torch>=2.3.0
 pdfplumber==0.10.1
 PyPDF2==3.0.1
 python-docx==1.0.1
@@ -10,9 +10,9 @@ rank-bm25==0.2.2
 pandas==2.1.3
 numpy==1.24.3
 tqdm==4.66.1
-huggingface-hub
-bitsandbytes
-accelerate
+huggingface-hub>=0.27.0
+bitsandbytes>=0.44.1
+accelerate>=0.27.2
 datasets==2.18.0
 sentence-transformers==2.7.0
-einops
+einops
```
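A quick way to confirm an environment already satisfies the new version floors, using only the packages named above (a convenience sketch, not part of the commit):

```python
# Print installed versions next to the floors pinned in requirements.txt.
import accelerate, bitsandbytes, huggingface_hub, torch, transformers

print("transformers", transformers.__version__)        # needs >= 4.55.0
print("torch", torch.__version__)                      # needs >= 2.3.0
print("bitsandbytes", bitsandbytes.__version__)        # needs >= 0.44.1
print("accelerate", accelerate.__version__)            # needs >= 0.27.2
print("huggingface_hub", huggingface_hub.__version__)  # needs >= 0.27.0
```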