root
committed on
Commit
·
f82b542
1
Parent(s):
847b129
ss
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@ import pdfplumber
|
|
13 |
import PyPDF2
|
14 |
from docx import Document
|
15 |
import csv
|
16 |
-
from datasets import load_dataset
|
17 |
import gc
|
18 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
19 |
import time
|
@@ -611,7 +610,7 @@ if st.session_state.resume_texts:
|
|
611 |
|
612 |
input_method = st.radio(
|
613 |
"Choose input method:",
|
614 |
-
["π Upload Files", "ποΈ Load from CSV Dataset"
|
615 |
)
|
616 |
|
617 |
if input_method == "π Upload Files":
|
@@ -690,81 +689,6 @@ elif input_method == "ποΈ Load from CSV Dataset":
|
|
690 |
except Exception as e:
|
691 |
st.error(f"β Error processing CSV: {str(e)}")
|
692 |
|
693 |
-
elif input_method == "π Load from Hugging Face Dataset":
|
694 |
-
st.markdown("**Popular Resume Datasets:**")
|
695 |
-
st.markdown("- `ahmedheakl/resume-atlas`")
|
696 |
-
st.markdown("- `InferenceFly/Resume-Dataset`")
|
697 |
-
|
698 |
-
col1, col2 = st.columns([2, 1])
|
699 |
-
with col1:
|
700 |
-
dataset_name = st.text_input(
|
701 |
-
"Dataset name:",
|
702 |
-
value="ahmedheakl/resume-atlas",
|
703 |
-
help="Enter Hugging Face dataset name"
|
704 |
-
)
|
705 |
-
with col2:
|
706 |
-
dataset_split = st.selectbox("Split:", ["train", "test", "validation"], index=0)
|
707 |
-
|
708 |
-
if st.button("π Load from Hugging Face"):
|
709 |
-
try:
|
710 |
-
with st.spinner(f"π Loading {dataset_name}..."):
|
711 |
-
dataset = load_dataset(dataset_name, split=dataset_split)
|
712 |
-
|
713 |
-
st.success(f"✅ Loaded dataset with {len(dataset)} entries")
|
714 |
-
st.write("**Dataset Preview:**")
|
715 |
-
|
716 |
-
preview_df = pd.DataFrame(dataset[:5])
|
717 |
-
st.dataframe(preview_df)
|
718 |
-
|
719 |
-
text_column = st.selectbox(
|
720 |
-
"Select column with resume text:",
|
721 |
-
dataset.column_names,
|
722 |
-
index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
|
723 |
-
)
|
724 |
-
|
725 |
-
category_column = None
|
726 |
-
if 'category' in dataset.column_names:
|
727 |
-
categories = list(set(dataset['category']))
|
728 |
-
category_column = st.selectbox(
|
729 |
-
"Filter by category (optional):",
|
730 |
-
["All"] + categories
|
731 |
-
)
|
732 |
-
|
733 |
-
max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
|
734 |
-
|
735 |
-
if st.button("π Process Dataset"):
|
736 |
-
with st.spinner("π Processing dataset..."):
|
737 |
-
resume_texts = []
|
738 |
-
file_names = []
|
739 |
-
|
740 |
-
filtered_dataset = dataset
|
741 |
-
|
742 |
-
if category_column and category_column != "All":
|
743 |
-
filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
|
744 |
-
|
745 |
-
sample_indices = list(range(min(max_samples, len(filtered_dataset))))
|
746 |
-
|
747 |
-
for idx in sample_indices:
|
748 |
-
item = filtered_dataset[idx]
|
749 |
-
text = str(item[text_column])
|
750 |
-
|
751 |
-
if text and text.strip() and text.lower() != 'nan':
|
752 |
-
resume_texts.append(text)
|
753 |
-
|
754 |
-
if 'id' in item:
|
755 |
-
file_names.append(f"Resume_{item['id']}")
|
756 |
-
else:
|
757 |
-
file_names.append(f"Resume_{idx}")
|
758 |
-
|
759 |
-
st.session_state.resume_texts = resume_texts
|
760 |
-
st.session_state.file_names = file_names
|
761 |
-
|
762 |
-
if resume_texts:
|
763 |
-
st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
|
764 |
-
|
765 |
-
except Exception as e:
|
766 |
-
st.error(f"β Error loading dataset: {str(e)}")
|
767 |
-
|
768 |
# Processing and Results
|
769 |
st.header("π Step 3: Analyze Resumes")
|
770 |
|
|
|
13 |
import PyPDF2
|
14 |
from docx import Document
|
15 |
import csv
|
|
|
16 |
import gc
|
17 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
18 |
import time
|
|
|
610 |
|
611 |
input_method = st.radio(
|
612 |
"Choose input method:",
|
613 |
+
["π Upload Files", "ποΈ Load from CSV Dataset"]
|
614 |
)
|
615 |
|
616 |
if input_method == "π Upload Files":
|
|
|
689 |
except Exception as e:
|
690 |
st.error(f"β Error processing CSV: {str(e)}")
|
691 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
692 |
# Processing and Results
|
693 |
st.header("π Step 3: Analyze Resumes")
|
694 |
|