root committed on
Commit
f82b542
·
1 Parent(s): 847b129
Files changed (1) hide show
  1. app.py +1 -77
app.py CHANGED
@@ -13,7 +13,6 @@ import pdfplumber
13
  import PyPDF2
14
  from docx import Document
15
  import csv
16
- from datasets import load_dataset
17
  import gc
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
  import time
@@ -611,7 +610,7 @@ if st.session_state.resume_texts:
611
 
612
  input_method = st.radio(
613
  "Choose input method:",
614
- ["📁 Upload Files", "🗂️ Load from CSV Dataset", "🔗 Load from Hugging Face Dataset"]
615
  )
616
 
617
  if input_method == "📁 Upload Files":
@@ -690,81 +689,6 @@ elif input_method == "🗂️ Load from CSV Dataset":
690
  except Exception as e:
691
  st.error(f"❌ Error processing CSV: {str(e)}")
692
 
693
- elif input_method == "🔗 Load from Hugging Face Dataset":
694
- st.markdown("**Popular Resume Datasets:**")
695
- st.markdown("- `ahmedheakl/resume-atlas`")
696
- st.markdown("- `InferenceFly/Resume-Dataset`")
697
-
698
- col1, col2 = st.columns([2, 1])
699
- with col1:
700
- dataset_name = st.text_input(
701
- "Dataset name:",
702
- value="ahmedheakl/resume-atlas",
703
- help="Enter Hugging Face dataset name"
704
- )
705
- with col2:
706
- dataset_split = st.selectbox("Split:", ["train", "test", "validation"], index=0)
707
-
708
- if st.button("🔗 Load from Hugging Face"):
709
- try:
710
- with st.spinner(f"🔄 Loading {dataset_name}..."):
711
- dataset = load_dataset(dataset_name, split=dataset_split)
712
-
713
- st.success(f"✅ Loaded dataset with {len(dataset)} entries")
714
- st.write("**Dataset Preview:**")
715
-
716
- preview_df = pd.DataFrame(dataset[:5])
717
- st.dataframe(preview_df)
718
-
719
- text_column = st.selectbox(
720
- "Select column with resume text:",
721
- dataset.column_names,
722
- index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
723
- )
724
-
725
- category_column = None
726
- if 'category' in dataset.column_names:
727
- categories = list(set(dataset['category']))
728
- category_column = st.selectbox(
729
- "Filter by category (optional):",
730
- ["All"] + categories
731
- )
732
-
733
- max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
734
-
735
- if st.button("🚀 Process Dataset"):
736
- with st.spinner("🔄 Processing dataset..."):
737
- resume_texts = []
738
- file_names = []
739
-
740
- filtered_dataset = dataset
741
-
742
- if category_column and category_column != "All":
743
- filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
744
-
745
- sample_indices = list(range(min(max_samples, len(filtered_dataset))))
746
-
747
- for idx in sample_indices:
748
- item = filtered_dataset[idx]
749
- text = str(item[text_column])
750
-
751
- if text and text.strip() and text.lower() != 'nan':
752
- resume_texts.append(text)
753
-
754
- if 'id' in item:
755
- file_names.append(f"Resume_{item['id']}")
756
- else:
757
- file_names.append(f"Resume_{idx}")
758
-
759
- st.session_state.resume_texts = resume_texts
760
- st.session_state.file_names = file_names
761
-
762
- if resume_texts:
763
- st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
764
-
765
- except Exception as e:
766
- st.error(f"❌ Error loading dataset: {str(e)}")
767
-
768
  # Processing and Results
769
  st.header("🔍 Step 3: Analyze Resumes")
770
 
 
13
  import PyPDF2
14
  from docx import Document
15
  import csv
 
16
  import gc
17
  from transformers import AutoModelForCausalLM, AutoTokenizer
18
  import time
 
610
 
611
  input_method = st.radio(
612
  "Choose input method:",
613
+ ["📁 Upload Files", "🗂️ Load from CSV Dataset"]
614
  )
615
 
616
  if input_method == "📁 Upload Files":
 
689
  except Exception as e:
690
  st.error(f"❌ Error processing CSV: {str(e)}")
691
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
  # Processing and Results
693
  st.header("🔍 Step 3: Analyze Resumes")
694