Spaces:

jacob-c
/

Resume_Screener_and_Skill_Extractor

Paused

App Files Community

root committed on May 28

Commit

f82b542

1 Parent(s): 847b129

ss

Browse files

Files changed (1) hide show

app.py +1 -77

app.py CHANGED Viewed

@@ -13,7 +13,6 @@ import pdfplumber
 import PyPDF2
 from docx import Document
 import csv
-from datasets import load_dataset
 import gc
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
@@ -611,7 +610,7 @@ if st.session_state.resume_texts:
 input_method = st.radio(
     "Choose input method:",
-    ["📁 Upload Files", "🗂️ Load from CSV Dataset", "🔗 Load from Hugging Face Dataset"]
 )
 if input_method == "📁 Upload Files":
@@ -690,81 +689,6 @@ elif input_method == "🗂️ Load from CSV Dataset":
         except Exception as e:
             st.error(f"❌ Error processing CSV: {str(e)}")
-elif input_method == "🔗 Load from Hugging Face Dataset":
-    st.markdown("**Popular Resume Datasets:**")
-    st.markdown("- `ahmedheakl/resume-atlas`")
-    st.markdown("- `InferenceFly/Resume-Dataset`")
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        dataset_name = st.text_input(
-            "Dataset name:",
-            value="ahmedheakl/resume-atlas",
-            help="Enter Hugging Face dataset name"
-        )
-    with col2:
-        dataset_split = st.selectbox("Split:", ["train", "test", "validation"], index=0)
-    if st.button("🔗 Load from Hugging Face"):
-        try:
-            with st.spinner(f"🔄 Loading {dataset_name}..."):
-                dataset = load_dataset(dataset_name, split=dataset_split)
-            st.success(f"✅ Loaded dataset with {len(dataset)} entries")
-            st.write("**Dataset Preview:**")
-            preview_df = pd.DataFrame(dataset[:5])
-            st.dataframe(preview_df)
-            text_column = st.selectbox(
-                "Select column with resume text:",
-                dataset.column_names,
-                index=dataset.column_names.index('resume_text') if 'resume_text' in dataset.column_names else 0
-            )
-            category_column = None
-            if 'category' in dataset.column_names:
-                categories = list(set(dataset['category']))
-                category_column = st.selectbox(
-                    "Filter by category (optional):",
-                    ["All"] + categories
-                )
-            max_samples = st.slider("Maximum samples to load:", 10, min(1000, len(dataset)), 100)
-            if st.button("🚀 Process Dataset"):
-                with st.spinner("🔄 Processing dataset..."):
-                    resume_texts = []
-                    file_names = []
-                    filtered_dataset = dataset
-                    if category_column and category_column != "All":
-                        filtered_dataset = dataset.filter(lambda x: x['category'] == category_column)
-                    sample_indices = list(range(min(max_samples, len(filtered_dataset))))
-                    for idx in sample_indices:
-                        item = filtered_dataset[idx]
-                        text = str(item[text_column])
-                        if text and text.strip() and text.lower() != 'nan':
-                            resume_texts.append(text)
-                            if 'id' in item:
-                                file_names.append(f"Resume_{item['id']}")
-                            else:
-                                file_names.append(f"Resume_{idx}")
-                    st.session_state.resume_texts = resume_texts
-                    st.session_state.file_names = file_names
-                if resume_texts:
-                    st.success(f"✅ Successfully loaded {len(resume_texts)} resumes")
-        except Exception as e:
-            st.error(f"❌ Error loading dataset: {str(e)}")
 # Processing and Results
 st.header("🔍 Step 3: Analyze Resumes")

 import PyPDF2
 from docx import Document
 import csv
 import gc
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
 input_method = st.radio(
     "Choose input method:",
+    ["📁 Upload Files", "🗂️ Load from CSV Dataset"]
 )
 if input_method == "📁 Upload Files":
         except Exception as e:
             st.error(f"❌ Error processing CSV: {str(e)}")
 # Processing and Results
 st.header("🔍 Step 3: Analyze Resumes")