shoaibmoghal commited on
Commit
aa060d7
·
verified ·
1 Parent(s): 89f42e9

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. app.py +90 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image
FROM python:3.10

# Set working directory
WORKDIR /app

# Copy and install dependencies BEFORE the application code so the pip
# layer is cached and only rebuilt when requirements.txt changes —
# editing app.py or the model files no longer re-runs pip install.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code and model weights
COPY app.py app.py
COPY models models

# Expose API port (Hugging Face Spaces convention)
EXPOSE 7860

# Run FastAPI
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File
import json, re, io
from llama_cpp import Llama
from PyPDF2 import PdfReader
from docx import Document
import os

# ✅ Define Model Path Inside Hugging Face Space
# Resolve the model path relative to this file so the app works no matter
# what the current working directory is when uvicorn starts it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "models", "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf")

print(f"🔹 Loading Mistral 7B from: {MODEL_PATH} (This may take a while)")

# n_ctx=4096: model context window; analyze_resume() truncates its input
# to stay inside it. n_gpu_layers=0 forces pure-CPU inference.
# NOTE(review): the model loads at import time, so startup blocks until
# the full GGUF file is read from disk.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=0)  # Set n_gpu_layers=0 for CPU if no GPU
print("✅ Model loaded successfully!")

app = FastAPI(title="Resume Parsing API")
18
+
19
def extract_text_from_resume(uploaded_file):
    """Extract plain text from an uploaded resume file.

    Args:
        uploaded_file: A FastAPI ``UploadFile`` (anything exposing ``.file``
            with ``read()`` and a ``.filename``) holding a ``.pdf`` or
            ``.docx`` document.

    Returns:
        The extracted text as a single newline-joined string, or ``None``
        when the extension is unsupported.
    """
    file_content = uploaded_file.file.read()
    file_stream = io.BytesIO(file_content)

    # Compare extensions case-insensitively ("RESUME.PDF" is valid too) and
    # tolerate a missing filename instead of raising AttributeError.
    filename = (uploaded_file.filename or "").lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file_stream)
        # Call extract_text() once per page (the original called it twice),
        # skipping pages that yield no extractable text.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    elif filename.endswith(".docx"):
        doc = Document(file_stream)
        return "\n".join(para.text for para in doc.paragraphs)
    return None
30
+
31
def extract_email_phone(text):
    """Find the first email address and phone number in free text.

    Args:
        text: Raw resume text.

    Returns:
        dict with ``"email"`` and ``"phone"`` keys. Placeholder strings
        ("Email not found" / "Phone not found") are returned on no match,
        kept for backward compatibility with existing callers.
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    # Optional country code, then 3-3-4 digit groups separated by -, ., space
    # or parentheses. The country code must be optional as a WHOLE group: the
    # previous pattern's leading \d{1,3}? still required at least one digit,
    # so a plain 10-digit number like "555-123-4567" could never match.
    phone_pattern = r"(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"

    email_match = re.search(email_pattern, text)
    phone_match = re.search(phone_pattern, text)

    return {
        "email": email_match.group() if email_match else "Email not found",
        "phone": phone_match.group() if phone_match else "Phone not found",
    }
42
+
43
def analyze_resume(text):
    """Ask the local LLM to extract structured fields from resume text.

    Args:
        text: Full resume text; only the first 3500 characters are sent so
            the prompt fits the model's 4096-token context window.

    Returns:
        The parsed dict on success, otherwise
        ``{"error": "Failed to parse JSON", "raw_output": <model text>}``
        so the caller can inspect what the model produced.
    """
    truncated_text = text[:3500]  # Keep within context limit

    prompt = f"""
    Extract these details from the resume:
    1. Full Name
    2. Work Experience
    3. Qualifications
    4. List of Skills

    Resume Text: {truncated_text}

    Format response as JSON:
    {{
      "name": "Candidate Name",
      "experience": [
        {{
          "company": "Company Name",
          "role": "Job Title",
          "duration": "Start Date - End Date",
          "responsibilities": "Brief work responsibilities"
        }}
      ],
      "qualifications": "Degree, Certifications",
      "skills": ["List of skills"]
    }}
    """

    response = llm(prompt, max_tokens=700)
    output = response["choices"][0]["text"].strip()
    print("🔹 Raw LLaMA Output:\n", output)

    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # LLMs frequently wrap the JSON in explanatory prose; try to salvage
        # the first {...} span before giving up entirely.
        candidate = re.search(r"\{.*\}", output, re.DOTALL)
        if candidate:
            try:
                return json.loads(candidate.group())
            except json.JSONDecodeError:
                pass
        return {"error": "Failed to parse JSON", "raw_output": output}
79
+
80
@app.post("/parse-resume/")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume (.pdf / .docx) into structured data.

    Combines regex-based contact extraction with the LLM's structured
    analysis and returns them in a single payload.
    """
    resume_text = extract_text_from_resume(file)
    if not resume_text:
        return {"error": "Unsupported file format"}

    # Start from the regex-extracted contact info, then layer the LLM
    # fields (name, experience, qualifications, skills) on top.
    parsed = extract_email_phone(resume_text)
    parsed.update(analyze_resume(resume_text))
    return {"success": True, "data": parsed}
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
fastapi
uvicorn
python-multipart
llama-cpp-python
pypdf2
python-docx