shoaibmoghal commited on
Commit
aa060d7
·
verified ·
1 Parent(s): 89f42e9

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. app.py +90 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image
FROM python:3.10

# Set working directory
WORKDIR /app

# Copy and install dependencies BEFORE the application code so the pip
# layer is cached and only rebuilt when requirements.txt changes —
# editing app.py or the model files no longer re-runs pip install.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code and model weights
COPY app.py app.py
COPY models models

# Expose API port (Hugging Face Spaces convention)
EXPOSE 7860

# Run FastAPI
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File
import json, re, io
from llama_cpp import Llama
from PyPDF2 import PdfReader
from docx import Document
import os

# ✅ Define Model Path Inside Hugging Face Space
# Resolve the model path relative to this file so the app works no matter
# what the current working directory is when uvicorn starts it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "models", "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf")

print(f"🔹 Loading Mistral 7B from: {MODEL_PATH} (This may take a while)")

# n_ctx=4096: model context window; analyze_resume() truncates its input
# to stay inside it. n_gpu_layers=0 forces pure-CPU inference.
# NOTE(review): the model loads at import time, so startup blocks until
# the full GGUF file is read from disk.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=0)  # Set n_gpu_layers=0 for CPU if no GPU
print("✅ Model loaded successfully!")

app = FastAPI(title="Resume Parsing API")
18
+
19
def extract_text_from_resume(uploaded_file):
    """Extract plain text from an uploaded resume file.

    Args:
        uploaded_file: A FastAPI ``UploadFile`` (anything exposing ``.file``
            with ``read()`` and a ``.filename``) holding a ``.pdf`` or
            ``.docx`` document.

    Returns:
        The extracted text as a single newline-joined string, or ``None``
        when the extension is unsupported.
    """
    file_content = uploaded_file.file.read()
    file_stream = io.BytesIO(file_content)

    # Compare extensions case-insensitively ("RESUME.PDF" is valid too) and
    # tolerate a missing filename instead of raising AttributeError.
    filename = (uploaded_file.filename or "").lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file_stream)
        # Call extract_text() once per page (the original called it twice),
        # skipping pages that yield no extractable text.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    elif filename.endswith(".docx"):
        doc = Document(file_stream)
        return "\n".join(para.text for para in doc.paragraphs)
    return None
30
+
31
def extract_email_phone(text):
    """Find the first email address and phone number in free text.

    Args:
        text: Raw resume text.

    Returns:
        dict with ``"email"`` and ``"phone"`` keys. Placeholder strings
        ("Email not found" / "Phone not found") are returned on no match,
        kept for backward compatibility with existing callers.
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    # Optional country code, then 3-3-4 digit groups separated by -, ., space
    # or parentheses. The country code must be optional as a WHOLE group: the
    # previous pattern's leading \d{1,3}? still required at least one digit,
    # so a plain 10-digit number like "555-123-4567" could never match.
    phone_pattern = r"(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"

    email_match = re.search(email_pattern, text)
    phone_match = re.search(phone_pattern, text)

    return {
        "email": email_match.group() if email_match else "Email not found",
        "phone": phone_match.group() if phone_match else "Phone not found",
    }
42
+
43
def analyze_resume(text):
    """Ask the local LLM to extract structured fields from resume text.

    Args:
        text: Full resume text; only the first 3500 characters are sent so
            the prompt fits the model's 4096-token context window.

    Returns:
        The parsed dict on success, otherwise
        ``{"error": "Failed to parse JSON", "raw_output": <model text>}``
        so the caller can inspect what the model produced.
    """
    truncated_text = text[:3500]  # Keep within context limit

    prompt = f"""
    Extract these details from the resume:
    1. Full Name
    2. Work Experience
    3. Qualifications
    4. List of Skills

    Resume Text: {truncated_text}

    Format response as JSON:
    {{
      "name": "Candidate Name",
      "experience": [
        {{
          "company": "Company Name",
          "role": "Job Title",
          "duration": "Start Date - End Date",
          "responsibilities": "Brief work responsibilities"
        }}
      ],
      "qualifications": "Degree, Certifications",
      "skills": ["List of skills"]
    }}
    """

    response = llm(prompt, max_tokens=700)
    output = response["choices"][0]["text"].strip()
    print("🔹 Raw LLaMA Output:\n", output)

    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # LLMs frequently wrap the JSON in explanatory prose; try to salvage
        # the first {...} span before giving up entirely.
        candidate = re.search(r"\{.*\}", output, re.DOTALL)
        if candidate:
            try:
                return json.loads(candidate.group())
            except json.JSONDecodeError:
                pass
        return {"error": "Failed to parse JSON", "raw_output": output}
79
+
80
@app.post("/parse-resume/")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume (.pdf / .docx) into structured data.

    Combines regex-based contact extraction with the LLM's structured
    analysis and returns them in a single payload.
    """
    resume_text = extract_text_from_resume(file)
    if not resume_text:
        return {"error": "Unsupported file format"}

    # Start from the regex-extracted contact info, then layer the LLM
    # fields (name, experience, qualifications, skills) on top.
    parsed = extract_email_phone(resume_text)
    parsed.update(analyze_resume(resume_text))
    return {"success": True, "data": parsed}
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
fastapi
uvicorn
python-multipart
llama-cpp-python
pypdf2
python-docx