Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,9 +10,9 @@ import io
|
|
10 |
# Sentence-BERT 모델 로드
|
11 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
12 |
|
13 |
-
# 추천 결과를 CSV 파일로 저장하는 함수 (
|
14 |
def save_recommendations_to_csv(recommendations):
|
15 |
-
output = io.
|
16 |
writer = csv.writer(output)
|
17 |
writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
|
18 |
|
@@ -21,19 +21,15 @@ def save_recommendations_to_csv(recommendations):
|
|
21 |
writer.writerow(rec)
|
22 |
|
23 |
output.seek(0)
|
24 |
-
return output
|
25 |
|
26 |
# 자동으로 열을 매칭하는 함수
|
27 |
def auto_match_columns(df, required_cols):
|
28 |
-
"""
|
29 |
-
데이터프레임과 필요한 열 이름 목록을 받아서, 유사한 열 이름을 자동으로 매칭합니다.
|
30 |
-
필요에 따라 열 이름을 선택하지 못한 경우 None을 반환합니다.
|
31 |
-
"""
|
32 |
matched_cols = {}
|
33 |
for req_col in required_cols:
|
34 |
matched_col = None
|
35 |
for col in df.columns:
|
36 |
-
if req_col in col.lower():
|
37 |
matched_col = col
|
38 |
break
|
39 |
matched_cols[req_col] = matched_col
|
@@ -41,54 +37,44 @@ def auto_match_columns(df, required_cols):
|
|
41 |
|
42 |
# 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
|
43 |
def validate_and_get_columns(employee_df, program_df):
|
44 |
-
# 필요한 열
|
45 |
required_employee_cols = ["employee_id", "employee_name", "current_skills"]
|
46 |
required_program_cols = ["program_name", "skills_acquired", "duration"]
|
47 |
|
48 |
-
# 자동으로 매칭 시도
|
49 |
employee_cols = auto_match_columns(employee_df, required_employee_cols)
|
50 |
program_cols = auto_match_columns(program_df, required_program_cols)
|
51 |
|
52 |
-
# 직원 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
|
53 |
for key, value in employee_cols.items():
|
54 |
if value is None:
|
55 |
return f"μ§μ λ°μ΄ν°μμ '{key}' μ΄μ μ νν μ μμ΅λλ€. μ¬λ°λ₯Έ μ΄μ μ ννμΈμ.", None, None
|
56 |
|
57 |
-
# 프로그램 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
|
58 |
for key, value in program_cols.items():
|
59 |
if value is None:
|
60 |
return f"νλ‘κ·Έλ¨ λ°μ΄ν°μμ '{key}' μ΄μ μ νν μ μμ΅λλ€. μ¬λ°λ₯Έ μ΄μ μ ννμΈμ.", None, None
|
61 |
|
62 |
-
# 자동 매칭된 열 반환
|
63 |
return None, employee_cols, program_cols
|
64 |
|
65 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
|
66 |
def analyze_data(employee_file, program_file):
|
67 |
-
# 직원 데이터와 프로그램 데이터 읽기
|
68 |
employee_df = pd.read_csv(employee_file.name)
|
69 |
program_df = pd.read_csv(program_file.name)
|
70 |
|
71 |
-
# 열 자동 매칭 시도
|
72 |
error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
|
73 |
if error_msg:
|
74 |
return error_msg, None, None
|
75 |
|
76 |
-
# 직원의 역량과 프로그램 목표를 벡터화
|
77 |
employee_skills = employee_df[employee_cols["current_skills"]].tolist()
|
78 |
program_skills = program_df[program_cols["skills_acquired"]].tolist()
|
79 |
employee_embeddings = model.encode(employee_skills)
|
80 |
program_embeddings = model.encode(program_skills)
|
81 |
|
82 |
-
# 유사도 계산
|
83 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
84 |
|
85 |
-
# 직원별 추천 프로그램 생성
|
86 |
recommendations = []
|
87 |
recommendation_rows = [] # CSVλ‘ μ μ₯ν λ°μ΄ν°
|
88 |
for i, employee in employee_df.iterrows():
|
89 |
recommended_programs = []
|
90 |
for j, program in program_df.iterrows():
|
91 |
-
if similarities[i][j] > 0.5:
|
92 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
93 |
|
94 |
if recommended_programs:
|
@@ -100,7 +86,6 @@ def analyze_data(employee_file, program_file):
|
|
100 |
|
101 |
recommendations.append(recommendation)
|
102 |
|
103 |
-
# 네트워크 그래프 생성
|
104 |
G = nx.Graph()
|
105 |
for employee in employee_df[employee_cols['employee_name']]:
|
106 |
G.add_node(employee, type='employee')
|
@@ -113,14 +98,12 @@ def analyze_data(employee_file, program_file):
|
|
113 |
if similarities[i][j] > 0.5:
|
114 |
G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
|
115 |
|
116 |
-
# 그래프 시각화
|
117 |
plt.figure(figsize=(10, 8))
|
118 |
pos = nx.spring_layout(G)
|
119 |
nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
|
120 |
plt.title("μ§μκ³Ό νλ‘κ·Έλ¨ κ°μ κ΄κ³", fontsize=14, fontweight='bold')
|
121 |
plt.tight_layout()
|
122 |
|
123 |
-
# CSV 파일로 추천 결과 반환
|
124 |
csv_output = save_recommendations_to_csv(recommendation_rows)
|
125 |
|
126 |
return "\n".join(recommendations), plt.gcf(), csv_output
|
@@ -142,7 +125,6 @@ with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .g
|
|
142 |
chart_output = gr.Plot(label="μκ°ν μ°¨νΈ")
|
143 |
csv_download = gr.File(label="μΆμ² κ²°κ³Ό λ€μ΄λ‘λ")
|
144 |
|
145 |
-
# 분석 버튼 클릭 시 차트와 파일 다운로드를 업데이트
|
146 |
analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
|
147 |
|
148 |
# Gradio 인터페이스 실행
|
|
|
10 |
# Sentence-BERT 모델 로드
|
11 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
12 |
|
13 |
+
# 추천 결과를 CSV 파일로 저장하는 함수 (StringIO로 수정)
|
14 |
def save_recommendations_to_csv(recommendations):
|
15 |
+
output = io.StringIO()
|
16 |
writer = csv.writer(output)
|
17 |
writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
|
18 |
|
|
|
21 |
writer.writerow(rec)
|
22 |
|
23 |
output.seek(0)
|
24 |
+
return output.getvalue()
|
25 |
|
26 |
# 자동으로 열을 매칭하는 함수
|
27 |
def auto_match_columns(df, required_cols):
|
|
|
|
|
|
|
|
|
28 |
matched_cols = {}
|
29 |
for req_col in required_cols:
|
30 |
matched_col = None
|
31 |
for col in df.columns:
|
32 |
+
if req_col in col.lower():
|
33 |
matched_col = col
|
34 |
break
|
35 |
matched_cols[req_col] = matched_col
|
|
|
37 |
|
38 |
# 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
|
39 |
def validate_and_get_columns(employee_df, program_df):
|
|
|
40 |
required_employee_cols = ["employee_id", "employee_name", "current_skills"]
|
41 |
required_program_cols = ["program_name", "skills_acquired", "duration"]
|
42 |
|
|
|
43 |
employee_cols = auto_match_columns(employee_df, required_employee_cols)
|
44 |
program_cols = auto_match_columns(program_df, required_program_cols)
|
45 |
|
|
|
46 |
for key, value in employee_cols.items():
|
47 |
if value is None:
|
48 |
return f"μ§μ λ°μ΄ν°μμ '{key}' μ΄μ μ νν μ μμ΅λλ€. μ¬λ°λ₯Έ μ΄μ μ ννμΈμ.", None, None
|
49 |
|
|
|
50 |
for key, value in program_cols.items():
|
51 |
if value is None:
|
52 |
return f"νλ‘κ·Έλ¨ λ°μ΄ν°μμ '{key}' μ΄μ μ νν μ μμ΅λλ€. μ¬λ°λ₯Έ μ΄μ μ ννμΈμ.", None, None
|
53 |
|
|
|
54 |
return None, employee_cols, program_cols
|
55 |
|
56 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
|
57 |
def analyze_data(employee_file, program_file):
|
|
|
58 |
employee_df = pd.read_csv(employee_file.name)
|
59 |
program_df = pd.read_csv(program_file.name)
|
60 |
|
|
|
61 |
error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
|
62 |
if error_msg:
|
63 |
return error_msg, None, None
|
64 |
|
|
|
65 |
employee_skills = employee_df[employee_cols["current_skills"]].tolist()
|
66 |
program_skills = program_df[program_cols["skills_acquired"]].tolist()
|
67 |
employee_embeddings = model.encode(employee_skills)
|
68 |
program_embeddings = model.encode(program_skills)
|
69 |
|
|
|
70 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
71 |
|
|
|
72 |
recommendations = []
|
73 |
recommendation_rows = [] # CSVλ‘ μ μ₯ν λ°μ΄ν°
|
74 |
for i, employee in employee_df.iterrows():
|
75 |
recommended_programs = []
|
76 |
for j, program in program_df.iterrows():
|
77 |
+
if similarities[i][j] > 0.5:
|
78 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
79 |
|
80 |
if recommended_programs:
|
|
|
86 |
|
87 |
recommendations.append(recommendation)
|
88 |
|
|
|
89 |
G = nx.Graph()
|
90 |
for employee in employee_df[employee_cols['employee_name']]:
|
91 |
G.add_node(employee, type='employee')
|
|
|
98 |
if similarities[i][j] > 0.5:
|
99 |
G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
|
100 |
|
|
|
101 |
plt.figure(figsize=(10, 8))
|
102 |
pos = nx.spring_layout(G)
|
103 |
nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
|
104 |
plt.title("μ§μκ³Ό νλ‘κ·Έλ¨ κ°μ κ΄κ³", fontsize=14, fontweight='bold')
|
105 |
plt.tight_layout()
|
106 |
|
|
|
107 |
csv_output = save_recommendations_to_csv(recommendation_rows)
|
108 |
|
109 |
return "\n".join(recommendations), plt.gcf(), csv_output
|
|
|
125 |
chart_output = gr.Plot(label="μκ°ν μ°¨νΈ")
|
126 |
csv_download = gr.File(label="μΆμ² κ²°κ³Ό λ€μ΄λ‘λ")
|
127 |
|
|
|
128 |
analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
|
129 |
|
130 |
# Gradio 인터페이스 실행
|