soojeongcrystal committed on
Commit
6ec37d8
·
verified ·
1 Parent(s): 65f9910

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -23
app.py CHANGED
@@ -10,9 +10,9 @@ import io
10
  # Sentence-BERT 모델 로드
11
  model = SentenceTransformer('all-MiniLM-L6-v2')
12
 
13
- # 추천 결과를 CSV 파일로 저장하는 함수 (BytesIO로 수정)
14
  def save_recommendations_to_csv(recommendations):
15
- output = io.BytesIO()
16
  writer = csv.writer(output)
17
  writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
18
 
@@ -21,19 +21,15 @@ def save_recommendations_to_csv(recommendations):
21
  writer.writerow(rec)
22
 
23
  output.seek(0)
24
- return output
25
 
26
  # μžλ™μœΌλ‘œ 열을 λ§€μΉ­ν•˜λŠ” ν•¨μˆ˜
27
  def auto_match_columns(df, required_cols):
28
- """
29
- λ°μ΄ν„°ν”„λ ˆμž„κ³Ό ν•„μš”ν•œ μ—΄ 이름 λͺ©λ‘μ„ λ°›μ•„μ„œ, μœ μ‚¬ν•œ μ—΄ 이름을 μžλ™μœΌλ‘œ λ§€μΉ­ν•©λ‹ˆλ‹€.
30
- 필요에 따라 열 이름을 선택하지 못한 경우 None을 반환합니다.
31
- """
32
  matched_cols = {}
33
  for req_col in required_cols:
34
  matched_col = None
35
  for col in df.columns:
36
- if req_col in col.lower(): # μœ μ‚¬ν•œ μ—΄ 이름을 λ§€μΉ­
37
  matched_col = col
38
  break
39
  matched_cols[req_col] = matched_col
@@ -41,54 +37,44 @@ def auto_match_columns(df, required_cols):
41
 
42
  # 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
43
  def validate_and_get_columns(employee_df, program_df):
44
- # 필요한 열
45
  required_employee_cols = ["employee_id", "employee_name", "current_skills"]
46
  required_program_cols = ["program_name", "skills_acquired", "duration"]
47
 
48
- # μžλ™μœΌλ‘œ λ§€μΉ­ μ‹œλ„
49
  employee_cols = auto_match_columns(employee_df, required_employee_cols)
50
  program_cols = auto_match_columns(program_df, required_program_cols)
51
 
52
- # 직원 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
53
  for key, value in employee_cols.items():
54
  if value is None:
55
  return f"직원 λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None
56
 
57
- # 프로그램 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
58
  for key, value in program_cols.items():
59
  if value is None:
60
  return f"ν”„λ‘œκ·Έλž¨ λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None
61
 
62
- # μžλ™ 맀칭된 μ—΄ λ°˜ν™˜
63
  return None, employee_cols, program_cols
64
 
65
  # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
66
  def analyze_data(employee_file, program_file):
67
- # 직원 데이터와 프로그램 데이터 읽기
68
  employee_df = pd.read_csv(employee_file.name)
69
  program_df = pd.read_csv(program_file.name)
70
 
71
- # 열 자동 매칭 시도
72
  error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
73
  if error_msg:
74
  return error_msg, None, None
75
 
76
- # 직원의 역량과 프로그램 목표를 벡터화
77
  employee_skills = employee_df[employee_cols["current_skills"]].tolist()
78
  program_skills = program_df[program_cols["skills_acquired"]].tolist()
79
  employee_embeddings = model.encode(employee_skills)
80
  program_embeddings = model.encode(program_skills)
81
 
82
- # 유사도 계산
83
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
84
 
85
- # 직원별 추천 프로그램 생성
86
  recommendations = []
87
  recommendation_rows = [] # CSV둜 μ €μž₯ν•  데이터
88
  for i, employee in employee_df.iterrows():
89
  recommended_programs = []
90
  for j, program in program_df.iterrows():
91
- if similarities[i][j] > 0.5: # μœ μ‚¬λ„ μž„κ³„κ°’
92
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
93
 
94
  if recommended_programs:
@@ -100,7 +86,6 @@ def analyze_data(employee_file, program_file):
100
 
101
  recommendations.append(recommendation)
102
 
103
- # 네트워크 그래프 생성
104
  G = nx.Graph()
105
  for employee in employee_df[employee_cols['employee_name']]:
106
  G.add_node(employee, type='employee')
@@ -113,14 +98,12 @@ def analyze_data(employee_file, program_file):
113
  if similarities[i][j] > 0.5:
114
  G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
115
 
116
- # 그래프 시각화
117
  plt.figure(figsize=(10, 8))
118
  pos = nx.spring_layout(G)
119
  nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
120
  plt.title("직원과 ν”„λ‘œκ·Έλž¨ κ°„μ˜ 관계", fontsize=14, fontweight='bold')
121
  plt.tight_layout()
122
 
123
- # CSV 파일로 추천 결과 반환
124
  csv_output = save_recommendations_to_csv(recommendation_rows)
125
 
126
  return "\n".join(recommendations), plt.gcf(), csv_output
@@ -142,7 +125,6 @@ with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .g
142
  chart_output = gr.Plot(label="μ‹œκ°ν™” 차트")
143
  csv_download = gr.File(label="μΆ”μ²œ κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")
144
 
145
- # 분석 버튼 클릭 시 차트와 파일 다운로드를 업데이트
146
  analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
147
 
148
  # Gradio 인터페이스 실행
 
10
  # Sentence-BERT 모델 로드
11
  model = SentenceTransformer('all-MiniLM-L6-v2')
12
 
13
+ # 추천 결과를 CSV 파일로 저장하는 함수 (StringIO로 수정)
14
  def save_recommendations_to_csv(recommendations):
15
+ output = io.StringIO()
16
  writer = csv.writer(output)
17
  writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
18
 
 
21
  writer.writerow(rec)
22
 
23
  output.seek(0)
24
+ return output.getvalue()
25
 
26
  # μžλ™μœΌλ‘œ 열을 λ§€μΉ­ν•˜λŠ” ν•¨μˆ˜
27
  def auto_match_columns(df, required_cols):
 
 
 
 
28
  matched_cols = {}
29
  for req_col in required_cols:
30
  matched_col = None
31
  for col in df.columns:
32
+ if req_col in col.lower():
33
  matched_col = col
34
  break
35
  matched_cols[req_col] = matched_col
 
37
 
38
  # 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
39
  def validate_and_get_columns(employee_df, program_df):
 
40
  required_employee_cols = ["employee_id", "employee_name", "current_skills"]
41
  required_program_cols = ["program_name", "skills_acquired", "duration"]
42
 
 
43
  employee_cols = auto_match_columns(employee_df, required_employee_cols)
44
  program_cols = auto_match_columns(program_df, required_program_cols)
45
 
 
46
  for key, value in employee_cols.items():
47
  if value is None:
48
  return f"직원 λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None
49
 
 
50
  for key, value in program_cols.items():
51
  if value is None:
52
  return f"ν”„λ‘œκ·Έλž¨ λ°μ΄ν„°μ—μ„œ '{key}' 열을 선택할 수 μ—†μŠ΅λ‹ˆλ‹€. μ˜¬λ°”λ₯Έ 열을 μ„ νƒν•˜μ„Έμš”.", None, None
53
 
 
54
  return None, employee_cols, program_cols
55
 
56
  # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
57
  def analyze_data(employee_file, program_file):
 
58
  employee_df = pd.read_csv(employee_file.name)
59
  program_df = pd.read_csv(program_file.name)
60
 
 
61
  error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
62
  if error_msg:
63
  return error_msg, None, None
64
 
 
65
  employee_skills = employee_df[employee_cols["current_skills"]].tolist()
66
  program_skills = program_df[program_cols["skills_acquired"]].tolist()
67
  employee_embeddings = model.encode(employee_skills)
68
  program_embeddings = model.encode(program_skills)
69
 
 
70
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
71
 
 
72
  recommendations = []
73
  recommendation_rows = [] # CSV둜 μ €μž₯ν•  데이터
74
  for i, employee in employee_df.iterrows():
75
  recommended_programs = []
76
  for j, program in program_df.iterrows():
77
+ if similarities[i][j] > 0.5:
78
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
79
 
80
  if recommended_programs:
 
86
 
87
  recommendations.append(recommendation)
88
 
 
89
  G = nx.Graph()
90
  for employee in employee_df[employee_cols['employee_name']]:
91
  G.add_node(employee, type='employee')
 
98
  if similarities[i][j] > 0.5:
99
  G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
100
 
 
101
  plt.figure(figsize=(10, 8))
102
  pos = nx.spring_layout(G)
103
  nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
104
  plt.title("직원과 ν”„λ‘œκ·Έλž¨ κ°„μ˜ 관계", fontsize=14, fontweight='bold')
105
  plt.tight_layout()
106
 
 
107
  csv_output = save_recommendations_to_csv(recommendation_rows)
108
 
109
  return "\n".join(recommendations), plt.gcf(), csv_output
 
125
  chart_output = gr.Plot(label="μ‹œκ°ν™” 차트")
126
  csv_download = gr.File(label="μΆ”μ²œ κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")
127
 
 
128
  analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
129
 
130
  # Gradio 인터페이스 실행