Spaces:

soojeongcrystal
/

hybridRAG

Sleeping

App Files Files Community

soojeongcrystal committed on Sep 7, 2024

Commit

6ec37d8

verified ·

1 Parent(s): 65f9910

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -23

app.py CHANGED Viewed

@@ -10,9 +10,9 @@ import io
 # Sentence-BERT 모델 로드
 model = SentenceTransformer('all-MiniLM-L6-v2')
-# 추천 결과를 CSV 파일로 저장하는 함수 (BytesIO로 수정)
 def save_recommendations_to_csv(recommendations):
-    output = io.BytesIO()
     writer = csv.writer(output)
     writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
@@ -21,19 +21,15 @@ def save_recommendations_to_csv(recommendations):
         writer.writerow(rec)
     output.seek(0)
-    return output
 # 자동으로 열을 매칭하는 함수
 def auto_match_columns(df, required_cols):
-    """
-    데이터프레임과 필요한 열 이름 목록을 받아서, 유사한 열 이름을 자동으로 매칭합니다.
-    필요에 따라 열 이름을 선택하지 못한 경우 None을 반환합니다.
-    """
     matched_cols = {}
     for req_col in required_cols:
         matched_col = None
         for col in df.columns:
-            if req_col in col.lower():  # 유사한 열 이름을 매칭
                 matched_col = col
                 break
         matched_cols[req_col] = matched_col
@@ -41,54 +37,44 @@ def auto_match_columns(df, required_cols):
 # 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
 def validate_and_get_columns(employee_df, program_df):
-    # 필요한 열
     required_employee_cols = ["employee_id", "employee_name", "current_skills"]
     required_program_cols = ["program_name", "skills_acquired", "duration"]
-    # 자동으로 매칭 시도
     employee_cols = auto_match_columns(employee_df, required_employee_cols)
     program_cols = auto_match_columns(program_df, required_program_cols)
-    # 직원 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
     for key, value in employee_cols.items():
         if value is None:
             return f"직원 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None
-    # 프로그램 데이터 열 중 자동 매칭 실패 시 사용자에게 선택하도록 유도
     for key, value in program_cols.items():
         if value is None:
             return f"프로그램 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None
-    # 자동 매칭된 열 반환
     return None, employee_cols, program_cols
 # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
 def analyze_data(employee_file, program_file):
-    # 직원 데이터와 프로그램 데이터 읽기
     employee_df = pd.read_csv(employee_file.name)
     program_df = pd.read_csv(program_file.name)
-    # 열 자동 매칭 시도
     error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
     if error_msg:
         return error_msg, None, None
-    # 직원의 역량과 프로그램 목표를 벡터화
     employee_skills = employee_df[employee_cols["current_skills"]].tolist()
     program_skills = program_df[program_cols["skills_acquired"]].tolist()
     employee_embeddings = model.encode(employee_skills)
     program_embeddings = model.encode(program_skills)
-    # 유사도 계산
     similarities = cosine_similarity(employee_embeddings, program_embeddings)
-    # 직원별 추천 프로그램 생성
     recommendations = []
     recommendation_rows = []  # CSV로 저장할 데이터
     for i, employee in employee_df.iterrows():
         recommended_programs = []
         for j, program in program_df.iterrows():
-            if similarities[i][j] > 0.5:  # 유사도 임계값
                 recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
         if recommended_programs:
@@ -100,7 +86,6 @@ def analyze_data(employee_file, program_file):
         recommendations.append(recommendation)
-    # 네트워크 그래프 생성
     G = nx.Graph()
     for employee in employee_df[employee_cols['employee_name']]:
         G.add_node(employee, type='employee')
@@ -113,14 +98,12 @@ def analyze_data(employee_file, program_file):
             if similarities[i][j] > 0.5:
                 G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
-    # 그래프 시각화
     plt.figure(figsize=(10, 8))
     pos = nx.spring_layout(G)
     nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
     plt.title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold')
     plt.tight_layout()
-    # CSV 파일로 추천 결과 반환
     csv_output = save_recommendations_to_csv(recommendation_rows)
     return "\n".join(recommendations), plt.gcf(), csv_output
@@ -142,7 +125,6 @@ with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .g
             chart_output = gr.Plot(label="시각화 차트")
             csv_download = gr.File(label="추천 결과 다운로드")
-    # 분석 버튼 클릭 시 차트와 파일 다운로드를 업데이트
     analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
 # Gradio 인터페이스 실행

 # Sentence-BERT 모델 로드
 model = SentenceTransformer('all-MiniLM-L6-v2')
+# 추천 결과를 CSV 파일로 저장하는 함수 (StringIO로 수정)
 def save_recommendations_to_csv(recommendations):
+    output = io.StringIO()
     writer = csv.writer(output)
     writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
         writer.writerow(rec)
     output.seek(0)
+    return output.getvalue()
 # 자동으로 열을 매칭하는 함수
 def auto_match_columns(df, required_cols):
     matched_cols = {}
     for req_col in required_cols:
         matched_col = None
         for col in df.columns:
+            if req_col in col.lower():
                 matched_col = col
                 break
         matched_cols[req_col] = matched_col
 # 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
 def validate_and_get_columns(employee_df, program_df):
     """Auto-match the required employee and program columns and validate them.
     Both data frames are passed to ``auto_match_columns`` together with the
     list of column names this app requires. The result of each call is
     treated as a mapping of required name -> matched column (``None`` when
     nothing matched).
     Returns a 3-tuple ``(error_message, employee_cols, program_cols)``:
     * on the first unmatched required column (employee columns are checked
       before program columns): ``(message, None, None)``;
     * when everything matched: ``(None, employee_cols, program_cols)``.
     """
     employee_cols = auto_match_columns(
         employee_df, ["employee_id", "employee_name", "current_skills"]
     )
     program_cols = auto_match_columns(
         program_df, ["program_name", "skills_acquired", "duration"]
     )
     # Employee data is validated first; only the first missing column is reported.
     unmatched_employee = [name for name, col in employee_cols.items() if col is None]
     if unmatched_employee:
         key = unmatched_employee[0]
         return f"직원 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None
     # Program data is validated only once every employee column is matched.
     unmatched_program = [name for name, col in program_cols.items() if col is None]
     if unmatched_program:
         key = unmatched_program[0]
         return f"프로그램 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None
     return None, employee_cols, program_cols
 # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
 def analyze_data(employee_file, program_file):
     employee_df = pd.read_csv(employee_file.name)
     program_df = pd.read_csv(program_file.name)
     error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
     if error_msg:
         return error_msg, None, None
     employee_skills = employee_df[employee_cols["current_skills"]].tolist()
     program_skills = program_df[program_cols["skills_acquired"]].tolist()
     employee_embeddings = model.encode(employee_skills)
     program_embeddings = model.encode(program_skills)
     similarities = cosine_similarity(employee_embeddings, program_embeddings)
     recommendations = []
     recommendation_rows = []  # CSV로 저장할 데이터
     for i, employee in employee_df.iterrows():
         recommended_programs = []
         for j, program in program_df.iterrows():
+            if similarities[i][j] > 0.5:
                 recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
         if recommended_programs:
         recommendations.append(recommendation)
     G = nx.Graph()
     for employee in employee_df[employee_cols['employee_name']]:
         G.add_node(employee, type='employee')
             if similarities[i][j] > 0.5:
                 G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
     plt.figure(figsize=(10, 8))
     pos = nx.spring_layout(G)
     nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
     plt.title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold')
     plt.tight_layout()
     csv_output = save_recommendations_to_csv(recommendation_rows)
     return "\n".join(recommendations), plt.gcf(), csv_output
             chart_output = gr.Plot(label="시각화 차트")
             csv_download = gr.File(label="추천 결과 다운로드")
     analyze_button.click(analyze_data, inputs=[employee_file, program_file], outputs=[output_text, chart_output, csv_download])
 # Gradio 인터페이스 실행