File size: 16,848 Bytes
3082988
 
1045f2f
 
3082988
 
 
ca5eff5
3082988
 
ae88d11
 
 
 
 
 
 
 
 
 
 
 
0feee88
 
 
3082988
 
 
cb51c2a
3082988
 
 
ad85e45
3082988
ad85e45
 
98697be
ad85e45
 
 
 
 
 
 
 
 
ae88d11
 
 
ad85e45
 
 
3082988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae74b40
 
 
 
 
 
 
ae88d11
0f5901a
ad85e45
1045f2f
ad85e45
ed31199
 
 
ae88d11
 
 
ad85e45
 
 
 
ed31199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bcc136
ad85e45
ae88d11
69f55cc
ae88d11
 
69f55cc
ad85e45
 
69f55cc
ad85e45
 
3082988
3baef55
3082988
 
 
 
 
9bcc136
3082988
9bcc136
 
 
3082988
3baef55
 
3082988
 
 
3baef55
3082988
 
06204b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae88d11
65c5a55
ae88d11
06204b4
 
 
69f55cc
06204b4
 
 
 
 
dfbaf58
06204b4
 
 
 
 
 
 
3082988
ae88d11
3082988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca8cbdc
53b7ea4
3082988
 
be02948
ad85e45
3082988
 
 
 
dfa0c21
 
ae88d11
1382d6a
 
dfa0c21
ae74b40
c3f6e3b
3082988
 
3baef55
ad85e45
3baef55
da6067d
f68c4e9
9bcc136
3baef55
3082988
 
 
 
 
ae74b40
06204b4
d61bf9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06204b4
3082988
 
 
 
 
 
 
 
 
06204b4
 
ae74b40
06204b4
 
 
 
 
3082988
ae74b40
bc96a89
 
ae74b40
bc96a89
 
4ac5107
3082988
 
 
 
 
 
ae74b40
 
 
 
 
 
 
 
 
 
 
 
 
3082988
 
b9f41e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openpyxl import load_workbook
import pandas as pd
import gradio as gr
import numpy as np

# Chroma DB 로드
###기본(BGE)
model_huggingface_ori = HuggingFaceEmbeddings(model_name='BAAI/bge-m3')
persist_directory_ori = './chroma_kpi_250416'
kpi_pool_ori = Chroma(persist_directory=persist_directory_ori, embedding_function=model_huggingface_ori)
print(kpi_pool_ori._collection.count())

persist_directory_ori2 = './chroma_ncs_241230'
ncs_db_ori = Chroma(persist_directory=persist_directory_ori2, embedding_function=model_huggingface_ori)
print(ncs_db_ori._collection.count())


###

model_huggingface = HuggingFaceEmbeddings(model_name='snunlp/KR-SBERT-V40K-klueNLI-augSTS')
persist_directory1 = './chroma_kpi_250528_SBERT'
kpi_pool = Chroma(persist_directory=persist_directory1, embedding_function=model_huggingface)
print(kpi_pool._collection.count())

persist_directory2 = './chroma_NCS_SBERT_snunlp'
ncs_db = Chroma(persist_directory=persist_directory2, embedding_function=model_huggingface)
print(ncs_db._collection.count())

####

model_huggingface2 = HuggingFaceEmbeddings(model_name='jhgan/ko-sbert-sts')
persist_directory_jhgan1 = './chroma_kpi_250528_SBERTjhgan'
kpi_pool2 = Chroma(persist_directory=persist_directory_jhgan1, embedding_function=model_huggingface2)
print(kpi_pool2._collection.count())

persist_directory_jhgan2 = './chroma_NCS_SBERT'
ncs_db2 = Chroma(persist_directory=persist_directory_jhgan2, embedding_function=model_huggingface2)
print(ncs_db2._collection.count())



def search_unit(unit_query, mode):
    if mode == "BGE":
        results = ncs_db_ori.similarity_search_with_relevance_scores(unit_query, k=7)
    if mode == "SBERT-snunlp":
        results = ncs_db.similarity_search_with_relevance_scores(unit_query, k=7)
    else:
        results = ncs_db2.similarity_search_with_relevance_scores(unit_query, k=7)

    # 검색 결과 텍스트 포맷팅
    text = ""
    for i, doc in enumerate(results):
        # 텍스트와 메타데이터 처리
        unit = doc[0].page_content.replace(", ", "  /  ")
        job_name = doc[0].metadata['세분류코드명']

        # 코드 계층 처리
        code = [
            doc[0].metadata['대분류코드명'],
            doc[0].metadata['중분류코드명'],
            doc[0].metadata['소분류코드명']
        ]
        code_str = " > ".join(code)

        # 텍스트 포맷팅
        similarity = round(doc[1], 3)
        text += f"""
<span style="font-size: 18px;">**[{i+1}] {job_name}**</span>
<span style="font-size: 13px;"> &nbsp; | &nbsp; {code_str} &nbsp; | &nbsp; similarity {similarity} </span>
<br> {unit} <br>
"""
    return text

def search_unit_all(unit_query):
    text_bge = search_unit(unit_query, "BGE")
    text_snu = search_unit(unit_query, "SBERT-snunlp")
    text_jh = search_unit(unit_query, "SBERT-jhgan")

    return text_bge, text_snu, text_jh

downsample_model_ori = SentenceTransformer('BAAI/bge-m3')
downsample_model = SentenceTransformer('snunlp/KR-SBERT-V40K-klueNLI-augSTS')
downsample_model_2 = SentenceTransformer('jhgan/ko-sbert-sts')

def filter_semantically_similar_texts_by_embedding(df, mode, embedding_field='embedding_input', similarity_threshold=0.8):
    texts = df[embedding_field].tolist()

    # 텍스트를 임베딩
    if mode == "BGE":
        embeddings = downsample_model_ori.encode(texts)
    elif mode == "SBERT-snunlp":
        embeddings = downsample_model.encode(texts)
    else:
        embeddings = downsample_model_2.encode(texts)
    
    # 코사인 유사도 행렬 계산
    similarity_matrix = cosine_similarity(embeddings)
    np.fill_diagonal(similarity_matrix, 0)

    # 유사도가 threshold 이상인 항목 필터링
    filtered_indices = []
    excluded_indices = set()

    for i in range(len(texts)):
        if i not in excluded_indices:
            filtered_indices.append(i)
            similar_indices = np.where(similarity_matrix[i] > similarity_threshold)[0]
            excluded_indices.update(similar_indices)

    return df.iloc[filtered_indices].reset_index(drop=True)
    

def search_kpi(kpi_query, kpi_count, mode):
    if mode == "BGE":
        print("BGE 검색 시작")
        results = kpi_pool_ori.similarity_search_with_relevance_scores(kpi_query, k=50)
    elif mode == "SBERT-snunlp":
        print("SBERT-snunlp 검색 시작")
        results = kpi_pool.similarity_search_with_relevance_scores(kpi_query, k=50)
    else:
        print("SBERT-jhgan 검색 시작")
        results = kpi_pool2.similarity_search_with_relevance_scores(kpi_query, k=50)


    # 메타데이터 + 점수 추출
    records = [
        {**doc.metadata, '유사도점수': score}
        for doc, score in results
    ]

    # DataFrame으로 변환
    df = pd.DataFrame(records)
    df['카테고리'] = df['BSC 관점'] + " > " + df['전략방향']
    df = df.drop_duplicates(subset=['정의', '산식']).head(15)
    df = df.iloc[:kpi_count]

    # 카테고리 생성 (BSC 관점 + 전략방향)
    df['카테고리'] = df['BSC 관점'] + " > " + df['전략방향']
    visible_df = df[['지표명', '산식', '비고', '카테고리']].copy()
    kpi_list = list(range(1, len(visible_df) + 1))
    kpi_df = df[['지표명', '정의', '산식', '유형', '비고', 'BSC 관점', '전략방향', '전략과제']].copy()

    return gr.update(visible=True), gr.update(choices=kpi_list), visible_df, kpi_df, kpi_list, gr.update(visible=False)

def search_kpi_one(kpi_query, kpi_count, mode):
    if mode == "BGE":
        print("BGE 검색 시작")
        results = kpi_pool_ori.similarity_search_with_relevance_scores(kpi_query, k=50)
    elif mode == "SBERT-snunlp":
        print("SBERT-snunlp 검색 시작")
        results = kpi_pool.similarity_search_with_relevance_scores(kpi_query, k=50)
    else:
        print("SBERT-jhgan 검색 시작")
        results = kpi_pool2.similarity_search_with_relevance_scores(kpi_query, k=50)

    # 메타데이터 + 점수 추출
    records = [
        {**doc.metadata, '유사도점수': score}
        for doc, score in results
    ]

    # DataFrame으로 변환
    df = pd.DataFrame(records)
    df['카테고리'] = df['BSC 관점'] + " > " + df['전략방향']
    df = df.drop_duplicates(subset=['정의', '산식']).head(15)
    df = df.iloc[:kpi_count]

    # 카테고리 생성 (BSC 관점 + 전략방향)
    df['카테고리'] = df['BSC 관점'] + " > " + df['전략방향']
    visible_df = df[['지표명', '산식', '비고', '카테고리']].copy()
    kpi_list = list(range(1, len(visible_df) + 1))
    kpi_df = df[['지표명', '정의', '산식', '유형', '비고', 'BSC 관점', '전략방향', '전략과제']].copy()

    return visible_df, kpi_df, kpi_list
    
def search_kpi_all_models(kpi_query, kpi_count):
    print("함수 호출, 테이블 생성 시작")
    # 각 모델별 결과
    visible_bge, kpi_bge, list_bge = search_kpi_one(kpi_query, kpi_count, "BGE")
    visible_sn, kpi_sn, list_sn = search_kpi_one(kpi_query, kpi_count, "SBERT-snunlp")
    visible_jh, kpi_jh, list_jh = search_kpi_one(kpi_query, kpi_count, "SBERT-jhgan")
    print("함수 종료")

    visible_df = [visible_bge, visible_sn, visible_jh]
    kpi_df = [kpi_bge, kpi_sn, kpi_jh]
    kpi_list = [list_bge, list_sn, list_jh]
    
    return (
        gr.update(visible=True), 
        visible_df[0],                  # kpi_table1
        visible_df[1],                  # kpi_table2
        visible_df[2],                  # kpi_table3
  #      kpi_df, 
        gr.update(visible=False)
    )

    
# 셀 주소와 값을 매핑한 딕셔너리 생성
def make_excel_table(dataframe, start_cell):
    table_dict = {}

    # 시작 셀 좌표 계산
    start_row = int(''.join(filter(str.isdigit, start_cell)))  # 시작 행 (숫자)
    start_col = ord(start_cell[0].upper()) - ord('A') + 1  # 시작 열 (문자 -> 숫자)

    # 데이터프레임 반복 처리
    for row_index, row in enumerate(dataframe.itertuples(index=False), start=start_row):
        for col_index, value in enumerate(row, start=start_col):
            # 셀 주소 계산 (예: B5, C5, ...)
            cell = f"{chr(ord('A') + col_index - 1)}{row_index}"
            table_dict[cell] = value

    return table_dict


# 다운로드 파일 생성 함수
def generate_excel(df, kpi_list, kpi_query):

    # 인덱스(-1 보정)로 DataFrame 필터링
    filtered_df = df.iloc[[int(i) - 1 for i in kpi_list]] if kpi_list else pd.DataFrame(columns=df.columns)

    # 엑셀 파일 열기
    file_path = "./template.xlsx"
    workbook = load_workbook(file_path)
    sheet = workbook.active

    update_values = make_excel_table(filtered_df, 'B4')
    for cell, value in update_values.items():
        sheet[cell].value = value

    # 워크시트 기본 확대 수준 설정(%)
    sheet.sheet_view.zoomScaleNormal = 85

    # 파일 저장
    output_file = f"KPI_POOL_{kpi_query}.xlsx"
    workbook.save(output_file)

    return gr.update(value=output_file, visible=True)

def toggle_selection(current_selection, kpi_list):
    if set(current_selection) == set(kpi_list):  # 이미 전체 선택된 경우
        return []
    else:  # 전체 선택 안 된 경우
        return kpi_list


css = """
/* 데이터프레임 스타일 */
.gradio-container table {
    table-layout: fixed;
    width: 100%;
}
.gradio-container td{
    white-space: nowrap !important;
    overflow-x: auto !important;
    text-align: left;
    font-size: 13px;
    letter-spacing: -1px !important;
}
/* 헤더 기본 스타일 */
.gradio-container th[aria-sort]::after {
    visibility: hidden !important;  /* 아이콘만 감춤 */
}
.gradio-container th .header-content {
    justify-content: center !important;
    text-align: center;
    font-size: 13px;
    letter-spacing: -1px !important;
}
.gradio-container th span {
    text-align: center !important;
    display: block !important;
    width: 100%;
}
.v_check { padding-top: 39px !important;
    margin-right: 0px !important;
    padding-right: 0px !important;
}
.v_check div { display: block !important; }
.v_check label {
   max-width: 100%; /* 전체 너비 유지 */
   padding: 3px; /* 내부 여백 조정 */
   margin-bottom: 7.2px; /* 라벨 간 간격 설정 */
   border: 1px solid transparent !important;
   letter-spacing: -1px !important; /* 자간 좁게 설정*/
   justify-content: center;
}
div.svelte-1nguped {
    background: transparent !important;
    border: none !important;
}
.left-padding { padding-left: 43px !important;  /* 왼쪽 패딩 추가 */ }

.custom-markdown h3 {
    font-size: 18px; /* 본문 및 목록 글자 크기 */
}
.custom-markdown blockquote {
    margin-bottom: 8px !important;
}
.custom-markdown p, .custom-markdown li {
    margin-top: 8px !important;
    font-size: 15px; /* 본문 및 목록 글자 크기 */
    line-height: 1.5;
}
.custom-markdown a {
    font-size: 15px;
    color: #000000;
}
"""

guide = """> ### 저작권 및 유의사항 안내
- 본 앱은 시앤피컨설팅이 개발한 KPI POOL 검색 도구로, AI 기반 추천 결과는 참고용으로 제공됩니다.
- AI 기반 추천 알고리즘은 전문 컨설팅을 대체할 수 없으며, 반드시 조직의 전략, 평가 목적, 데이터 수집 가능성 등과의 적합성 검토가 필요합니다.
- KPI 설정과 적용에 대한 개별 맞춤 검토는 시앤피컨설팅의 전문 컨설턴트에게 문의해 주세요.
<br><br>
> ### Contact Us
시앤피컨설팅그룹 일터혁신본부  |  Tel. 02-6257-1448  |  http://www.cnp.re.kr  |  hpw@cnp.re.kr
"""

empty_df = pd.DataFrame(columns=["지표명", "산식", "비고", "카테고리"])

with gr.Blocks(css=css) as demo:

    df_state = gr.State()
    check_state = gr.State()
    with gr.Row():
        gr.Markdown("""
        - BGE : 기존 검색방법. 키워드 검색 최적화.
        - SBERT-snunlp : 의미 중심 검색.
        - SBERT-jhgan : 유사도 중심 검색.
        """)
        #mode = gr.Dropdown(choices={"BGE","SBERT-snunlp","SBERT-jhgan"}, label="모델을 선택하세요")
        gr.Markdown("                                                              ")
    with gr.Tab("KPI Pool 검색"):
        with gr.Column(elem_classes="left-padding"):
            with gr.Row():
                
                kpi_query = gr.Textbox(scale=30, submit_btn=True,
                                    label= "성과평가를 진행할 [핵심업무 or 핵심성공요인]을 입력해주세요😊!  (검색 키워드는 직무기술서 또는 NCS 능력단위 참고)",
                                    placeholder="예: 자금조달 or 자금조달의 적절성")
                kpi_count = gr.Slider(label="KPI 출력 개수", value = 15, minimum=5, maximum=15, step=1, scale=7)
                
            copyright = gr.Markdown(guide, visible=True, elem_classes="custom-markdown")

        with gr.Column(visible=False) as output_area:
            with gr.Group():
                with gr.Row():
                    #kpi_checkbox = gr.CheckboxGroup(choices=[], interactive=True, elem_classes="v_check", container=False, min_width=5, scale=1)

                    with gr.Group():
                        gr.Markdown("BGE")
                        kpi_table1 = gr.Dataframe(
                            scale=33, value=[], show_label=False, max_height=650,
                            column_widths=[80, 180, 160, 70],
                            datatype=["str", "str", "str", "str"]
                        )
                    with gr.Group():
                        gr.Markdown("SBERT-snunlp")
                        kpi_table2 = gr.Dataframe(
                            scale=33, value=[], show_label=False, max_height=650,
                            column_widths=[80, 180, 160, 70],
                            datatype=["str", "str", "str", "str"]
                        )
                    with gr.Group():
                        gr.Markdown("SBERT-jhgan")
                        kpi_table3 = gr.Dataframe(
                            scale=33, value=[], show_label=False, max_height=650,
                            column_widths=[80, 180, 160, 70],
                            datatype=["str", "str", "str", "str"]
                        )

            with gr.Row():
                gr.Column(scale=2)
                select_button = gr.Button("All")
                download_button = gr.Button("Download")
                clear_button = gr.Button("Clear")
                gr.Column(scale=2)

            file_download = gr.Files(label="Download", interactive=False, visible=False)
        
        #kpi_query.submit(search_kpi, inputs = [kpi_query, kpi_count, mode], outputs = [output_area, kpi_checkbox, kpi_table, df_state, check_state, copyright])
        kpi_query.submit(search_kpi_all_models, inputs = [kpi_query, kpi_count], outputs = [output_area, kpi_table1, kpi_table2, kpi_table3, copyright])
        #select_button.click(fn=toggle_selection, inputs=[kpi_checkbox, check_state], outputs=kpi_checkbox, show_progress='hidden')
        #download_button.click(
        #    generate_excel,
        #    inputs=[df_state, kpi_checkbox, kpi_query],
        #    outputs=[file_download]
        #)
        clear_button.click(
            fn=lambda: (None, None, gr.update(visible=False),
                        gr.update(choices=[], value=[]), gr.update(value=[]),gr.update(value=[]),gr.update(value=[]), 
                        gr.update(value=None, visible=False), gr.update(visible=True)),
            outputs=[df_state, kpi_query, output_area,
                     kpi_checkbox, kpi_table1, kpi_table2, kpi_table3, 
                     file_download, copyright],
            show_progress='hidden'
        )


    with gr.Tab("[참고] NCS 능력단위"):
        unit_query = gr.Textbox(label="업종 or 직종 + 직무명을 입력하세요😊", scale=1, submit_btn=True,
                            placeholder="예: 의약품 법률자문, 공공행정 경영기획, 재무회계 자금")
        with gr.Row():
            with gr.Group():
                gr.Markdown("BGE")
                unit_result1 = gr.Markdown()
            with gr.Group():
                gr.Markdown("SBERT-snunlp")
            unit_result2 = gr.Markdown()
            with gr.Group():
                gr.Markdown("SBERT-jhgan")
            unit_result3 = gr.Markdown()
            
        #unit_query.submit(search_unit, inputs=[unit_query, mode], outputs=[unit_result])
        unit_query.submit(search_unit_all, inputs=unit_query, outputs=[unit_result1, unit_result2, unit_result3])


demo.launch(debug=True)