llm_eval_system / examples /test_metrics /cod_chat_metric_test.py
HoneyTian's picture
update
dbd1ddd
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import sys
import time
from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
# Absolute directory of this script; its parent ("../") is appended to sys.path
# before the remaining imports below are executed.
# NOTE(review): presumably this is what makes `project_settings` importable — confirm.
pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))
import openai
from openai import AzureOpenAI
from project_settings import environment, project_path
def get_args():
    """
    Parse the command-line options of this metric test script.

    Options (both are strings):
        --model_name: model passed to the chat completion request; defaults to "gpt-4o".
        --service: key that main() looks up in the project environment to obtain the
            client settings; defaults to "west_us_chatgpt_openai_azure_com".

    Example:
        python3 cod_chat_metric_test.py --model_name gpt-4o --service west_us_chatgpt_openai_azure_com

    Returns:
        argparse.Namespace: carries the attributes `model_name` and `service`.
    """
    arg_parser = argparse.ArgumentParser()

    # (flag, default) pairs; "gpt-4o-mini" is another model name that has been used here.
    option_defaults = (
        ("--model_name", "gpt-4o"),
        ("--service", "west_us_chatgpt_openai_azure_com"),
    )
    for flag, default_value in option_defaults:
        arg_parser.add_argument(flag, default=default_value, type=str)

    return arg_parser.parse_args()
# Grading prompt that main() sends verbatim as the single user message.
# Layout: scenario setup (<Identity>, <Job>, <Background>, <Constraint>, <Current Job>),
# a four-part rubric under <Task> (1.5 + 1.5 + 1.0 + 1.0 points), one scored <Example>
# whose score is a JSON object, and a final conversation that stops at "## Score:" so
# the model is left to produce the score. Surrounding whitespace is removed by .strip().
# NOTE(review): the rubric names criterion (1) "Topic Related" while the example JSON
# uses the key "Task Related" — confirm which label is intended.
# NOTE(review): the example's "Client Reply Related - Explain" value contains unescaped
# double quotes, so the example score is not strictly valid JSON — confirm this is acceptable.
prompt = """
<Identity>
You are a telephone salesperson for a COD e-commerce company. You are good at communicating with users over the phone and conducting product marketing and promotion.
<Job>
Your task is to stimulate customer interest in products and eventually complete marketing conversion so that customers agree to buy. For customers who have no intention, you will collect reasons and conduct secondary persuasion to achieve the goal of successful conversion.
<Background>
(1) Customer background: The customer has browsed or added a 【专抹顽固斑点】祛斑精华油 to the shopping cart, but has not completed the final payment order;
(2) Product information: 【专抹顽固斑点】升級版精华油,不止祛斑还能美白,改善肌肤泛红,令肌肤稳定光泽透亮,限时促销,原价799一瓶,限时499两瓶;
(3) Discount information: 1. Order now and get a discount of 499两瓶 yuan; 2. Promotion valid for 3 days; 3. Lightning delivery; 4. 24-hour VIP service;
<Constraint>
- Communicate in a cordial, friendly, professional and witty manner;
- Communicate as concisely and effectively as possible (about 20 words) to avoid clients having no patience for communication;
- Imitate real-person communication in a telephone scenario, such as using informal verbal expressions, such as "Well...", "Then...";
- For questions you don't know, answer with words such as "You can discuss this with us in detail later", and don't make assumptions or promises without authorization.
- If the client does not have a positive or meaningful answer, just continue to persuade, and don't ask the client, "You say", "Please speak" and other words;
- Clients generally do not actively communicate, you need to guide the communication;
- Please use the telephone communication expressions that conform to the cultural habits in Taiwan.
<Current Job>
Your goal is to stimulate customer interest in products and eventually complete marketing conversion so that customers agree to buy.
Step 1: You will inform customers of the purpose of the call and broadcast brief product information, and ask customers about their intentions;
Step 2: You will answer customers' questions, including product information, discount information, etc.;
Step 3: Finally, you need to ask users about their intentions;
<Task>
For the above task settings, please rate the last one **Assistant Response** and give reasons for your rating.
(1) Topic Related. The Assistant's response is consistent with the task purpose, only contain the intended message to the client, without any `Note`, `Tips`, with a total score of 1.5 points.
(2) Client Reply Related. Whether the Assistant's response reflects the client's words well. Total score: 1.5 points.
(3) Success Related. Whether the Assistant's Response push the ultimate success goal of the task. Score: 1.0 point.
(4) Brevity. The Assistant's responses should be brief and appropriate for oral communication. Total score: 1.0 points.
<Example>
## Conversation:
client: 你好
Assistant: 您好,您之前购买的【专抹顽固斑点】祛斑精华油全新升级了,新版特别添加【复配烟碱醯胺+熊果苷】双成分,祛斑率提升至百分之九十八点二,作为VIP客户,现在购买立享买一送一,499两瓶,要帮您同步升级到新版吗?
client: 喂。
client: 不不,不必要,不必要。
Assistant:
## Assistant Response:
我明白了,不过这款精华油现在有限时促销,499两瓶,效果真的很好哦!您有没有考虑过改善肌肤问题呢?
## Score:
{
"Task Related": 1.5,
"Task Related - Explain": "The Assistant's response is consistent with the task purpose, which is to promote the product and inform the client about the ongoing promotion. The Assistant effectively mentions the limited-time offer and emphasizes the product's benefits.",
"Client Reply Related": 0.9,
"Client Reply Related - Explain": "The response does not fully reflect the client's words. The client expressed disinterest ("不不,不必要,不必要"), but the Assistant did not acknowledge the client's reluctance adequately before continuing with persuasion.",
"Success Related": 0.9,
"Success Related - Explain": "The Assistant's response attempts to push towards the success goal by mentioning the promotion and benefits, but it could be more effective if it engaged with the client's concerns or disinterest more directly.",
"Brevity": 0.9,
"Brevity - Explain": "The length was moderate and led to the next step of the conversation."
}
<Finish This Task>
## Conversation:
client: 你好
Assistant: 您好。您之前购买的【专抹顽固斑点】祛斑精华油全新升级了,新版特别添加【复配烟碱醯胺+熊果苷】双成分,祛斑率提升至百分之九十八点二。作为VIP客户,现在购买立享买一送一,499两瓶,要帮您同步升级到新版吗?
client: 我上次购买的都没有效,可以退吗?
Assistant:
## Assistant Response:
了解您的顾虑,您可以跟我们客服详细讨论退货事宜。不过,新的升级版效果更佳,您可以试试看,限时499两瓶,真的很划算哦!
## Score:
""".strip()
# <Example>
def main():
    """
    Send the module-level grading prompt to an Azure OpenAI model and print the reply.

    The --service option selects an entry in the project `environment`; that entry is
    decoded with json.loads and expanded as keyword arguments for AzureOpenAI. The
    prompt is sent as one non-streaming user message and the content of the first
    returned choice is written to stdout.

    Returns:
        None
    """
    cli = get_args()

    # The decoded entry is splatted into the constructor, so it is expected to be a
    # mapping of AzureOpenAI keyword arguments.
    azure_kwargs = environment.get(cli.service, dtype=json.loads)
    azure_client = AzureOpenAI(**azure_kwargs)

    chat_messages = [{"role": "user", "content": prompt}]

    # A constrained variant of this request (max_tokens=1 together with a logit_bias
    # of 100 on token ids 32-37) is deliberately not used here.
    completion = azure_client.chat.completions.create(
        model=cli.model_name,
        messages=chat_messages,
        stream=False,
        top_p=0.95,
        temperature=0.6,
    )

    print(completion.choices[0].message.content)
# Run the evaluation only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()