import numpy as np
import re


def extract_judge_hash(response):
    """
    Convert the per-checklist yes / in progress / no judgments into a compact string.

    The text of ``response['response']`` is lower-cased and has its space
    characters removed. The part after the first ``<answer>`` tag, up to the
    next ``</answer>`` tag (or the end of the text if that tag is absent), is
    then scanned for ``:yes`` / ``:inprogress`` / ``:no``. Each occurrence is
    mapped, in order of appearance, to ``'y'`` / ``'i'`` / ``'n'``.

    Args:
        response: Mapping with a ``'response'`` entry holding the judge output.

    Returns:
        The concatenated verdict letters (possibly an empty string), or
        ``None`` when the output has no ``<answer>`` tag or is not a string.
        In the ``None`` case the traceback is printed.

    Raises:
        KeyError: If ``response`` has no ``'response'`` entry.
    """
    content = response['response']

    try:
        normalized = content.lower().replace(' ', '')
        judge_content = normalized.split('<answer>')[1].split('</answer>')[0]
    except (AttributeError, IndexError, TypeError):
        # Only malformed judge output is treated as "no result": a non-string
        # value (AttributeError / TypeError) or a missing <answer> tag
        # (IndexError). Anything else, e.g. KeyboardInterrupt, must propagate.
        import traceback
        traceback.print_exc()
        return None

    verdict_codes = {':yes': 'y', ':inprogress': 'i', ':no': 'n'}
    verdicts = re.findall(r":yes|:inprogress|:no", judge_content)
    return ''.join(verdict_codes[verdict] for verdict in verdicts)

def average_logits(response):
    """
    Compute a reward from the per-sample 'yes' and 'in' values.

    Reads ``response['judge_probs']``, an iterable of mappings that each carry
    a ``'yes'`` and an ``'in'`` entry, and returns the mean of the ``'yes'``
    values plus half the mean of the ``'in'`` values.
    """
    samples = response['judge_probs']

    mean_yes = np.mean([sample['yes'] for sample in samples])
    mean_in = np.mean([sample['in'] for sample in samples])

    # An 'in' entry is weighted at half of a 'yes' entry.
    return mean_yes + 0.5 * mean_in


# Registry mapping a reward-processor name to the function that implements it.
REWARD_PROCESSORS = {
    'avg_logits': average_logits,
}

# Sample count per reward processor, keyed by the same names as above.
# NOTE(review): presumably the number of judge samples to draw — confirm
# against the caller.
REWARD_PROCESSOR_N_SAMPLES = {
    'avg_logits': 5,
}