# inference/mei_parser.py
from lxml import etree

# Lengths, in quarter notes, of the non-numeric CMN duration names MEI allows.
_NAMED_DURATIONS_IN_QUARTERS = {"breve": 8.0, "long": 16.0}


def _note_length_in_quarters(dur, dots):
    """Return the length in quarter notes for MEI ``dur``/``dots`` values.

    Args:
        dur: Value of the note's ``dur`` attribute: a positive integer
            string (``"4"`` is a quarter note) or ``"breve"`` / ``"long"``.
        dots: Value of the note's ``dots`` attribute, or ``None`` when the
            attribute is absent.

    Raises:
        ValueError: If ``dur`` or ``dots`` cannot be interpreted.
    """
    length = _NAMED_DURATIONS_IN_QUARTERS.get(dur)
    if length is None:
        try:
            denominator = int(dur)
        except ValueError:
            raise ValueError(f"Unsupported MEI note duration: {dur!r}") from None
        if denominator <= 0:
            raise ValueError(f"Unsupported MEI note duration: {dur!r}")
        length = 4 / denominator

    try:
        dot_count = int(dots) if dots else 0
    except ValueError:
        raise ValueError(f"Invalid MEI dots value: {dots!r}") from None
    if dot_count < 0:
        raise ValueError(f"Invalid MEI dots value: {dots!r}")

    # Each augmentation dot adds half of the previously added value, so n dots
    # scale the length by 1 + 1/2 + ... + 1/2**n == 2 - 0.5**n (1.0 for n == 0).
    return length * (2 - 0.5 ** dot_count)


def parse_mei(mei_path, tempo=120):
    """Extract pitches, durations and lyrics from a monophonic MEI file.

    Only ``<note>`` elements located below a ``<staff>`` are read; notes that
    lack a ``pname``, ``oct`` or ``dur`` attribute are skipped.

    Args:
        mei_path: Path of the MEI document (converted with ``str()`` before
            being handed to ``etree.parse``).
        tempo: Tempo in quarter notes per minute; must be positive.

    Returns:
        A dict with parallel per-note data:
            ``"notes"``: pitch names such as ``"C4"`` (upper-cased ``pname``
                followed by ``oct``).
            ``"durations"``: note lengths in seconds, rounded to 6 decimals.
            ``"lyrics"``: the syllables joined by single spaces; notes without
                syllable text contribute the placeholder ``"a"``.
            ``"is_slur_seq"``: 1 when the note's ``<syl>`` has ``con="d"``,
                otherwise 0.

    Raises:
        ValueError: If ``tempo`` is not positive, the document does not
            contain exactly one ``staffDef``, or a note carries an
            unsupported ``dur``/``dots`` value.
    """
    if tempo <= 0:
        raise ValueError(f"tempo must be positive, got {tempo!r}.")

    tree = etree.parse(str(mei_path))
    ns = {"mei": "http://www.music-encoding.org/ns/mei"}

    staff_defs = tree.xpath("//mei:staffDef", namespaces=ns)
    if len(staff_defs) != 1:
        raise ValueError(f"Expected exactly one staffDef (monophonic input), found {len(staff_defs)}.")

    notes = []
    syllables = []
    durations = []
    is_slur_seq = []

    quarter_duration = 60 / tempo

    for note in tree.xpath("//mei:staff//mei:note", namespaces=ns):
        pname = note.get("pname")
        octv = note.get("oct")
        dur = note.get("dur")

        if not pname or not octv or not dur:
            continue

        # NOTE(review): accidentals (e.g. accid / accid.ges) are not reflected
        # in the pitch name - confirm whether downstream consumers need them.
        notes.append(pname.upper() + octv)

        # Duration in seconds, honouring augmentation dots.
        sec = _note_length_in_quarters(dur, note.get("dots")) * quarter_duration
        durations.append(round(sec, 6))

        # Syllable text. Whitespace-only text is treated as missing: an empty
        # entry would disappear in the space-joined lyrics and desynchronise
        # the syllables from the notes.
        syl_elem = note.find(".//mei:syl", namespaces=ns)
        text = (syl_elem.text or "").strip() if syl_elem is not None else ""
        if text:
            syllables.append(text)
            is_slur_seq.append(1 if syl_elem.get("con") == "d" else 0)
        else:
            syllables.append("a")
            is_slur_seq.append(0)

    return {
        "notes": notes,
        "durations": durations,
        "lyrics": " ".join(syllables),
        "is_slur_seq": is_slur_seq
    }