github-actions[bot]
Sync with https://github.com/mozilla-ai/any-agent-demo
7e103cf
raw
history blame
4 kB
import copy
import json
from datetime import datetime, timedelta
import pandas as pd
import requests
import streamlit as st
from constants import (
DEFAULT_EVALUATION_CRITERIA,
DEFAULT_EVALUATION_MODEL,
MODEL_OPTIONS,
)
from pydantic import BaseModel, ConfigDict
from any_agent import AgentFramework
class UserInputs(BaseModel):
    """Validated bundle of the choices collected by the Streamlit input form.

    Instances are built by ``get_user_inputs`` from the widget values.
    """

    # extra="forbid": constructing the model with an undeclared field fails
    # validation instead of the field being silently ignored.
    model_config = ConfigDict(extra="forbid")

    # Entry selected from MODEL_OPTIONS for the agent to use.
    model_id: str
    # Free-form location text typed by the user.
    location: str
    # Maximum driving hours; the form enforces a minimum of 1.
    max_driving_hours: int
    # Selected calendar date combined with the selected hour of day.
    date: datetime
    # Selected agent framework.
    # NOTE(review): get_user_inputs passes the AgentFramework member itself;
    # presumably the enum is str-based so it validates as ``str`` - confirm.
    framework: str
    # Entry selected from MODEL_OPTIONS for the LLM-as-a-Judge evaluation.
    evaluation_model: str
    # One ``{"criteria": <text>}`` dict per criterion kept from the editor.
    evaluation_criteria: list[dict[str, str]]
    # State of the "Run Evaluation" checkbox.
    run_evaluation: bool
@st.cache_resource
def get_area(area_name: str) -> list[dict]:
    """Look up an area by name with the Nominatim search API.

    Uses the [Nominatim API](https://nominatim.org/release-docs/develop/api/Search/).

    Args:
        area_name (str): The free-form name of the area to search for.

    Returns:
        list[dict]: The decoded JSON body of the response. The search
            endpoint answers with a JSON array of matching places, which is
            empty when nothing matched.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On timeouts and connection failures.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        # Let requests percent-encode the user-supplied text; interpolating it
        # into the URL lets characters such as "&" or "#" corrupt the query.
        params={"q": area_name, "format": "jsonv2"},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=5,
    )
    response.raise_for_status()
    return response.json()
# Limits applied to the user-editable evaluation criteria.
_MAX_CRITERIA = 20
_MAX_CRITERION_WORDS = 100


def _short_model_name(model_option: str) -> str:
    """Return at most the last three "/"-separated parts of a model option."""
    return "/".join(model_option.split("/")[-3:])


def _collect_criteria(criteria_df: pd.DataFrame) -> list[dict[str, str]]:
    """Convert the edited criteria table into ``{"criteria": text}`` dicts.

    Only the first ``_MAX_CRITERIA`` rows are considered. Blank rows are
    skipped silently; criteria with more than ``_MAX_CRITERION_WORDS`` words
    are reported with ``st.error`` and dropped.
    """
    if len(criteria_df) > _MAX_CRITERIA:
        st.error(
            f"You can only add up to {_MAX_CRITERIA} criteria for the purpose of this demo."
        )
        criteria_df = criteria_df[:_MAX_CRITERIA]

    criteria = []
    for _, row in criteria_df.iterrows():
        text = row["criteria"]
        # A row added in the editor but never filled in does not hold "" but
        # a non-string placeholder (e.g. None/NaN); treat it like a blank row
        # instead of failing on ``.split``.
        if not isinstance(text, str) or not text.strip():
            continue
        # split() without a separator counts words, ignoring repeated spaces.
        if len(text.split()) > _MAX_CRITERION_WORDS:
            st.error("Error creating criterion: Criteria is too long")
            continue
        criteria.append({"criteria": text})
    return criteria


def get_user_inputs() -> UserInputs:
    """Render the input form and return the current selections.

    Draws the location, driving-hours, date/time, framework, model and
    evaluation widgets with Streamlit, validates the location against
    Nominatim, and cleans the custom evaluation criteria.

    Returns:
        UserInputs: The validated values currently held by the widgets.
    """
    default_val = "Los Angeles California, US"
    location = st.text_input("Enter a location", value=default_val)
    if location:
        try:
            location_check = get_area(location)
        except requests.RequestException as exc:
            # A geocoder outage or timeout should not take the whole form
            # down; report it and carry on with the unverified location.
            st.warning(f"⚠️ Could not verify the location: {exc}")
        else:
            if not location_check:
                st.error("❌ Invalid location")

    max_driving_hours = st.number_input(
        "Enter the maximum driving hours", min_value=1, value=2
    )

    col_date, col_time = st.columns([2, 1])
    with col_date:
        selected_date = st.date_input(
            "Select a date in the future", value=datetime.now() + timedelta(days=1)
        )
    with col_time:
        # One option per full hour of the day; 09:00 is preselected.
        selected_time = st.selectbox(
            "Select a time",
            [datetime.strptime(f"{i:02d}:00", "%H:%M").time() for i in range(24)],
            index=9,
        )
    date = datetime.combine(selected_date, selected_time)

    framework = st.selectbox(
        "Select the agent framework to use",
        list(AgentFramework),
        index=2,
        format_func=lambda x: x.name,
    )

    model_id = st.selectbox(
        "Select the model to use",
        MODEL_OPTIONS,
        index=1,
        format_func=_short_model_name,
    )

    with st.expander("Custom Evaluation"):
        evaluation_model_id = st.selectbox(
            "Select the model to use for LLM-as-a-Judge evaluation",
            MODEL_OPTIONS,
            index=2,
            format_func=_short_model_name,
        )
        # Work on a copy so the editor is never seeded from a shared,
        # possibly mutated default list.
        criteria_df = st.data_editor(
            pd.DataFrame(copy.deepcopy(DEFAULT_EVALUATION_CRITERIA)),
            column_config={
                "criteria": st.column_config.TextColumn(label="Criteria"),
            },
            hide_index=True,
            num_rows="dynamic",
        )
        # Validate inside the expander so messages appear next to the editor.
        new_criteria = _collect_criteria(criteria_df)

    run_evaluation = st.checkbox("Run Evaluation", value=True)

    return UserInputs(
        model_id=model_id,
        location=location,
        max_driving_hours=max_driving_hours,
        date=date,
        framework=framework,
        evaluation_model=evaluation_model_id,
        evaluation_criteria=new_criteria,
        run_evaluation=run_evaluation,
    )