Spaces:
Sleeping
Sleeping
File size: 15,868 Bytes
cf367e2 663a6db cf367e2 c0a5a18 663a6db cf367e2 c0a5a18 cf367e2 c0a5a18 cf367e2 c0a5a18 cf367e2 c0a5a18 |
|
import os
import streamlit as st
import dotenv
import openai
from openai import OpenAI
import anthropic
from together import Together
import google.generativeai as genai
import time
# Load variables via python-dotenv before any os.getenv() call below reads them.
dotenv.load_dotenv()
# Password compared against the login form input in main(); None when
# APP_PASSWORD is unset.
PASSWORD = os.getenv("APP_PASSWORD")
# Load API keys from environment variables (each is None when the variable is unset)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# Initialize API clients that take their key explicitly
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)
# Set up API clients for OpenAI and Anthropic
# The module-level key is set on the `openai` package, while the client below is
# constructed without an api_key argument.
# NOTE(review): presumably the SDK then reads OPENAI_API_KEY from the
# environment itself -- confirm.
openai.api_key = OPENAI_API_KEY
openai_client = OpenAI(
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
# The Anthropic client is also built without an explicit key, so the
# ANTHROPIC_API_KEY constant above is not passed to it. An earlier variant used
# anthropic.Client(api_key=ANTHROPIC_API_KEY).
# NOTE(review): presumably the SDK picks the key up from the environment -- confirm.
anthropic_client = anthropic.Anthropic()
# Council presets. Each key is a configuration name offered in the UI; each
# value lists the "<provider>://<model name>" identifiers queried for it.
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        # NOTE(review): unlike the haiku entry above, this ID carries no date
        # suffix -- confirm the Anthropic API accepts it as written.
        "anthropic://claude-3-5-sonnet",
    ],
}

# Avatar handed to st.chat_message for each council member. The map is looked
# up by model identifier when a response is rendered, so every member of every
# preset needs an entry; the three Flagship members that were missing are added
# with the same empty placeholder the existing entries use.
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "",
    "anthropic://claude-3-5-sonnet": "",
    "vertex://gemini-1.5-flash-001": "",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "",
    "anthropic://claude-3-haiku-20240307": "",
    "openai://gpt-4": "",
    "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": "",
    "vertex://gemini-1.5-pro-001": "",
}

# Models offered for synthesizing the council's answers into one response.
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]
def anthropic_streamlit_streamer(stream):
    """
    Turn an Anthropic event stream into a stream of plain text pieces.

    Only ``content_block_delta`` events whose delta carries a non-empty
    ``text`` attribute produce output; every other event is skipped.
    Iteration ends at the first ``message_stop`` event.

    :param stream: Iterable of streaming events from the Anthropic API
    :return: Yields the text of each delta, in arrival order.
    """
    for event in stream:
        # Events without a ``type`` attribute fall through both checks below.
        kind = getattr(event, "type", None)
        if kind == "message_stop":
            # End of message: nothing after this event is consumed.
            break
        if kind != "content_block_delta" or not hasattr(event, "delta"):
            continue
        piece = getattr(event.delta, "text", None)
        if piece:
            yield piece
def google_streamlit_streamer(stream):
    """
    Yield the ``text`` attribute of every chunk in ``stream``, in order.

    :param stream: Iterable of response chunks exposing a ``text`` attribute
    :return: Yields each chunk's text unchanged (empty strings included).
    """
    yield from (part.text for part in stream)
def together_streamlit_streamer(stream):
    """
    Yield the text deltas of a Together chat-completion stream.

    Chunks that carry no text are skipped instead of being yielded: a chunk
    with an empty ``choices`` list, a missing delta, or a delta whose
    ``content`` is ``None``/empty. This keeps ``None`` out of the text stream
    and avoids an ``IndexError`` on ``choices[0]``.

    :param stream: Iterable of chunks exposing ``choices[0].delta.content``
    :return: Yields each non-empty content string, in arrival order.
    """
    for chunk in stream:
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue
        delta = getattr(choices[0], "delta", None)
        text = getattr(delta, "content", None)
        if text:
            yield text
# Helper functions for LLM council and aggregator selection
def llm_council_selector():
    """
    Render a radio selector with one option per council preset.

    :return: The list of model identifiers stored in ``LLM_COUNCIL_MEMBERS``
        under the chosen preset name.
    """
    preset_names = list(LLM_COUNCIL_MEMBERS.keys())
    chosen_preset = st.radio("Choose a council configuration", options=preset_names)
    return LLM_COUNCIL_MEMBERS[chosen_preset]
def aggregator_selector():
    """
    Render a radio selector over ``AGGREGATORS``.

    :return: Whatever ``st.radio`` returns for the current selection.
    """
    chosen_aggregator = st.radio("Choose an aggregator LLM", options=AGGREGATORS)
    return chosen_aggregator
# API calls for different providers
def get_openai_response(model_name, prompt):
    """
    Request a streaming chat completion from the shared OpenAI client.

    :param model_name: Model name passed as ``model``
    :param prompt: Text sent as the single user message
    :return: The object returned by ``chat.completions.create`` with
        ``stream=True``.
    """
    user_turn = {"role": "user", "content": prompt}
    completions = openai_client.chat.completions
    return completions.create(model=model_name, messages=[user_turn], stream=True)
# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt, max_tokens=1024):
    """
    Request a streaming message from the shared Anthropic client.

    :param model_name: Model name passed as ``model``
    :param prompt: Text sent as the single user message
    :param max_tokens: Value passed as ``max_tokens`` for the request. Defaults
        to 1024, the value that was previously hard-coded, so existing callers
        are unaffected.
    :return: The object returned by ``messages.create`` with ``stream=True``;
        ``anthropic_streamlit_streamer`` consumes it event by event.
    """
    return anthropic_client.messages.create(
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": prompt}],
        model=model_name,
        stream=True,
    )
def get_together_response(model_name, prompt):
    """
    Request a streaming chat completion from the shared Together client.

    :param model_name: Model name passed as ``model``
    :param prompt: Text sent as the single user message
    :return: The object returned by ``chat.completions.create`` with
        ``stream=True``.
    """
    user_turn = {"role": "user", "content": prompt}
    completions = together_client.chat.completions
    return completions.create(model=model_name, messages=[user_turn], stream=True)
# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    """
    Request streamed content from a ``genai.GenerativeModel``.

    :param model_name: Name used to construct the ``GenerativeModel``
    :param prompt: Text passed to ``generate_content``
    :return: The object returned by ``generate_content`` with ``stream=True``.
    """
    gemini = genai.GenerativeModel(model_name)
    response_stream = gemini.generate_content(prompt, stream=True)
    return response_stream
def get_llm_response(model_identifier, prompt):
    """
    Dispatch a prompt to the provider named in ``model_identifier``.

    :param model_identifier: String of the form ``"<provider>://<model name>"``
        where provider is one of ``openai``, ``anthropic``, ``together`` or
        ``vertex``.
    :param prompt: Prompt text forwarded unchanged to the provider helper.
    :return: The streaming response returned by the matching ``get_*_response``
        helper, or ``None`` when the identifier has no ``://`` separator or
        names an unknown provider.
    """
    # partition() splits on the first "://" only, so an identifier with no
    # separator (or with more than one) no longer raises ValueError on
    # unpacking; malformed identifiers are reported the same way as unknown
    # providers, which callers already handle by checking for a falsy result.
    provider, separator, model_name = model_identifier.partition("://")
    if not separator:
        return None
    if provider == "openai":
        return get_openai_response(model_name, prompt)
    if provider == "anthropic":
        return get_anthropic_response(model_name, prompt)
    if provider == "together":
        return get_together_response(model_name, prompt)
    if provider == "vertex":
        return get_google_response(model_name, prompt)
    return None
# Main Streamlit App
def _to_text_stream(model, stream):
    """Wrap a provider stream in the matching streamer; OpenAI streams pass through."""
    if model.startswith("anthropic"):
        return anthropic_streamlit_streamer(stream)
    if model.startswith("vertex"):
        return google_streamlit_streamer(stream)
    if model.startswith("together"):
        return together_streamlit_streamer(stream)
    return stream


def _streamed_text(written):
    """Reduce the value returned by ``write_stream`` to a single string."""
    if not written:
        return ""
    if isinstance(written, str):
        return written
    # A non-string result is treated as a sequence of streamed objects; only
    # the text pieces are kept.
    return "".join(part for part in written if isinstance(part, str))


def _build_aggregator_prompt(prompt, responses):
    """Compose the aggregator prompt from the user prompt and the council's answers."""
    aggregator_prompt = f"User prompt: {prompt}\n\n"
    aggregator_prompt += "Responses from other LLMs:\n"
    aggregator_prompt += "\n".join(
        f"{model}: {text}" for model, text in responses.items()
    )
    aggregator_prompt += "\n\nPlease provide an aggregated response."
    return aggregator_prompt


def main():
    """
    Render the Language Model Council Sandbox page.

    Until ``st.session_state.authenticated`` is true, only a password form is
    shown. Once logged in, the user picks a council preset and an aggregator,
    enters a prompt, and on "Submit" each council member's answer is streamed
    into its own chat message. The collected answers are then sent to the
    aggregator, whose answer is streamed last.
    """
    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )
    # Custom CSS for the chat display
    center_css = """
<style>
h1, h2, h3, h6 { text-align: center; }
.chat-container {
display: flex;
align-items: flex-start;
margin-bottom: 10px;
}
.avatar {
width: 50px;
margin-right: 10px;
}
.message {
background-color: #f1f1f1;
padding: 10px;
border-radius: 10px;
width: 100%;
}
</style>
"""
    st.markdown(center_css, unsafe_allow_html=True)
    # App title and description
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate and judge each other.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")
    # Authentication system
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False
    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                if password == PASSWORD:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")
    if st.session_state.authenticated:
        st.success("Logged in successfully!")
        # Council and aggregator selection
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)
        # Prompt input
        prompt = st.text_area("Enter your prompt:")
        if st.button("Submit"):
            st.write("Responses:")
            # Text streamed by each council member, keyed by model identifier.
            # Previously nothing recorded the answers, so the aggregator
            # prompt always contained empty responses.
            responses = {}
            # Fetching and streaming responses from each selected model
            for model in selected_models:
                with st.chat_message(
                    model,
                    # .get() avoids a KeyError for models without a map entry;
                    # an empty-string placeholder is treated as "no avatar" so
                    # Streamlit picks its default.
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model) or None,
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    if stream:
                        written = message_placeholder.write_stream(
                            _to_text_stream(model, stream)
                        )
                        responses[model] = _streamed_text(written)
                    else:
                        responses[model] = ""
            # Constructing the aggregator prompt
            aggregator_prompt = _build_aggregator_prompt(prompt, responses)
            # Fetching and streaming response from the aggregator
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(
                    selected_aggregator, aggregator_prompt
                )
                if aggregator_stream:
                    message_placeholder.write_stream(
                        _to_text_stream(selected_aggregator, aggregator_stream)
                    )
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")


if __name__ == "__main__":
    main()
# import streamlit as st
# from components import llm_council_selector
# st.title("LLM Council Selector")
# selected_models = llm_council_selector()
# if selected_models is not None:
# st.write("Selected Models:", selected_models)
# else:
# st.write("No models selected or component didn't return a value.")
# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.
# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?
# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.
# Calculate total cost.
# Show total tokens used.
|