Spaces:
Sleeping
Sleeping
File size: 15,868 Bytes
cf367e2 663a6db cf367e2 c0a5a18 663a6db cf367e2 c0a5a18 cf367e2 c0a5a18 cf367e2 c0a5a18 cf367e2 c0a5a18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 |
import os
import streamlit as st
import dotenv
import openai
from openai import OpenAI
import anthropic
from together import Together
import google.generativeai as genai
import time
# Load environment variables via python-dotenv before any os.getenv() call below.
dotenv.load_dotenv()
# Shared password compared against the login form input in main();
# os.getenv returns None when APP_PASSWORD is not set.
PASSWORD = os.getenv("APP_PASSWORD")
# Load API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# Initialize API clients
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)
# Set up API clients for OpenAI and Anthropic
openai.api_key = OPENAI_API_KEY
# NOTE(review): no api_key is passed to OpenAI() here; presumably the SDK reads
# OPENAI_API_KEY from the environment itself — confirm.
# NOTE(review): organization and project IDs are hard-coded; consider moving
# them to environment variables alongside the keys above.
openai_client = OpenAI(
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
# anthropic_client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
# NOTE(review): constructed without an explicit key, so ANTHROPIC_API_KEY above
# is not used directly; presumably the SDK reads it from the environment — confirm.
anthropic_client = anthropic.Anthropic()
# Preset councils offered in the UI, keyed by the label shown in the radio widget.
# Each member is a "provider://model-name" identifier; get_llm_response() splits
# on "://" and routes by provider ("vertex" is served through google.generativeai).
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    # NOTE(review): the first three identifiers below have no entry in
    # PROVIDER_TO_AVATAR_MAP.
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        # NOTE(review): unlike the haiku entry, this ID carries no date suffix —
        # confirm the Anthropic API accepts it as written.
        "anthropic://claude-3-5-sonnet",
    ],
}
PROVIDER_TO_AVATAR_MAP = {
"openai://gpt-4o-mini": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIwLjk5ZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjU2IDI2MCI+PHBhdGggZD0iTTIzOS4xODQgMTA2LjIwM2E2NC43MiA2NC43MiAwIDAgMC01LjU3Ni01My4xMDNDMjE5LjQ1MiAyOC40NTkgMTkxIDE1Ljc4NCAxNjMuMjEzIDIxLjc0QTY1LjU4NiA2NS41ODYgMCAwIDAgNTIuMDk2IDQ1LjIyYTY0LjcyIDY0LjcyIDAgMCAwLTQzLjIzIDMxLjM2Yy0xNC4zMSAyNC42MDItMTEuMDYxIDU1LjYzNCA4LjAzMyA3Ni43NGE2NC42NyA2NC42NyAwIDAgMCA1LjUyNSA1My4xMDJjMTQuMTc0IDI0LjY1IDQyLjY0NCAzNy4zMjQgNzAuNDQ2IDMxLjM2YTY0LjcyIDY0LjcyIDAgMCAwIDQ4Ljc1NCAyMS43NDRjMjguNDgxLjAyNSA1My43MTQtMTguMzYxIDYyLjQxNC00NS40ODFhNjQuNzcgNjQuNzcgMCAwIDAgNDMuMjI5LTMxLjM2YzE0LjEzNy0yNC41NTggMTAuODc1LTU1LjQyMy04LjA4My03Ni40ODNtLTk3LjU2IDEzNi4zMzhhNDguNCA0OC40IDAgMCAxLTMxLjEwNS0xMS4yNTVsMS41MzUtLjg3bDUxLjY3LTI5LjgyNWE4LjYgOC42IDAgMCAwIDQuMjQ3LTcuMzY3di03Mi44NWwyMS44NDUgMTIuNjM2Yy4yMTguMTExLjM3LjMyLjQwOS41NjN2NjAuMzY3Yy0uMDU2IDI2LjgxOC0yMS43ODMgNDguNTQ1LTQ4LjYwMSA0OC42MDFNMzcuMTU4IDE5Ny45M2E0OC4zNSA0OC4zNSAwIDAgMS01Ljc4MS0zMi41ODlsMS41MzQuOTIxbDUxLjcyMiAyOS44MjZhOC4zNCA4LjM0IDAgMCAwIDguNDQxIDBsNjMuMTgxLTM2LjQyNXYyNS4yMjFhLjg3Ljg3IDAgMCAxLS4zNTguNjY1bC01Mi4zMzUgMzAuMTg0Yy0yMy4yNTcgMTMuMzk4LTUyLjk3IDUuNDMxLTY2LjQwNC0xNy44MDNNMjMuNTQ5IDg1LjM4YTQ4LjUgNDguNSAwIDAgMSAyNS41OC0yMS4zMzN2NjEuMzlhOC4yOSA4LjI5IDAgMCAwIDQuMTk1IDcuMzE2bDYyLjg3NCAzNi4yNzJsLTIxLjg0NSAxMi42MzZhLjgyLjgyIDAgMCAxLS43NjcgMEw0MS4zNTMgMTUxLjUzYy0yMy4yMTEtMTMuNDU0LTMxLjE3MS00My4xNDQtMTcuODA0LTY2LjQwNXptMTc5LjQ2NiA0MS42OTVsLTYzLjA4LTM2LjYzTDE2MS43MyA3Ny44NmEuODIuODIgMCAwIDEgLjc2OCAwbDUyLjIzMyAzMC4xODRhNDguNiA0OC42IDAgMCAxLTcuMzE2IDg3LjYzNXYtNjEuMzkxYTguNTQgOC41NCAwIDAgMC00LjQtNy4yMTNtMjEuNzQyLTMyLjY5bC0xLjUzNS0uOTIybC01MS42MTktMzAuMDgxYTguMzkgOC4zOSAwIDAgMC04LjQ5MiAwTDk5Ljk4IDk5LjgwOFY3NC41ODdhLjcyLjcyIDAgMCAxIC4zMDctLjY2NWw1Mi4yMzMtMzAuMTMzYTQ4LjY1MiA0OC42NTIgMCAwIDEgNzIuMjM2IDUwLjM5MXpNODguMDYxIDEzOS4wOTdsLTIxLjg0NS0xMi41ODVhLjg3Ljg3IDAgMCAxLS40MS0uNjE0VjY1LjY4NWE0OC42NTIgNDguNjUyIDAgMCAxIDc5Ljc1Ny0zNy4zNDZsLTEuNTM1Ljg3b
C01MS42NyAyOS44MjVhOC42IDguNiAwIDAgMC00LjI0NiA3LjM2N3ptMTEuODY4LTI1LjU4TDEyOC4wNjcgOTcuM2wyOC4xODggMTYuMjE4djMyLjQzNGwtMjguMDg2IDE2LjIxOGwtMjguMTg4LTE2LjIxOHoiLz48L3N2Zz4=",
"anthropic://claude-3-5-sonnet": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
"vertex://gemini-1.5-flash-001": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9IiM0MjY4ZmYiIGQ9Ik0yNCAxMi4wMjRjLTYuNDM3LjM4OC0xMS41OSA1LjUzOS0xMS45NzcgMTEuOTc2aC0uMDQ3QzExLjU4OCAxNy41NjMgNi40MzYgMTIuNDEyIDAgMTIuMDI0di0uMDQ3QzYuNDM3IDExLjU4OCAxMS41ODggNi40MzcgMTEuOTc2IDBoLjA0N2MuMzg4IDYuNDM3IDUuNTQgMTEuNTg4IDExLjk3NyAxMS45Nzd6Ii8+PC9zdmc+",
"together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMzIgMzIiPjxnIGZpbGw9Im5vbmUiPjxwYXRoIGZpbGw9IiNiNGFjYmMiIGQ9Ik0yMC44NzEgMjQuNzh2LTYuMDZoMy4wMXY1Ljc3YzAgLjM0LS4xMi42Mi0uMzEuOTRsLTIuNDEgNC4yYy0uMTguMjMtLjQ1LjM3LS43NS4zN2gtMS4wM2MtLjIzIDAtLjM4LS4yNC0uMjgtLjQ1bDEuNjctNC4zNWMuMDctLjEzLjEtLjI3LjEtLjQyTTE3LjA5MSAzMGMuMiAwIC4yNi0uMjEuMjItLjM4bC0yLjMyLTguNjFoLTIuOTlsLjg1IDMuNTVjLjE5LjcxLjY2IDEuMzIgMS4zIDEuNjljLjE0LjA4LjI1LjIyLjI5LjM4bC44NyAzLjE0Yy4wNC4xNy4yMS4yMi4zOC4yMnoiLz48cGF0aCBmaWxsPSIjY2RjNGQ2IiBkPSJtMjguNTQxIDIzLjA4bC0xLjI3LS45NmEuOTQuOTQgMCAwIDEtLjI3LS42NnYtMi43NWMwLS42NC0uMTYtMS4yMzgtLjQ0Mi0xLjc2cS4yMTMuMDUuNDQyLjA1YTIgMiAwIDEgMC0xLjk0OS0xLjU0OWEzLjggMy44IDAgMCAwLTEuOC0uNDUxaC04LjE3Yy0uNjYgMC0xLjI3LS40Mi0xLjU3LTEuMDFMMTAuMDQxIDMuNWEyLjIzIDIuMjMgMCAwIDAtMi4xLTEuNWMtLjE4IDAtLjMuMTctLjI0LjM0TDguNTcxIDVjLS4yIDAtMS4wNy4yMy0xLjg1LjczbC0uODA2LjQ5OEw3LjAwMiAxMHY4LjI2YzAgMi4wMSAxLjI1IDMuNzIgMy4wMSA0LjQxdjcuMDJjLS4wMS4xNy4xMy4zMS4zLjMxaDEuMzdjLjE4IDAgLjMyLS4xNC4zMi0uMzF2LTEuOTZjMC0uMTcuMDctLjMyLjE4LS40NGMuNTMtLjUyLjgyLTEuMjMuODItMS45N1YyM2g1LjA3YzEuMjcgMCAyLjQ5LS41NSAzLjMzLTEuNWMwIC45NC40OCAxLjcyIDEuMzggMi4zMmwzLjk2IDIuNDNjLjE2LjExLjI2LjMuMjYuNXYyLjkzYzAgLjE3LjE0LjMxLjMxLjMxaDEuMzdjLjE3IDAgLjMxLS4xNC4zMS0uMzF2LTUuNTFjLjAxLS40LS4xNS0uOC0uNDUtMS4wOSIvPjxwYXRoIGZpbGw9IiNmM2FkNjEiIGQ9Ik02Ljg0MSA2Yy0uMzYgMC0uNzIuMS0xLjAzLjI5bC0yLjE5IDEuMzVjLS4zNi4yMy0uNi42MS0uNjIgMS4wM2MtLjAzLjczLjU1IDEuMzMgMS4yNyAxLjMzaDMuNDljLjU3IDAgMS4wNC0uNDcgMS4wNC0xLjA1di0xYzAtMS4wNy0uODgtMS45NS0xLjk2LTEuOTUiLz48cGF0aCBmaWxsPSIjMWMxYzFjIiBkPSJNNi41IDhhLjUuNSAwIDEgMCAwLTFhLjUuNSAwIDAgMCAwIDFtLTEuOTk5LjVjMC0uMjgtLjIyLS41LS41LS41aC0uNzZhMS4yIDEuMiAwIDAgMC0uMjEgMWguOTdjLjI4IDAgLjUtLjIyLjUtLjUiLz48cGF0aCBmaWxsPSIjZjNhZDYxIiBkPSJNMjguOTkxIDI4aC0xLjk5djEuNjhjMCAuMTcuMTQuMzEuMzEuMzFoMS4zN2MuMTcgMCAuMzEtLjE0LjMxLS4zMXptLTE2Ljk5IDBoLTEuOTl2MS42OWMtLjAxLjE3LjEzLjMxLjMuMzFoMS4zN2M
uMTggMCAuMzItLjE0LjMyLS4zMXptNS4wODggMmwtMS4zOTgtLjAxYy0uMTcgMC0uMzQtLjA1LS4zOC0uMjJsLS40OS0xLjc3aDIuMDU0bC40MzYgMS42MmMuMDQuMTctLjAyLjM3OC0uMjE2LjM4em0yLjI4OCAwYS4zMTMuMzEzIDAgMCAxLS4yNzYtLjQ1bC41OTUtMS41NWgyLjRsLS45MzUgMS42M2EuOTUuOTUgMCAwIDEtLjc0Mi4zN3oiLz48L2c+PC9zdmc+",
"anthropic://claude-3-haiku-20240307": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
}
# Models offered in the UI as the aggregator, in the same "provider://model-name"
# format as the council members.
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]
def anthropic_streamlit_streamer(stream):
    """
    Turn an Anthropic event stream into a generator of text fragments.

    Events without a ``type`` attribute are ignored. For
    ``content_block_delta`` events that carry a ``delta``, the delta's
    ``text`` is yielded when it is present and non-empty. Iteration ends at
    the first ``message_stop`` event.

    :param stream: Iterable of streaming events from the Anthropic API
    :return: Yields text fragments in arrival order.
    """
    for event in stream:
        if not hasattr(event, "type"):
            continue

        carries_delta = event.type == "content_block_delta" and hasattr(
            event, "delta"
        )
        if not carries_delta:
            if event.type == "message_stop":
                # End of message: nothing after this is consumed.
                return
            continue

        # Non-text deltas have no ``text`` attribute and are skipped.
        fragment = getattr(event.delta, "text", None)
        if fragment:
            yield fragment
def google_streamlit_streamer(stream):
    """Yield the ``text`` attribute of every chunk in ``stream``, in order."""
    yield from (piece.text for piece in stream)
def together_streamlit_streamer(stream):
    """
    Yield the text content of each chunk in a Together chat-completion stream.

    Chunks that have no choices, or whose first choice has no delta content
    (``None`` or empty string), are skipped. Previously such chunks either
    raised ``IndexError`` or yielded ``None``, which the consumer would have
    to render as a non-text object.

    :param stream: Iterable of streamed chat-completion chunks
    :return: Yields non-empty text fragments in arrival order.
    """
    for chunk in stream:
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue
        delta = getattr(choices[0], "delta", None)
        content = getattr(delta, "content", None)
        if content:
            yield content
# Helper functions for LLM council and aggregator selection
def llm_council_selector():
    """Render the council radio widget and return the chosen council's model list."""
    council_names = list(LLM_COUNCIL_MEMBERS)
    chosen_name = st.radio("Choose a council configuration", options=council_names)
    return LLM_COUNCIL_MEMBERS[chosen_name]
def aggregator_selector():
    """Render the aggregator radio widget and return the selected model identifier."""
    chosen_aggregator = st.radio("Choose an aggregator LLM", options=AGGREGATORS)
    return chosen_aggregator
# API calls for different providers
def get_openai_response(model_name, prompt):
    """Start a streaming OpenAI chat completion with ``prompt`` as the only user message."""
    conversation = [{"role": "user", "content": prompt}]
    completion_stream = openai_client.chat.completions.create(
        model=model_name,
        messages=conversation,
        stream=True,
    )
    return completion_stream
# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt):
    """Start a streaming Anthropic message (max 1024 tokens) for a single user prompt."""
    conversation = [{"role": "user", "content": prompt}]
    event_stream = anthropic_client.messages.create(
        max_tokens=1024,
        messages=conversation,
        model=model_name,
        stream=True,
    )
    return event_stream
def get_together_response(model_name, prompt):
    """Start a streaming Together chat completion with ``prompt`` as the only user message."""
    conversation = [{"role": "user", "content": prompt}]
    completion_stream = together_client.chat.completions.create(
        model=model_name,
        messages=conversation,
        stream=True,
    )
    return completion_stream
# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    """Start a streaming Gemini generation for ``prompt`` using ``model_name``."""
    return genai.GenerativeModel(model_name).generate_content(prompt, stream=True)
def get_llm_response(model_identifier, prompt):
    """
    Route a prompt to the provider named in ``model_identifier``.

    :param model_identifier: String of the form ``"provider://model-name"``.
        Only the first ``"://"`` is treated as the separator, so a model name
        that itself contains ``"://"`` is passed through intact.
    :param prompt: User prompt sent as a single message.
    :return: The provider's streaming response object, or ``None`` when the
        provider is not one of ``openai``, ``anthropic``, ``together`` or
        ``vertex``.
    :raises ValueError: If ``model_identifier`` contains no ``"://"``.
    """
    # maxsplit=1 keeps the two-value unpacking valid even if the model name
    # contains another "://".
    provider, model_name = model_identifier.split("://", 1)
    if provider == "openai":
        return get_openai_response(model_name, prompt)
    if provider == "anthropic":
        return get_anthropic_response(model_name, prompt)
    if provider == "together":
        return get_together_response(model_name, prompt)
    if provider == "vertex":
        return get_google_response(model_name, prompt)
    return None
# Main Streamlit App
def _adapt_stream(model_identifier, stream):
    """Wrap a provider stream so that it yields plain text for st.write_stream."""
    if model_identifier.startswith("anthropic"):
        return anthropic_streamlit_streamer(stream)
    if model_identifier.startswith("vertex"):
        return google_streamlit_streamer(stream)
    if model_identifier.startswith("together"):
        return together_streamlit_streamer(stream)
    # Other providers (OpenAI) are handed to write_stream unchanged.
    return stream


def _response_text(written):
    """Flatten the value returned by write_stream into a single string."""
    if written is None:
        return ""
    if isinstance(written, str):
        return written
    return "".join(str(part) for part in written)


def main():
    """
    Render the Language Model Council Sandbox page.

    Flow: configure the page and CSS, gate the app behind a password stored
    in ``st.session_state.authenticated``, let the user pick a council and an
    aggregator, then on "Submit" stream each council member's answer and
    finally stream an aggregated answer built from those responses.
    """
    import hmac  # local import: only needed for the password comparison

    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )
    # Custom CSS for the chat display
    center_css = """
<style>
h1, h2, h3, h6 { text-align: center; }
.chat-container {
display: flex;
align-items: flex-start;
margin-bottom: 10px;
}
.avatar {
width: 50px;
margin-right: 10px;
}
.message {
background-color: #f1f1f1;
padding: 10px;
border-radius: 10px;
width: 100%;
}
</style>
"""
    st.markdown(center_css, unsafe_allow_html=True)
    # App title and description
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate and judge each other.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")
    # Authentication system
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False
    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                # Constant-time comparison; an unset APP_PASSWORD never matches.
                password_ok = PASSWORD is not None and hmac.compare_digest(
                    password.encode("utf-8"), PASSWORD.encode("utf-8")
                )
                if password_ok:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")
    if st.session_state.authenticated:
        st.success("Logged in successfully!")
        # Council and aggregator selection
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)
        # Prompt input
        prompt = st.text_area("Enter your prompt:")
        if st.button("Submit"):
            st.write("Responses:")
            # Text produced by each council member, fed to the aggregator below.
            responses = {}
            # Fetching and streaming responses from each selected model
            for model in selected_models:
                # Not every model has an avatar entry; .get() falls back to
                # Streamlit's default avatar instead of raising KeyError.
                with st.chat_message(
                    model,
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model),
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    responses[model] = ""
                    if stream:
                        # write_stream returns what it rendered; keep it so
                        # the aggregator actually sees the council's answers.
                        written = message_placeholder.write_stream(
                            _adapt_stream(model, stream)
                        )
                        responses[model] = _response_text(written)
            # Constructing the aggregator prompt
            aggregator_prompt = f"User prompt: {prompt}\n\n"
            aggregator_prompt += "Responses from other LLMs:\n"
            aggregator_prompt += "\n".join(
                f"{model}: {responses.get(model, '')}" for model in selected_models
            )
            aggregator_prompt += "\n\nPlease provide an aggregated response."
            # Fetching and streaming response from the aggregator
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(
                    selected_aggregator, aggregator_prompt
                )
                if aggregator_stream:
                    message_placeholder.write_stream(
                        _adapt_stream(selected_aggregator, aggregator_stream)
                    )
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# import streamlit as st
# from components import llm_council_selector
# st.title("LLM Council Selector")
# selected_models = llm_council_selector()
# if selected_models is not None:
# st.write("Selected Models:", selected_models)
# else:
# st.write("No models selected or component didn't return a value.")
# Choose your council.
# Pre-selected.
# Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
# Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
# Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
# Custom:
# Choose from a list of available models.
# All:
# All available models.
# Choose aggregator.
# Aggregators are models proficient in synthesizing responses from other models into a single, high-quality output. An effective aggregator should maintain or enhance output quality even when
# integrating inputs that are of lesser quality than its own.
# Choices:
# - 4o-latest
# - gemini-1.5
# - grok-2
# - claude-3.5-sonnet
# - llama-3.1-405b-instruct
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
# Checkbox, enable judging.
#
# If checked, Judging config:
# Single sided
# Provide criteria. (or default).
# If pairwise, choose granularity (or default).
# Choose criteria. (or default).
# Enable position swapping?
# Go button.
# Sections.
# 1. Model outputs.
# 2. Aggregated output.
# 3. Judging underneath each output.
# Highlight in green, the output that was best, as determined by council.
# Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
# Show final overall score.
# Highlight in red, the output that was worst, as determined by council.
# Judging section.
# Show agreement matrix.
# Show bar graph of self-bias.
# Plot contrarianism vs. conviction (scatter plot)
# Show per-judge scores.
# Calculate total cost.
# Show total tokens used.
|