"""Language Model Council Sandbox.

A Streamlit app that sends one prompt to a "council" of LLMs from several
providers (OpenAI, Anthropic, Together, Google), streams each member's answer,
then asks an aggregator model to synthesize the answers into one response.

Based on the LLM-as-a-council idea from https://arxiv.org/abs/2406.08598.
"""

import os
import time

import anthropic
import dotenv
import google.generativeai as genai
import openai
import streamlit as st
from openai import OpenAI
from together import Together

dotenv.load_dotenv()

# Shared password gating the app. None when APP_PASSWORD is unset, in which
# case no login attempt can succeed (equality against None is always False).
PASSWORD = os.getenv("APP_PASSWORD")

# Provider API keys, all sourced from the environment / .env file.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")

# Initialize provider clients once at import time.
together_client = Together(api_key=TOGETHER_API_KEY)
genai.configure(api_key=GOOGLE_API_KEY)

# Set up API clients for OpenAI and Anthropic.
# NOTE(review): organization/project ids are hard-coded; consider moving them
# to environment variables alongside the API keys.
openai.api_key = OPENAI_API_KEY
openai_client = OpenAI(
    organization="org-kUoRSK0nOw4W2nQYMVGWOt03",
    project="proj_zb6k1DdgnSEbiAEMWxSOVVu4",
)
# The Anthropic client reads ANTHROPIC_API_KEY from the environment itself.
anthropic_client = anthropic.Anthropic()

# Council presets. Each model is identified as "<provider>://<model-name>",
# where provider selects the API client used in get_llm_response().
# NOTE(review): "anthropic://claude-3-5-sonnet" may need a dated suffix
# (e.g. -20240620) depending on the Anthropic API version — confirm.
LLM_COUNCIL_MEMBERS = {
    "Smalls": [
        "openai://gpt-4o-mini",
        "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "vertex://gemini-1.5-flash-001",
        "anthropic://claude-3-haiku-20240307",
    ],
    "Flagships": [
        "openai://gpt-4",
        "together://meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
        "vertex://gemini-1.5-pro-001",
        "anthropic://claude-3-5-sonnet",
    ],
}

# Inline SVG avatars (data URIs) for the chat messages. Not every council
# member has an entry — look up with .get() so missing models fall back to
# Streamlit's default avatar instead of raising KeyError.
PROVIDER_TO_AVATAR_MAP = {
    "openai://gpt-4o-mini": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIwLjk5ZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjU2IDI2MCI+PHBhdGggZD0iTTIzOS4xODQgMTA2LjIwM2E2NC43MiA2NC43MiAwIDAgMC01LjU3Ni01My4xMDNDMjE5LjQ1MiAyOC40NTkgMTkxIDE1Ljc4NCAxNjMuMjEzIDIxLjc0QTY1LjU4NiA2NS41ODYgMCAwIDAgNTIuMDk2IDQ1LjIyYTY0LjcyIDY0LjcyIDAgMCAwLTQzLjIzIDMxLjM2Yy0xNC4zMSAyNC42MDItMTEuMDYxIDU1LjYzNCA4LjAzMyA3Ni43NGE2NC42NyA2NC42NyAwIDAgMCA1LjUyNSA1My4xMDJjMTQuMTc0IDI0LjY1IDQyLjY0NCAzNy4zMjQgNzAuNDQ2IDMxLjM2YTY0LjcyIDY0LjcyIDAgMCAwIDQ4Ljc1NCAyMS43NDRjMjguNDgxLjAyNSA1My43MTQtMTguMzYxIDYyLjQxNC00NS40ODFhNjQuNzcgNjQuNzcgMCAwIDAgNDMuMjI5LTMxLjM2YzE0LjEzNy0yNC41NTggMTAuODc1LTU1LjQyMy04LjA4My03Ni40ODNtLTk3LjU2IDEzNi4zMzhhNDguNCA0OC40IDAgMCAxLTMxLjEwNS0xMS4yNTVsMS41MzUtLjg3bDUxLjY3LTI5LjgyNWE4LjYgOC42IDAgMCAwIDQuMjQ3LTcuMzY3di03Mi44NWwyMS44NDUgMTIuNjM2Yy4yMTguMTExLjM3LjMyLjQwOS41NjN2NjAuMzY3Yy0uMDU2IDI2LjgxOC0yMS43ODMgNDguNTQ1LTQ4LjYwMSA0OC42MDFNMzcuMTU4IDE5Ny45M2E0OC4zNSA0OC4zNSAwIDAgMS01Ljc4MS0zMi41ODlsMS41MzQuOTIxbDUxLjcyMiAyOS44MjZhOC4zNCA4LjM0IDAgMCAwIDguNDQxIDBsNjMuMTgxLTM2LjQyNXYyNS4yMjFhLjg3Ljg3IDAgMCAxLS4zNTguNjY1bC01Mi4zMzUgMzAuMTg0Yy0yMy4yNTcgMTMuMzk4LTUyLjk3IDUuNDMxLTY2LjQwNC0xNy44MDNNMjMuNTQ5IDg1LjM4YTQ4LjUgNDguNSAwIDAgMSAyNS41OC0yMS4zMzN2NjEuMzlhOC4yOSA4LjI5IDAgMCAwIDQuMTk1IDcuMzE2bDYyLjg3NCAzNi4yNzJsLTIxLjg0NSAxMi42MzZhLjgyLjgyIDAgMCAxLS43NjcgMEw0MS4zNTMgMTUxLjUzYy0yMy4yMTEtMTMuNDU0LTMxLjE3MS00My4xNDQtMTcuODA0LTY2LjQwNXptMTc5LjQ2NiA0MS42OTVsLTYzLjA4LTM2LjYzTDE2MS43MyA3Ny44NmEuODIuODIgMCAwIDEgLjc2OCAwbDUyLjIzMyAzMC4xODRhNDguNiA0OC42IDAgMCAxLTcuMzE2IDg3LjYzNXYtNjEuMzkxYTguNTQgOC41NCAwIDAgMC00LjQtNy4yMTNtMjEuNzQyLTMyLjY5bC0xLjUzNS0uOTIybC01MS42MTktMzAuMDgxYTguMzkgOC4zOSAwIDAgMC04LjQ5MiAwTDk5Ljk4IDk5LjgwOFY3NC41ODdhLjcyLjcyIDAgMCAxIC4zMDctLjY2NWw1Mi4yMzMtMzAuMTMzYTQ4LjY1MiA0OC42NTIgMCAwIDEgNzIuMjM2IDUwLjM5MXpNODguMDYxIDEzOS4wOTdsLTIxLjg0NS0xMi41ODVhLjg3Ljg3IDAgMCAxLS40MS0uNjE0VjY1LjY4NWE0OC42NTIgNDguNjUyIDAgMCAxIDc5Ljc1Ny0zNy4zNDZsLTEuNTM1Ljg3bC01MS42NyAyOS44MjVhOC42IDguNiAwIDAgMC00LjI0NiA3LjM2N3ptMTEuODY4LTI1LjU4TDEyOC4wNjcgOTcuM2wyOC4xODggMTYuMjE4djMyLjQzNGwtMjguMDg2IDE2LjIxOGwtMjguMTg4LTE2LjIxOHoiLz48L3N2Zz4=",
    "anthropic://claude-3-5-sonnet": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
    "vertex://gemini-1.5-flash-001": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9IiM0MjY4ZmYiIGQ9Ik0yNCAxMi4wMjRjLTYuNDM3LjM4OC0xMS41OSA1LjUzOS0xMS45NzcgMTEuOTc2aC0uMDQ3QzExLjU4OCAxNy41NjMgNi40MzYgMTIuNDEyIDAgMTIuMDI0di0uMDQ3QzYuNDM3IDExLjU4OCAxMS41ODggNi40MzcgMTEuOTc2IDBoLjA0N2MuMzg4IDYuNDM3IDUuNTQgMTEuNTg4IDExLjk3NyAxMS45Nzd6Ii8+PC9zdmc+",
    "together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMzIgMzIiPjxnIGZpbGw9Im5vbmUiPjxwYXRoIGZpbGw9IiNiNGFjYmMiIGQ9Ik0yMC44NzEgMjQuNzh2LTYuMDZoMy4wMXY1Ljc3YzAgLjM0LS4xMi42Mi0uMzEuOTRsLTIuNDEgNC4yYy0uMTguMjMtLjQ1LjM3LS43NS4zN2gtMS4wM2MtLjIzIDAtLjM4LS4yNC0uMjgtLjQ1bDEuNjctNC4zNWMuMDctLjEzLjEtLjI3LjEtLjQyTTE3LjA5MSAzMGMuMiAwIC4yNi0uMjEuMjItLjM4bC0yLjMyLTguNjFoLTIuOTlsLjg1IDMuNTVjLjE5LjcxLjY2IDEuMzIgMS4zIDEuNjljLjE0LjA4LjI1LjIyLjI5LjM4bC44NyAzLjE0Yy4wNC4xNy4yMS4yMi4zOC4yMnoiLz48cGF0aCBmaWxsPSIjY2RjNGQ2IiBkPSJtMjguNTQxIDIzLjA4bC0xLjI3LS45NmEuOTQuOTQgMCAwIDEtLjI3LS42NnYtMi43NWMwLS42NC0uMTYtMS4yMzgtLjQ0Mi0xLjc2cS4yMTMuMDUuNDQyLjA1YTIgMiAwIDEgMC0xLjk0OS0xLjU0OWEzLjggMy44IDAgMCAwLTEuOC0uNDUxaC04LjE3Yy0uNjYgMC0xLjI3LS40Mi0xLjU3LTEuMDFMMTAuMDQxIDMuNWEyLjIzIDIuMjMgMCAwIDAtMi4xLTEuNWMtLjE4IDAtLjMuMTctLjI0LjM0TDguNTcxIDVjLS4yIDAtMS4wNy4yMy0xLjg1LjczbC0uODA2LjQ5OEw3LjAwMiAxMHY4LjI2YzAgMi4wMSAxLjI1IDMuNzIgMy4wMSA0LjQxdjcuMDJjLS4wMS4xNy4xMy4zMS4zLjMxaDEuMzdjLjE4IDAgLjMyLS4xNC4zMi0uMzF2LTEuOTZjMC0uMTcuMDctLjMyLjE4LS40NGMuNTMtLjUyLjgyLTEuMjMuODItMS45N1YyM2g1LjA3YzEuMjcgMCAyLjQ5LS41NSAzLjMzLTEuNWMwIC45NC40OCAxLjcyIDEuMzggMi4zMmwzLjk2IDIuNDNjLjE2LjExLjI2LjMuMjYuNXYyLjkzYzAgLjE3LjE0LjMxLjMxLjMxaDEuMzdjLjE3IDAgLjMxLS4xNC4zMS0uMzF2LTUuNTFjLjAxLS40LS4xNS0uOC0uNDUtMS4wOSIvPjxwYXRoIGZpbGw9IiNmM2FkNjEiIGQ9Ik02Ljg0MSA2Yy0uMzYgMC0uNzIuMS0xLjAzLjI5bC0yLjE5IDEuMzVjLS4zNi4yMy0uNi42MS0uNjIgMS4wM2MtLjAzLjczLjU1IDEuMzMgMS4yNyAxLjMzaDMuNDljLjU3IDAgMS4wNC0uNDcgMS4wNC0xLjA1di0xYzAtMS4wNy0uODgtMS45NS0xLjk2LTEuOTUiLz48cGF0aCBmaWxsPSIjMWMxYzFjIiBkPSJNNi41IDhhLjUuNSAwIDEgMCAwLTFhLjUuNSAwIDAgMCAwIDFtLTEuOTk5LjVjMC0uMjgtLjIyLS41LS41LS41aC0uNzZhMS4yIDEuMiAwIDAgMC0uMjEgMWguOTdjLjI4IDAgLjUtLjIyLjUtLjUiLz48cGF0aCBmaWxsPSIjZjNhZDYxIiBkPSJNMjguOTkxIDI4aC0xLjk5djEuNjhjMCAuMTcuMTQuMzEuMzEuMzFoMS4zN2MuMTcgMCAuMzEtLjE0LjMxLS4zMXptLTE2Ljk5IDBoLTEuOTl2MS42OWMtLjAxLjE3LjEzLjMxLjMuMzFoMS4zN2MuMTggMCAuMzItLjE0LjMyLS4zMXptNS4wODggMmwtMS4zOTgtLjAxYy0uMTcgMC0uMzQtLjA1LS4zOC0uMjJsLS40OS0xLjc3aDIuMDU0bC40MzYgMS42MmMuMDQuMTctLjAyLjM3OC0uMjE2LjM4em0yLjI4OCAwYS4zMTMuMzEzIDAgMCAxLS4yNzYtLjQ1bC41OTUtMS41NWgyLjRsLS45MzUgMS42M2EuOTUuOTUgMCAwIDEtLjc0Mi4zN3oiLz48L2c+PC9zdmc+",
    "anthropic://claude-3-haiku-20240307": "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxZW0iIGhlaWdodD0iMWVtIiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGZpbGw9ImN1cnJlbnRDb2xvciIgZD0iTTE3LjMwNCAzLjU0MWgtMy42NzJsNi42OTYgMTYuOTE4SDI0Wm0tMTAuNjA4IDBMMCAyMC40NTloMy43NDRsMS4zNy0zLjU1M2g3LjAwNWwxLjM2OSAzLjU1M2gzLjc0NEwxMC41MzYgMy41NDFabS0uMzcxIDEwLjIyM0w4LjYxNiA3LjgybDIuMjkxIDUuOTQ1WiIvPjwvc3ZnPg==",
}

# Models eligible to synthesize the council's answers into a single response.
AGGREGATORS = ["openai://gpt-4", "openai://gpt-3.5-turbo"]


def anthropic_streamlit_streamer(stream):
    """Adapt an Anthropic streaming response for st.write_stream.

    :param stream: Streaming event iterator from the Anthropic Messages API.
    :return: Yields text deltas from content_block_delta events; stops at
        the message_stop event.
    """
    for event in stream:
        if hasattr(event, "type"):
            if event.type == "content_block_delta" and hasattr(event, "delta"):
                # Extract the incremental text from the delta, if any.
                text_delta = getattr(event.delta, "text", None)
                if text_delta:
                    yield text_delta
            elif event.type == "message_stop":
                break  # End of message, stop streaming.


def google_streamlit_streamer(stream):
    """Adapt a google.generativeai streaming response for st.write_stream."""
    for chunk in stream:
        yield chunk.text


def together_streamlit_streamer(stream):
    """Adapt a Together chat-completions stream for st.write_stream.

    The final chunk's delta may carry no content (None); skip such chunks so
    st.write_stream never receives None.
    """
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            yield content


# Helper functions for LLM council and aggregator selection.
def llm_council_selector():
    """Render a radio picker for the council preset; return its model list."""
    selected_council = st.radio(
        "Choose a council configuration", options=list(LLM_COUNCIL_MEMBERS.keys())
    )
    return LLM_COUNCIL_MEMBERS[selected_council]


def aggregator_selector():
    """Render a radio picker for the aggregator model; return its identifier."""
    return st.radio("Choose an aggregator LLM", options=AGGREGATORS)


# API calls for different providers. Each returns a provider-native stream.
def get_openai_response(model_name, prompt):
    """Start a streaming OpenAI chat completion for a single user prompt."""
    return openai_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )


# https://docs.anthropic.com/en/api/messages-streaming
def get_anthropic_response(model_name, prompt):
    """Start a streaming Anthropic message for a single user prompt."""
    return anthropic_client.messages.create(
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
        model=model_name,
        stream=True,
    )


def get_together_response(model_name, prompt):
    """Start a streaming Together chat completion for a single user prompt."""
    return together_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )


# https://ai.google.dev/gemini-api/docs/text-generation?lang=python
def get_google_response(model_name, prompt):
    """Start a streaming Gemini generation for a single user prompt."""
    model = genai.GenerativeModel(model_name)
    return model.generate_content(prompt, stream=True)


# Maps the provider prefix of a model identifier to its request function.
_PROVIDER_DISPATCH = {
    "openai": get_openai_response,
    "anthropic": get_anthropic_response,
    "together": get_together_response,
    "vertex": get_google_response,
}


def get_llm_response(model_identifier, prompt):
    """Dispatch a prompt to the provider encoded in the model identifier.

    :param model_identifier: "<provider>://<model-name>" string.
    :param prompt: User prompt to send.
    :return: A provider-native response stream, or None when the provider
        prefix is unknown (callers treat None as "no stream available").
    """
    provider, _, model_name = model_identifier.partition("://")
    handler = _PROVIDER_DISPATCH.get(provider)
    if handler is None:
        return None
    return handler(model_name, prompt)


# Main Streamlit App
def main():
    st.set_page_config(
        page_title="Language Model Council Sandbox", page_icon="🏛️", layout="wide"
    )

    # Custom CSS for the chat display (currently empty placeholder).
    center_css = """ """
    st.markdown(center_css, unsafe_allow_html=True)

    # App title and description.
    st.title("Language Model Council Sandbox")
    st.markdown("###### Invoke a council of LLMs to generate and judge each other.")
    st.markdown("###### [ArXiv Paper](https://arxiv.org/abs/2406.08598)")

    # Authentication system: a single shared password from APP_PASSWORD.
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False

    cols = st.columns([2, 1, 2])
    if not st.session_state.authenticated:
        with cols[1]:
            password = st.text_input("Password", type="password")
            if st.button("Login", use_container_width=True):
                if password == PASSWORD:
                    st.session_state.authenticated = True
                else:
                    st.error("Invalid credentials")

    if st.session_state.authenticated:
        st.success("Logged in successfully!")

        # Council and aggregator selection.
        selected_models = llm_council_selector()
        st.write("Selected Models:", selected_models)
        selected_aggregator = aggregator_selector()
        st.write("Selected Aggregator:", selected_aggregator)

        # Prompt input.
        prompt = st.text_area("Enter your prompt:")

        if st.button("Submit"):
            st.write("Responses:")

            # Fetch and stream responses from each selected council member.
            for model in selected_models:
                with st.chat_message(
                    model,
                    # .get() so models without an avatar fall back to the
                    # Streamlit default instead of raising KeyError
                    # (e.g. the "Flagships" council members).
                    avatar=PROVIDER_TO_AVATAR_MAP.get(model),
                ):
                    message_placeholder = st.empty()
                    stream = get_llm_response(model, prompt)
                    if stream:
                        if model.startswith("anthropic"):
                            stream = anthropic_streamlit_streamer(stream)
                        elif model.startswith("vertex"):
                            stream = google_streamlit_streamer(stream)
                        elif model.startswith("together"):
                            stream = together_streamlit_streamer(stream)
                        # write_stream returns the full concatenated text;
                        # store it so the aggregator prompt below actually
                        # contains each member's response.
                        st.session_state[model] = message_placeholder.write_stream(
                            stream
                        )

            # Construct the aggregator prompt from the stored responses.
            aggregator_prompt = f"User prompt: {prompt}\n\n"
            aggregator_prompt += "Responses from other LLMs:\n"
            aggregator_prompt += "\n".join(
                [
                    f"{model}: {st.session_state.get(model, '')}"
                    for model in selected_models
                ]
            )
            aggregator_prompt += "\n\nPlease provide an aggregated response."

            # Fetch and stream the aggregator's synthesis.
            st.write(f"Aggregated response from {selected_aggregator}:")
            with st.chat_message(selected_aggregator):
                message_placeholder = st.empty()
                aggregator_stream = get_llm_response(
                    selected_aggregator, aggregator_prompt
                )
                if aggregator_stream:
                    message_placeholder.write_stream(aggregator_stream)
    else:
        with cols[1]:
            st.warning("Please log in to access this app.")


if __name__ == "__main__":
    main()

# Design notes / roadmap:
#
# Choose your council.
#  Pre-selected.
#   Smalls: GPT-4o-mini, llama-3.1-70b, qwen-2.0-70b
#   Flagships: GPT-4o, llama-3.1-405b, qwen-2.0-110b, gemini, claude-3.5-sonnet
#   Best: chatgpt-4o-latest, gemini-1.5-pro-exp-0827, grok-2-2024-08-13, claude-3-5-sonnet-20240620, llama-3.1-405b-instruct
#  Custom:
#   Choose from a list of available models.
#  All:
#   All available models.
#
# Choose aggregator.
#  Aggregators are models proficient in synthesizing responses from other models into a single, highquality output. An effective aggregator should maintain or enhance output quality even when
#  integrating inputs that are of lesser quality than its own.
#  Choices:
#   - 4o-latest
#   - gemini-1.5
#   - grok-2
#   - claude-3.5-sonnet
#   - llama-3.1-405b-instruct
#
# Provide a prompt. (Or pre-canned prompts.)
# Paste chat history.
#
# Checkbox, enable judging.
# If checked, Judging config:
#  Single sided
#   Provide criteria. (or default).
#  If pairwise, choose granularity (or default).
#   Choose criteria. (or default).
#   Enable position swapping?
#
# Go button.
#
# Sections.
#  1. Model outputs.
#  2. Aggregated output.
#  3. Judging underneath each output.
#   Highlight in green, the output that was best, as determined by council.
#    Show graph breakdown of scores and justifications. (by criteria, # wins and # losses)
#    Show final overall score.
#   Highlight in red, the output that was worst, as determined by council.
#
# Judging section.
#  Show agreement matrix.
#  Show bar graph of self-bias.
#  Plot contrarianism vs. conviction (scatter plot)
#  Show per-judge scores.
#
# Calculate total cost.
# Show total tokens used.