Spaces: Running on Zero
changed model, added system prompt
app.py
CHANGED
```diff
@@ -1,6 +1,7 @@
 import os
 from threading import Thread
 from typing import Iterator
+import time
 
 import gradio as gr
 import spaces
```
```diff
@@ -14,7 +15,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
 # Llama-3.1 8B Stanford Encyclopedia of Philosophy Chat
 
-This Space showcases the Llama3.1-
+This Space showcases the Llama3.1-Instruct-SEP-Chat model from ruggsea, a fine-tuned instruction version of Meta's Llama 3.1 8B model, specifically tailored for philosophical discussions with a formal and informative tone. The model was trained using the Stanford Encyclopedia of Philosophy dataset and carefully crafted prompts.
 
 Feel free to engage in philosophical discussions and ask questions. The model supports multi-turn conversations and will maintain context.
 """
```
```diff
@@ -31,7 +32,7 @@ if not torch.cuda.is_available():
 
 # Initialize model and tokenizer
 if torch.cuda.is_available():
-    model_id = "ruggsea/Llama3.1-
+    model_id = "ruggsea/Llama3.1-Instruct-SEP-Chat"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
```
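Note that the hunk above keeps `load_in_4bit=True` as a bare keyword to `from_pretrained`. That still works through transformers' bitsandbytes integration, but recent transformers releases steer users toward an explicit `BitsAndBytesConfig`. A minimal equivalent sketch (the compute dtype is an assumption, not something this diff specifies):

```python
# Equivalent 4-bit loading with an explicit quantization config.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "ruggsea/Llama3.1-Instruct-SEP-Chat"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: not specified in the diff
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```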
```diff
@@ -83,68 +84,82 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 
-chat_interface = gr.ChatInterface(
-    fn=generate,
-    additional_inputs=[
-        gr.Textbox(
-            label="System prompt",
-            lines=6,
-            value="You are a knowledgeable philosophy professor. Provide clear, accurate responses using markdown formatting. Focus on philosophical concepts and maintain academic rigor while being accessible."
-        ),
-        gr.Slider(
-            label="Max new tokens",
-            minimum=1,
-            maximum=MAX_MAX_NEW_TOKENS,
-            step=1,
-            value=DEFAULT_MAX_NEW_TOKENS,
-        ),
-        gr.Slider(
-            label="Temperature",
-            minimum=0.1,
-            maximum=4.0,
-            step=0.1,
-            value=0.7,
-        ),
-        gr.Slider(
-            label="Top-p (nucleus sampling)",
-            minimum=0.05,
-            maximum=1.0,
-            step=0.05,
-            value=0.9,
-        ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.1,
-        ),
-    ],
-    stop_btn=None,
-    examples=[
-        ["What is the trolley problem and what are its main ethical implications?"],
-        ["Can you explain Plato's Theory of Forms?"],
-        ["What is the difference between analytic and continental philosophy?"],
-        ["How does Kant's Categorical Imperative work?"],
-        ["What is the problem of consciousness in philosophy of mind?"],
-    ],
-)
-
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# Philosophy Chat with Llama 3.1")
     gr.Markdown(DESCRIPTION)
+
+    chatbot = gr.Chatbot(
+        height=500,
+        placeholder="Start your philosophical discussion here...",
+        type="messages",
+    )
+
+    chat_interface = gr.ChatInterface(
+        fn=generate,
+        additional_inputs=[
+            gr.Textbox(
+                label="System prompt",
+                lines=6,
+                value="You are a knowledgeable philosophy professor using the Stanford Encyclopedia of Philosophy as your knowledge base. Provide clear, accurate responses using markdown formatting. Focus on philosophical concepts and maintain academic rigor while being accessible. Always cite relevant philosophers and concepts."
+            ),
+            gr.Slider(
+                label="Max new tokens",
+                minimum=1,
+                maximum=MAX_MAX_NEW_TOKENS,
+                step=1,
+                value=DEFAULT_MAX_NEW_TOKENS,
+            ),
+            gr.Slider(
+                label="Temperature",
+                minimum=0.1,
+                maximum=4.0,
+                step=0.1,
+                value=0.7,
+            ),
+            gr.Slider(
+                label="Top-p (nucleus sampling)",
+                minimum=0.05,
+                maximum=1.0,
+                step=0.05,
+                value=0.9,
+            ),
+            gr.Slider(
+                label="Top-k",
+                minimum=1,
+                maximum=1000,
+                step=1,
+                value=50,
+            ),
+            gr.Slider(
+                label="Repetition penalty",
+                minimum=1.0,
+                maximum=2.0,
+                step=0.05,
+                value=1.1,
+            ),
+        ],
+        stop_btn=None,
+        examples=[
+            ["What is the trolley problem and what are its main ethical implications?"],
+            ["Can you explain Plato's Theory of Forms?"],
+            ["What is the difference between analytic and continental philosophy?"],
+            ["How does Kant's Categorical Imperative work?"],
+            ["What is the problem of consciousness in philosophy of mind?"],
+        ],
+        chatbot=chatbot,
+        title="Philosophy Chat with Llama 3.1",
+        description="Ask philosophical questions and engage in deep discussions",
+        theme="soft",
+        flagging_mode="manual",  # Enable user feedback
+        flagging_options=["Helpful", "Incorrect", "Unclear"],
+        save_history=True,  # Enable chat history persistence
+    )
+
     gr.DuplicateButton(
         value="Duplicate Space for private use",
         elem_id="duplicate-button"
     )
-
+
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
```
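Two wiring details in the new block are worth noting. `gr.Chatbot(type="messages")` switches chat history to openai-style role/content dicts rather than `[user, bot]` pairs, and passing the component via `chatbot=chatbot` makes `gr.ChatInterface` render that pre-configured chatbot instead of creating its own. The callback therefore receives history it can feed almost directly to a chat template; a hypothetical helper illustrating the shape (the name is not from the diff):

```python
# Hypothetical helper: with type="messages", the history Gradio passes to the
# callback is already a list of {"role": ..., "content": ...} dicts.
def build_conversation(system_prompt: str, chat_history: list, message: str) -> list:
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    conversation.extend(chat_history)  # prior turns, already in messages format
    conversation.append({"role": "user", "content": message})  # current turn
    return conversation
```

Also note that `flagging_mode`, `flagging_options`, and `save_history` are newer `gr.ChatInterface` options (Gradio 5.x), so the Space's Gradio pin has to be recent enough for this commit to run.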
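The hunk header (`def generate(`) and the surviving context lines (`outputs.append(text)` / `yield "".join(outputs)`) show that the callback streams its reply, but its body falls outside this diff. Below is a sketch of the standard `TextIteratorStreamer` pattern such chat Spaces are typically built on, adapted to the messages-format history above; the decorator, trimming logic, and exact names are assumptions rather than lines from app.py:

```python
# Sketch of the streaming callback (body not shown in this diff).
# Assumes the module-level `model`/`tokenizer` from the init block above,
# plus the build_conversation helper sketched earlier.
import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
from transformers import TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

@spaces.GPU  # ZeroGPU: a GPU is attached only while this function runs
def generate(
    message: str,
    chat_history: list,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    repetition_penalty: float,
) -> Iterator[str]:
    # gr.ChatInterface passes (message, history), then each additional_inputs
    # value in the order declared above.
    conversation = build_conversation(system_prompt, chat_history, message)

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the prompt fits the budget.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input as it exceeded {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    # Generate in a background thread and stream decoded text out of it.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
        ),
    ).start()

    # The two context lines visible in the hunk: accumulate and re-yield the
    # growing string so the interface updates the reply in place.
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
```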