umar141 committed · Commit e515527 · verified · 1 Parent(s): 3fe5e19

Update app.py

Files changed (1)
  1. app.py +25 -63
app.py CHANGED
@@ -1,63 +1,25 @@
-import streamlit as st
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
-import torch
-import openai
-
-st.set_page_config(page_title="Baro - Emotional AI", layout="centered")
-
-st.title("🧠 Baro - Emotion-Aware AI (Gemma 1B)")
-st.markdown("Interact with your emotionally intelligent AI assistant fine-tuned on Gemma 1B.")
-
-# Inference Mode Selector
-mode = st.radio("Choose Inference Mode:", ["Transformers (local)", "vLLM API (remote)"])
-
-# Shared Input
-prompt = st.text_area("🗣️ Your Message:", height=200)
-
-if st.button("🔮 Generate Response"):
-    if not prompt.strip():
-        st.warning("Please enter a message.")
-    else:
-        with st.spinner("Baro is thinking..."):
-
-            # Transformers Mode
-            if mode == "Transformers (local)":
-                try:
-                    tokenizer = AutoTokenizer.from_pretrained("umar141/Gemma_1B_Baro_v2_vllm")
-                    model = AutoModelForCausalLM.from_pretrained(
-                        "umar141/Gemma_1B_Baro_v2_vllm",
-                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                        device_map="auto"
-                    )
-                    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-                    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-                    _ = model.generate(
-                        input_ids=input_ids,
-                        max_new_tokens=200,
-                        do_sample=True,
-                        top_p=0.9,
-                        temperature=0.7,
-                        streamer=streamer,
-                    )
-                except Exception as e:
-                    st.error(f"Error in Transformers mode: {e}")
-
-            # vLLM API Mode
-            else:
-                api_url = st.text_input("vLLM Server Base URL", value="http://localhost:8000/v1")
-                if api_url:
-                    openai.api_key = "EMPTY"
-                    openai.base_url = api_url
-
-                    try:
-                        response = openai.ChatCompletion.create(
-                            model="umar141/Gemma_1B_Baro_v2_vllm",
-                            messages=[
-                                {"role": "system", "content": "You are Baro, an emotionally intelligent assistant."},
-                                {"role": "user", "content": prompt}
-                            ]
-                        )
-                        st.success(response.choices[0].message["content"])
-                    except Exception as e:
-                        st.error(f"Error in vLLM API mode: {e}")
+from transformers import GemmaForCausalLM, AutoTokenizer
+
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("umar141/Gemma_1B_Baro_v2_vllm")
+
+# Load model
+model = GemmaForCausalLM.from_pretrained(
+    "umar141/Gemma_1B_Baro_v2_vllm",
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto"
+)
+
+# Tokenize prompt
+input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+
+# Generate
+outputs = model.generate(
+    input_ids=input_ids,
+    max_new_tokens=200,
+    do_sample=True,
+    top_p=0.9,
+    temperature=0.7,
+)
+
+response = tokenizer.decode(outputs[0], skip_special_tokens=True)
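
As committed, app.py no longer runs on its own: the new file uses torch.float16 and torch.cuda.is_available() without importing torch, reads a prompt variable that is never defined (the removed Streamlit UI supplied it via st.text_area), and decodes response without ever displaying it. Below is a minimal runnable sketch of the same logic; the hard-coded prompt is a hypothetical stand-in for the removed UI, and it resolves the model class with AutoModelForCausalLM as the previous version did, rather than hard-coding GemmaForCausalLM.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "umar141/Gemma_1B_Baro_v2_vllm"

# Hypothetical stand-in for the Streamlit text box the commit removed.
prompt = "How are you feeling today?"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# AutoModelForCausalLM (as in the previous version) picks the right
# architecture class from the checkpoint config.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=200,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
)

# The committed file stops after decoding; print so the script produces output.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Note also that the removed vLLM branch relied on the legacy openai.ChatCompletion.create call, which no longer exists in openai>=1.0; a client for an OpenAI-compatible vLLM server would now go through openai.OpenAI(base_url=..., api_key="EMPTY").chat.completions.create.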