Bartix84 committed on
Commit
6b01707
·
verified ·
1 Parent(s): e08680b

Upload 4 files

Browse files
Files changed (4) hide show
  1. .streamlit/secrets.toml +1 -0
  2. fondo.jpeg +0 -0
  3. main.py +241 -0
  4. portada3.jpg +0 -0
.streamlit/secrets.toml ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENROUTER_API_KEY = "<your-openrouter-api-key>"  # placeholder only: never commit a real key; revoke and rotate the key that was previously exposed on this line
fondo.jpeg ADDED
main.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #--------------------IMPORTED LIBRARIES-----------------------------
2
+
3
+ import streamlit as st
4
+ import base64
5
+ import json
6
+ import faiss
7
+ import torch
8
+ from transformers import AutoTokenizer, AutoModel
9
+ import torch.nn.functional as F
10
+ import httpx
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ # ---------------------- INITIAL CONFIGURATION ----------------------
14
+
15
# Browser tab title and wide page layout.
st.set_page_config(layout="wide", page_title="PoliticBot")

# Read the background picture and turn it into base64 text so it can be
# inlined into the page as a data URI.
with open("fondo.jpeg", mode="rb") as f:
    img_bytes = f.read()

encoded_img = str(base64.b64encode(img_bytes), "ascii")

# CSS that paints the encoded picture behind the whole app container.
# Doubled braces are literal CSS braces inside the f-string.
background_css = f"""
<style>
.stApp {{
background-image: url("data:image/jpeg;base64,{encoded_img}");
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
}}
</style>
"""

st.markdown(background_css, unsafe_allow_html=True)
31
+
32
+ # -------------------------- STYLE CUSTOMIZATION-------------------------
33
+
34
# Global look-and-feel overrides: translucent sidebar, white text everywhere,
# dark translucent text areas / code blocks, and grey bold buttons.
# The CSS text below is emitted to the browser as-is.
custom_css = """
<style>
section[data-testid="stSidebar"] {
background-color: rgba(0, 0, 50, 0.6);
color: white;
}

h1, h2, h3, h4, h5, h6, p, label, div, span {
color: white !important;
}
textarea {
color: white !important;
background-color: rgba(0, 0, 0, 0.3) !important;
border: 1px solid #ccc !important;
border-radius: 8px !important;
padding: 0.5em !important;
}
::placeholder {
color: #ccc !important;
}
pre, code {
background-color: rgba(0, 0, 0, 0.4) !important;
color: white !important;
border-radius: 8px !important;
padding: 0.5em !important;
}
/* SOLO APLICA A BOTONES DEL SIDEBAR */
section[data-testid="stSidebar"] div[data-testid="stButton"] > button {
background-color: #526366 !important;
color: white !important;
font-weight: bold;
font-size: 16px;
border-radius: 8px;
padding: 0.6em;
width: 80% !important;
margin-bottom: 0.5em;
}
/* APLICA A BOTONES FUERA DEL SIDEBAR (ej: Send question) */
div[data-testid="stButton"] > button {
background-color: #526366 !important;
color: white !important;
font-weight: bold;
font-size: 16px;
border-radius: 8px;
padding: 0.6em;
margin-top: 1em;
}
</style>
"""

st.markdown(custom_css, unsafe_allow_html=True)
83
+
84
+ # ---------------------- LIBRARIES AND MODELS ----------------------
85
+
86
# Maps each selectable ideology family to the lowercase substrings that are
# looked for in a document title when filtering retrieved results.
ideology_keywords = {
    "Communism": [
        "communism",
        "marxism",
        "marxist",
        "anarcho-communism",
        "leninism",
    ],
    "Liberalism": [
        "liberalism",
        "libertarianism",
        "classical liberal",
    ],
    "Conservatism": [
        "conservatism",
        "traditional conservatism",
        "neoconservatism",
    ],
    "Fascism": [
        "fascism",
        "nazism",
        "national socialism",
    ],
    "Radical_Left": [
        "radical left",
        "far-left",
        "revolutionary socialism",
        "anarchism",
    ],
}

# Family names in display order (dicts preserve insertion order, so this is
# exactly the order of the keys above).
ideology_families = list(ideology_keywords)
95
+
96
@st.cache_resource
def load_encoder():
    """Load the tokenizer and encoder model used to embed queries.

    Both are loaded from the "intfloat/e5-base-v2" checkpoint. The model is
    moved to CUDA when ``torch.cuda.is_available()`` is true, otherwise it
    stays on the CPU. The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "intfloat/e5-base-v2"
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    encoder = AutoModel.from_pretrained(checkpoint).to(device)
    return text_tokenizer, encoder


# Module-level handles used by embed_query().
tokenizer, model = load_encoder()
104
+
105
def mean_pooling(output, mask):
    """Average token embeddings over the positions selected by ``mask``.

    Args:
        output: Model output exposing ``last_hidden_state``; the code sums
            over dimension 1 of that tensor.
        mask: Attention mask that is unsqueezed on the last axis and
            broadcast to the shape of ``last_hidden_state``.

    Returns:
        The mask-weighted mean of ``last_hidden_state`` over dimension 1.
        Rows whose mask is entirely zero yield zeros instead of NaN/inf.
    """
    token_embeddings = output.last_hidden_state
    # Cast explicitly so the weighting and the denominator are computed in
    # the embeddings' floating-point dtype rather than the mask's int dtype.
    input_mask_expanded = (
        mask.unsqueeze(-1)
        .expand(token_embeddings.size())
        .to(token_embeddings.dtype)
    )
    summed = (token_embeddings * input_mask_expanded).sum(1)
    # Clamp the denominator: an all-zero mask would otherwise divide by zero.
    counts = input_mask_expanded.sum(1).clamp(min=1e-9)
    return summed / counts
109
+
110
def embed_query(query):
    """Embed a query string with the module-level tokenizer and model.

    The text is prefixed with the literal ``"query: "`` (presumably the
    prefix the E5 checkpoint expects for queries; confirm against the model
    card), tokenized with truncation at 512 tokens, run through the model
    without gradient tracking, mean-pooled and L2-normalized.

    Args:
        query: The text to embed.

    Returns:
        A float32 NumPy array produced from the normalized pooled output.
    """
    encoded = tokenizer(
        f"query: {query}",
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Move every input tensor to the device the model lives on.
    target_device = model.device
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        model_output = model(**encoded)

    sentence_vector = mean_pooling(model_output, encoded["attention_mask"])
    unit_vector = F.normalize(sentence_vector, p=2, dim=1)
    return unit_vector.cpu().numpy().astype("float32")
118
+
119
@st.cache_resource
def load_data_global():
    """Download and load the retrieval artifacts.

    Three files are fetched with ``hf_hub_download`` from the
    "Bartix84/politicbot-data" dataset repository: the FAISS index, the
    title metadata (JSON) and the text chunks (JSON Lines). The function is
    wrapped in ``st.cache_resource``.

    Returns:
        A tuple ``(index, metadata, chunks)`` where ``index`` is the object
        returned by ``faiss.read_index``, ``metadata`` is the parsed JSON
        document, and ``chunks`` is a list with one parsed object per
        non-blank line of the JSONL file.
    """
    repo_id = "Bartix84/politicbot-data"

    def fetch(filename):
        # Single place that knows the repository id and repo type.
        return hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")

    chunks_path = fetch("chunks.jsonl")
    index_path = fetch("faiss_index.index")
    metadata_path = fetch("metadata_titles.json")

    index = faiss.read_index(index_path)

    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    with open(chunks_path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # would otherwise make json.loads raise.
        chunks = [json.loads(line) for line in f if line.strip()]

    return index, metadata, chunks
134
+
135
def search_in_global_index(query_embedding, index, metadata, chunks, selected_ideology, k=5):
    """Retrieve up to ``k`` chunks whose title matches the chosen ideology.

    The index is asked for ``k * 8`` neighbours. Each returned id is mapped
    to a title through ``metadata``; each distinct title is considered once,
    kept only if it contains one of the ideology's keywords, and represented
    by the first entry of ``chunks`` carrying that title.

    Args:
        query_embedding: Query vectors passed straight to ``index.search``;
            only the first row of the result is used.
        index: Object exposing ``search(query, n)`` returning
            ``(distances, ids)``.
        metadata: Sequence of titles indexed by the ids the index returns
            (assumed to be aligned with the index rows; verify against the
            code that built the index).
        chunks: List of dicts with at least a ``"title"`` key.
        selected_ideology: Key into the module-level ``ideology_keywords``.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` chunk dicts, in retrieval order. Empty when
        the ideology has no keywords or ``k`` is not positive.
    """
    keywords = ideology_keywords.get(selected_ideology, [])
    # Without keywords nothing can ever match, so skip the search entirely.
    if not keywords or k <= 0:
        return []

    _, indices = index.search(query_embedding, k * 8)

    results = []
    seen_titles = set()
    for raw_idx in indices[0]:
        idx = int(raw_idx)
        # FAISS pads the result with -1 when fewer neighbours exist than
        # requested; a negative id would silently index from the end of
        # ``metadata``. Ids past the end are skipped as well.
        if idx < 0 or idx >= len(metadata):
            continue

        title = metadata[idx]
        if title in seen_titles:
            continue
        seen_titles.add(title)

        # Cheap keyword filter first, so the linear scan over ``chunks``
        # only runs for titles that can actually be returned.
        title_text = title.lower()
        if not any(keyword in title_text for keyword in keywords):
            continue

        match = next((chunk for chunk in chunks if chunk["title"] == title), None)
        if match is None:
            continue

        results.append(match)
        if len(results) >= k:
            break

    return results
155
+
156
def generate_rag_response(ideology, user_query, context_chunks):
    """Ask the OpenRouter chat-completions endpoint for an answer.

    The ``"chunk"`` texts of ``context_chunks`` are joined with blank lines
    and cut to the first 1500 characters, then embedded with the question
    in a prompt that asks the model to reason as the given ideology.

    Args:
        ideology: Ideology name interpolated into the system and user prompts.
        user_query: The user's question.
        context_chunks: Iterable of dicts with a ``"chunk"`` text entry.

    Returns:
        The stripped answer text on success. On any failure (network error,
        non-200 status, or a body that does not have the expected shape) a
        human-readable string starting with "❌ Error" is returned instead
        of raising.
    """
    context = "\n\n".join(chunk["chunk"] for chunk in context_chunks)[:1500]

    system_prompt = f"You are a political assistant who thinks and reasons like a {ideology} thinker."

    user_prompt = f"""
Answer the following political or ethical question based strictly on the CONTEXT provided.
Think according to the principles and values of {ideology}. If the context is insufficient, clearly say so or explain its limitations.
Avoid always starting your answer the same way. Vary the introduction while staying formal and ideologically grounded.
CONTEXT:
{context}
QUESTION:
{user_query}
ANSWER:"""

    headers = {
        "Authorization": f"Bearer {st.secrets['OPENROUTER_API_KEY']}",
        "HTTP-Referer": "https://yourappname.streamlit.app",
        "X-Title": "PoliticBot"
    }

    payload = {
        "model": "mistralai/mistral-7b-instruct",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.9,
        "max_tokens": 768,
        "top_p": 0.95
    }

    try:
        response = httpx.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60
        )
    except httpx.HTTPError as exc:
        # Timeouts, DNS/connection failures, etc. Report them the same way
        # as HTTP errors instead of crashing the app.
        return f"❌ Error contacting OpenRouter: {exc}"

    if response.status_code != 200:
        return f"❌ Error {response.status_code}: {response.text}"

    try:
        return response.json()["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # A 200 reply whose body is not JSON, or lacks the expected
        # choices/message/content structure.
        return f"❌ Error: unexpected response from OpenRouter: {response.text}"
199
+
200
+ # ---------------------- STREAMLIT INTERFACE ----------------------
201
+
202
# Page header: cover image, title and subtitle.
st.image('portada3.jpg', use_container_width=True)
st.title('🗳️ PoliticBot')
st.subheader('Reasoning with political ideologies')

# Sidebar: one button per ideology family; the choice is stored in
# st.session_state under "selected_ideology".
with st.sidebar:
    st.header("Choose a political ideology")

    if "selected_ideology" not in st.session_state:
        st.session_state.selected_ideology = None

    for ideology in ideology_families:
        if st.button(ideology):
            st.session_state.selected_ideology = ideology

selected_ideology = st.session_state.selected_ideology

# Main panel: shown only once an ideology has been chosen.
if selected_ideology:
    st.write(f"You have selected: **{selected_ideology}**")
    user_query = st.text_area("Write your question or political dilemma:", height=100)

    if st.button("Send question"):
        if not user_query.strip():
            st.warning("Write a question before continuing.")
        else:
            with st.spinner("Thinking like that ideology..."):
                # The ideology is appended to the query text before embedding.
                query_emb = embed_query(user_query + " in the context of " + selected_ideology)
                index, metadata, chunks = load_data_global()
                context = search_in_global_index(query_emb, index, metadata, chunks, selected_ideology, k=5)
                response = generate_rag_response(selected_ideology, user_query, context)

            st.subheader("🤖 Generated response:")
            # Prefix every line, not just the first, so a multi-line answer
            # stays inside the markdown blockquote.
            quoted_response = response.replace("\n", "\n> ")
            st.markdown(f"> {quoted_response}")

            with st.expander("🌐 Display the context used"):
                if not context:
                    st.markdown("*No relevant context found.*")
                else:
                    for chunk in context:
                        st.markdown(f"**{chunk['title']}**")
                        chunk_text = chunk["chunk"]
                        # Only mark the preview as truncated when it is.
                        if len(chunk_text) > 500:
                            st.code(chunk_text[:500] + "...")
                        else:
                            st.code(chunk_text)
portada3.jpg ADDED