Update modules/studentact/current_situation_analysis.py
modules/studentact/current_situation_analysis.py
CHANGED
@@ -35,24 +35,234 @@ def display_current_situation_visual(doc, metrics):
         logger.error(f"Error mostrando visualizaciones: {str(e)}")
         st.error("Error al generar visualizaciones")
 
+def analyze_text_dimensions(doc):
+    """
+    Analiza las diferentes dimensiones del texto.
+
+    Args:
+        doc: Documento procesado por spaCy
+
+    Returns:
+        dict: Métricas del análisis
+    """
+    try:
+        # Analizar claridad (basado en longitud de oraciones)
+        clarity_score = analyze_clarity(doc)
+
+        # Analizar vocabulario (diversidad léxica)
+        vocabulary_score = analyze_vocabulary_diversity(doc)
+
+        # Analizar cohesión (conexiones entre oraciones)
+        cohesion_score = analyze_cohesion(doc)
+
+        # Analizar estructura (complejidad sintáctica)
+        structure_score = analyze_structure(doc)
+
+        # Generar gráficos
+        sentence_graphs = generate_sentence_graphs(doc)
+        word_connections = generate_word_connections(doc)
+        connection_paths = generate_connection_paths(doc)
+
+        return {
+            'clarity': clarity_score,
+            'vocabulary': vocabulary_score,
+            'cohesion': cohesion_score,
+            'structure': structure_score,
+            'sentence_graphs': sentence_graphs,
+            'word_connections': word_connections,
+            'connection_paths': connection_paths
+        }
+
+    except Exception as e:
+        logger.error(f"Error en analyze_text_dimensions: {str(e)}")
+        raise
+
+def analyze_clarity(doc):
+    """Analiza la claridad basada en longitud de oraciones"""
+    sentences = list(doc.sents)
+    avg_length = sum(len(sent) for sent in sentences) / len(sentences)
+    return normalize_score(avg_length, optimal_value=20)
+
+def analyze_vocabulary_diversity(doc):
+    """Analiza la diversidad del vocabulario"""
+    unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
+    total_words = len([token for token in doc if token.is_alpha])
+    return len(unique_lemmas) / total_words if total_words > 0 else 0
+
+def analyze_cohesion(doc):
+    """Analiza la cohesión textual"""
+    sentences = list(doc.sents)
+    connections = 0
+    for i in range(len(sentences)-1):
+        sent1_words = {token.lemma_ for token in sentences[i]}
+        sent2_words = {token.lemma_ for token in sentences[i+1]}
+        connections += len(sent1_words.intersection(sent2_words))
+    return normalize_score(connections, optimal_value=5)
+
+def analyze_structure(doc):
+    """Analiza la complejidad estructural"""
+    root_distances = []
+    for token in doc:
+        if token.dep_ == 'ROOT':
+            depths = get_dependency_depths(token)
+            root_distances.extend(depths)
+    avg_depth = sum(root_distances) / len(root_distances) if root_distances else 0
+    return normalize_score(avg_depth, optimal_value=3)
+
+def get_dependency_depths(token, depth=0):
+    """Obtiene las profundidades de dependencia"""
+    depths = [depth]
+    for child in token.children:
+        depths.extend(get_dependency_depths(child, depth + 1))
+    return depths
+
+def normalize_score(value, optimal_value=1.0, range_factor=2.0):
+    """Normaliza un valor a un score entre 0 y 1"""
+    return 1 / (1 + abs(value - optimal_value) / range_factor)
+
+# Implementación de las funciones de visualización
+def generate_sentence_graphs(doc):
+    """Genera visualizaciones de estructura de oraciones"""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Implementar visualización
+    plt.close()
+    return fig
+
+def generate_word_connections(doc):
+    """Genera red de conexiones de palabras"""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Implementar visualización
+    plt.close()
+    return fig
+
+def generate_connection_paths(doc):
+    """Genera patrones de conexión"""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Implementar visualización
+    plt.close()
+    return fig
+
 def create_vocabulary_network(doc):
     """
     Genera el grafo de red de vocabulario.
-    Reutiliza la lógica de visualización de grafos semánticos.
     """
-
-
+    G = nx.Graph()
+
+    # Crear nodos para palabras significativas
+    words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
+    word_freq = Counter(words)
+
+    # Añadir nodos con tamaño basado en frecuencia
+    for word, freq in word_freq.items():
+        G.add_node(word, size=freq)
+
+    # Crear conexiones basadas en co-ocurrencia
+    window_size = 5
+    for i in range(len(words) - window_size):
+        window = words[i:i+window_size]
+        for w1, w2 in combinations(set(window), 2):
+            if G.has_edge(w1, w2):
+                G[w1][w2]['weight'] += 1
+            else:
+                G.add_edge(w1, w2, weight=1)
+
+    # Crear visualización
+    fig, ax = plt.subplots(figsize=(12, 8))
+    pos = nx.spring_layout(G)
+
+    # Dibujar nodos
+    nx.draw_networkx_nodes(G, pos,
+                           node_size=[G.nodes[node]['size']*100 for node in G.nodes],
+                           node_color='lightblue',
+                           alpha=0.7)
+
+    # Dibujar conexiones
+    nx.draw_networkx_edges(G, pos,
+                           width=[G[u][v]['weight']*0.5 for u,v in G.edges],
+                           alpha=0.5)
+
+    # Añadir etiquetas
+    nx.draw_networkx_labels(G, pos)
+
+    plt.title("Red de Vocabulario")
+    plt.axis('off')
+    return fig
 
 def create_syntax_complexity_graph(doc):
     """
     Genera el diagrama de arco de complejidad sintáctica.
-
-    """
-    # Implementación similar a nuestros diagramas de arco existentes
-    pass
-
-def create_cohesion_heatmap(doc):
-    """
-    Genera el mapa de calor de cohesión textual.
+    Muestra la estructura de dependencias con colores basados en la complejidad.
     """
-
+    try:
+        # Preparar datos para la visualización
+        sentences = list(doc.sents)
+        if not sentences:
+            return None
+
+        # Crear figura para el gráfico
+        fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))
+
+        # Colores para diferentes niveles de profundidad
+        depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))
+
+        y_offset = 0
+        max_x = 0
+
+        for sent in sentences:
+            words = [token.text for token in sent]
+            x_positions = range(len(words))
+            max_x = max(max_x, len(words))
+
+            # Dibujar palabras
+            plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
+            plt.scatter(x_positions, [y_offset] * len(words), alpha=0)
+
+            # Añadir texto
+            for i, word in enumerate(words):
+                plt.annotate(word, (i, y_offset), xytext=(0, -10),
+                             textcoords='offset points', ha='center')
+
+            # Dibujar arcos de dependencia
+            for token in sent:
+                if token.dep_ != "ROOT":
+                    # Calcular profundidad de dependencia
+                    depth = 0
+                    current = token
+                    while current.head != current:
+                        depth += 1
+                        current = current.head
+
+                    # Determinar posiciones para el arco
+                    start = token.i - sent[0].i
+                    end = token.head.i - sent[0].i
+
+                    # Altura del arco basada en la distancia entre palabras
+                    height = 0.5 * abs(end - start)
+
+                    # Color basado en la profundidad
+                    color = depth_colors[min(depth, len(depth_colors)-1)]
+
+                    # Crear arco
+                    arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
+                                      width=abs(end - start),
+                                      height=height,
+                                      angle=0,
+                                      theta1=0,
+                                      theta2=180,
+                                      color=color,
+                                      alpha=0.6)
+                    ax.add_patch(arc)
+
+            y_offset -= 2
+
+        # Configurar el gráfico
+        plt.xlim(-1, max_x)
+        plt.ylim(y_offset - 1, 1)
+        plt.axis('off')
+        plt.title("Complejidad Sintáctica")
+
+        return fig
+
+    except Exception as e:
+        logger.error(f"Error en create_syntax_complexity_graph: {str(e)}")
+        return None
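
Because the hunk starts at line 35, the module-level imports that the added code relies on are not visible here. A minimal set consistent with the names used above (plt, np, nx, patches, Counter, combinations, logger, st) would look roughly like the sketch below; treat it as an assumption about the rest of current_situation_analysis.py, not as part of this commit.

# Assumed to sit near the top of current_situation_analysis.py, outside this hunk.
import logging
from collections import Counter
from itertools import combinations

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import networkx as nx
import numpy as np
import streamlit as st

logger = logging.getLogger(__name__)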
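
For context, here is a minimal sketch of how the new analyze_text_dimensions() entry point might be called from the Streamlit side. The spaCy model name, the sample text, and the metric labels are illustrative assumptions and do not come from the repository.

# Hypothetical caller, assuming a Spanish spaCy pipeline is installed.
import spacy
import streamlit as st

from modules.studentact.current_situation_analysis import analyze_text_dimensions

nlp = spacy.load("es_core_news_sm")  # assumed model name
doc = nlp("La claridad se mide por la longitud de las oraciones. "
          "Cada oración comparte lemas con la siguiente para dar cohesión.")

metrics = analyze_text_dimensions(doc)

# clarity, cohesion y structure provienen de normalize_score(), es decir
# 1 / (1 + |value - optimal_value| / range_factor), por lo que caen en (0, 1].
st.metric("Claridad", f"{metrics['clarity']:.2f}")
st.metric("Vocabulario", f"{metrics['vocabulary']:.2f}")
st.metric("Cohesión", f"{metrics['cohesion']:.2f}")
st.metric("Estructura", f"{metrics['structure']:.2f}")

# Las entradas restantes son figuras de matplotlib y pueden mostrarse directamente.
st.pyplot(metrics['sentence_graphs'])
st.pyplot(metrics['word_connections'])
st.pyplot(metrics['connection_paths'])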
|