Sebbe33 committed on
Commit
89aa5bd
·
verified ·
1 Parent(s): 681b063

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def chunk_text(text, chunk_size, overlap):
    """Split *text* into consecutive character windows that share an overlap.

    Each chunk starts ``chunk_size - overlap`` characters after the previous
    one and is at most ``chunk_size`` characters long; the last chunk may be
    shorter.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared by neighbouring chunks.

    Returns:
        A list of substrings in order of appearance; empty when *text* is
        empty.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``chunk_size``.
    """
    step = chunk_size - overlap
    if step <= 0:
        # A non-positive step would never advance the window, so the loop
        # over the text could not terminate.
        raise ValueError("overlap must be smaller than chunk_size")

    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]
11
+
12
def main():
    """Render the Text Chunker page.

    Configures the page, injects the custom CSS, shows a text area with an
    example text plus sliders for chunk size and overlap, and — once the
    button is pressed — splits the text with ``chunk_text`` and renders every
    chunk inside a styled box.
    """
    # Local import: only needed to escape user text before it is embedded in
    # HTML further down.
    import html

    st.set_page_config(page_title="Text Chunker", page_icon="✂️", layout="centered")

    # Custom CSS for styling
    page_css = "\n".join([
        "",
        "<style>",
        ".header {",
        "color: #2F4F4F;",
        "border-bottom: 2px solid #2F4F4F;",
        "padding-bottom: 10px;",
        "}",
        ".chunk-box {",
        "padding: 20px;",
        "margin: 10px 0;",
        "border-radius: 10px;",
        "background-color: #F0F2F6;",
        "box-shadow: 0 2px 4px rgba(0,0,0,0.1);",
        "}",
        "</style>",
        "",
    ])
    st.markdown(page_css, unsafe_allow_html=True)

    st.markdown('<h1 class="header">✂️ Text Chunker</h1>', unsafe_allow_html=True)

    # Example text
    example_text = "\n".join([
        "Natural language processing (NLP) is a subfield of linguistics, computer science,",
        "and artificial intelligence concerned with the interactions between computers and human language.",
        "It focuses on how to program computers to process and analyze large amounts of natural language data.",
        "The result is a computer capable of understanding natural language in a way that is both meaningful",
        "and useful to humans.",
    ])

    # Inputs
    input_text = st.text_area("Input Text", value=example_text, height=200)
    col1, col2 = st.columns(2)
    with col1:
        chunk_size = st.slider("Chunk Size (characters)", 50, 200, 100, 10)
    with col2:
        overlap = st.slider("Overlap (characters)", 0, 50, 20, 5)

    # The slider ranges meet at 50, so the overlap can equal the chunk size;
    # stop before chunking in that case.
    if overlap >= chunk_size:
        st.error("Overlap must be smaller than chunk size!")
        return

    # Processing
    if st.button("Chunk It!", type="primary"):
        chunks = chunk_text(input_text, chunk_size, overlap)

        st.markdown(f"**🔖 {len(chunks)} Chunks Created**")
        for i, chunk in enumerate(chunks, 1):
            # The chunk is user-supplied text embedded in raw HTML: escape it
            # so characters such as "<" and "&" are shown literally instead
            # of being interpreted as markup.
            safe_chunk = html.escape(chunk)
            chunk_markup = (
                '<div class="chunk-box">\n'
                f"<h4>Chunk #{i} (Length: {len(chunk)})</h4>\n"
                '<hr style="border:1px solid #2F4F4F">\n'
                f"<p>{safe_chunk}</p>\n"
                "</div>"
            )
            with st.container():
                st.markdown(chunk_markup, unsafe_allow_html=True)

        st.success("✅ Chunking completed! Scroll to see all chunks.")
70
+
71
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()