Spaces:
Running
Running
adjust generation time
Browse files
app.py
CHANGED
@@ -13,9 +13,16 @@ import psutil
|
|
13 |
def print_system_resources():
|
14 |
memory = psutil.virtual_memory()
|
15 |
cpu_percent = psutil.cpu_percent(interval=1)
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
print(f"CPU usage: {cpu_percent}%")
|
18 |
-
print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{
|
19 |
print(f"Active processes: {len(psutil.pids())}")
|
20 |
|
21 |
# Print Gradio version for debugging
|
@@ -51,9 +58,17 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
|
|
51 |
print_system_resources()
|
52 |
|
53 |
def clean_text(text):
|
54 |
-
"""Clean generated text by removing non-alphabetic characters and
|
55 |
text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
|
56 |
text = re.sub(r'\s+', ' ', text).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
return text
|
58 |
|
59 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
@@ -73,10 +88,10 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
73 |
outputs = model.generate(
|
74 |
input_ids=inputs["input_ids"],
|
75 |
attention_mask=inputs["attention_mask"],
|
76 |
-
max_new_tokens=
|
77 |
min_length=10,
|
78 |
-
do_sample=True,
|
79 |
-
top_k=
|
80 |
top_p=0.9,
|
81 |
temperature=temperature,
|
82 |
no_repeat_ngram_size=2,
|
@@ -110,4 +125,4 @@ demo = gr.Interface(
|
|
110 |
)
|
111 |
|
112 |
if __name__ == "__main__":
|
113 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
13 |
def print_system_resources():
    """Print a snapshot of host/container resource usage for debugging.

    Reports total physical memory, the container memory limit (read from
    cgroup v1 or v2, falling back to total physical memory), CPU usage,
    memory usage, and the number of active processes.
    """
    memory = psutil.virtual_memory()
    cpu_percent = psutil.cpu_percent(interval=1)
    # Fallback: total physical memory in GB (psutil sees the host, not the
    # container, so inside Docker this can overstate the real limit).
    mem_limit = memory.total / 1e9
    # Docker exposes the memory limit via cgroups; the path depends on the
    # cgroup version the host uses.
    for limit_path in (
        '/sys/fs/cgroup/memory/memory.limit_in_bytes',  # cgroup v1
        '/sys/fs/cgroup/memory.max',                    # cgroup v2
    ):
        # Was a bare `except:` — that also swallows SystemExit and
        # KeyboardInterrupt; catch only the errors reading/parsing can raise.
        try:
            with open(limit_path, 'r') as f:
                raw = f.read().strip()
            if raw != 'max':  # cgroup v2 writes 'max' when unlimited
                mem_limit = int(raw) / 1e9  # Convert bytes to GB
            break
        except (OSError, ValueError):
            continue  # Path absent or unreadable: try the next known layout
    print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
    print(f"Container memory limit: {mem_limit:.2f} GB")
    print(f"CPU usage: {cpu_percent}%")
    print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
    print(f"Active processes: {len(psutil.pids())}")
|
27 |
|
28 |
# Print Gradio version for debugging
|
|
|
58 |
print_system_resources()
|
59 |
|
60 |
def clean_text(text):
    """Normalize generated text: strip disallowed characters, collapse
    whitespace, and drop a trailing incomplete sentence.

    Keeps word characters, whitespace, basic punctuation, and Vietnamese
    diacritic letters. If the result does not end on sentence-final
    punctuation, it is truncated at the last '.', '!' or '?' (kept as-is
    when no such mark exists at all).
    """
    cleaned = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    # Trim a dangling fragment after the last complete sentence.
    if not cleaned.endswith(('.', '!', '?')):
        cut = max(cleaned.rfind(mark) for mark in '.!?')
        if cut != -1:
            cleaned = cleaned[:cut + 1]
    return cleaned
|
73 |
|
74 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
|
|
88 |
outputs = model.generate(
|
89 |
input_ids=inputs["input_ids"],
|
90 |
attention_mask=inputs["attention_mask"],
|
91 |
+
max_new_tokens=20, # Reduce for speed
|
92 |
min_length=10,
|
93 |
+
do_sample=True,
|
94 |
+
top_k=40, # Reduce for speed
|
95 |
top_p=0.9,
|
96 |
temperature=temperature,
|
97 |
no_repeat_ngram_size=2,
|
|
|
125 |
)
|
126 |
|
127 |
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the port Hugging Face Spaces
    # expects a Gradio app to serve on. `demo` is the gr.Interface defined
    # earlier in the file (outside this view).
    demo.launch(server_name="0.0.0.0", server_port=7860)
|