Spaces:
Running
Running
adjust generation time
Browse files
app.py
CHANGED
@@ -18,11 +18,11 @@ def print_system_resources():
|
|
18 |
with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
|
19 |
mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
|
20 |
except:
|
21 |
-
mem_limit =
|
22 |
print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
|
23 |
print(f"Container memory limit: {mem_limit:.2f} GB")
|
24 |
print(f"CPU usage: {cpu_percent}%")
|
25 |
-
print(f"Memory usage: {memory.
|
26 |
print(f"Active processes: {len(psutil.pids())}")
|
27 |
|
28 |
# Print Gradio version for debugging
|
@@ -61,14 +61,15 @@ def clean_text(text):
|
|
61 |
"""Clean generated text by removing non-alphabetic characters and incomplete sentences."""
|
62 |
text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
|
63 |
text = re.sub(r'\s+', ' ', text).strip()
|
64 |
-
#
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
if
|
71 |
-
|
|
|
72 |
return text
|
73 |
|
74 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
@@ -88,10 +89,10 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
88 |
outputs = model.generate(
|
89 |
input_ids=inputs["input_ids"],
|
90 |
attention_mask=inputs["attention_mask"],
|
91 |
-
max_new_tokens=
|
92 |
min_length=10,
|
93 |
do_sample=True,
|
94 |
-
top_k=
|
95 |
top_p=0.9,
|
96 |
temperature=temperature,
|
97 |
no_repeat_ngram_size=2,
|
@@ -99,6 +100,7 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
99 |
)
|
100 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
101 |
print(f"Raw output: {generated_text}")
|
|
|
102 |
cleaned_text = clean_text(generated_text)
|
103 |
elapsed_time = time.time() - start_time
|
104 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|
|
|
18 |
with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
|
19 |
mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
|
20 |
except:
|
21 |
+
mem_limit = 16.0 # Fallback for HF Spaces free tier (16GB)
|
22 |
print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
|
23 |
print(f"Container memory limit: {mem_limit:.2f} GB")
|
24 |
print(f"CPU usage: {cpu_percent}%")
|
25 |
+
print(f"Memory usage: {min(memory.used / (mem_limit * 1e9) * 100, 100):.1f}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
|
26 |
print(f"Active processes: {len(psutil.pids())}")
|
27 |
|
28 |
# Print Gradio version for debugging
|
|
|
61 |
"""Clean generated text by removing non-alphabetic characters and incomplete sentences."""
|
62 |
text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
|
63 |
text = re.sub(r'\s+', ' ', text).strip()
|
64 |
+
# Keep only complete sentences (ending with punctuation)
|
65 |
+
sentences = re.split(r'(?<=[.!?])\s+', text)
|
66 |
+
complete_sentences = [s for s in sentences if re.search(r'[.!?]$', s)]
|
67 |
+
if complete_sentences:
|
68 |
+
text = ' '.join(complete_sentences)
|
69 |
+
else:
|
70 |
+
# Fallback: Keep until last valid word if no complete sentence
|
71 |
+
words = text.split()
|
72 |
+
text = ' '.join(words[:-1]) if len(words) > 1 else text
|
73 |
return text
|
74 |
|
75 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
|
|
89 |
outputs = model.generate(
|
90 |
input_ids=inputs["input_ids"],
|
91 |
attention_mask=inputs["attention_mask"],
|
92 |
+
max_new_tokens=15, # Reduce for speed
|
93 |
min_length=10,
|
94 |
do_sample=True,
|
95 |
+
top_k=30, # Reduce for speed
|
96 |
top_p=0.9,
|
97 |
temperature=temperature,
|
98 |
no_repeat_ngram_size=2,
|
|
|
100 |
)
|
101 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
102 |
print(f"Raw output: {generated_text}")
|
103 |
+
print(f"Generated token count: {len(outputs[0])}")
|
104 |
cleaned_text = clean_text(generated_text)
|
105 |
elapsed_time = time.time() - start_time
|
106 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|