Spaces:
Running
Running
adjust generation time
Browse files
app.py
CHANGED
@@ -18,11 +18,11 @@ def print_system_resources():
|
|
18 |
with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
|
19 |
mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
|
20 |
except:
|
21 |
-
mem_limit =
|
22 |
print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
|
23 |
print(f"Container memory limit: {mem_limit:.2f} GB")
|
24 |
print(f"CPU usage: {cpu_percent}%")
|
25 |
-
print(f"Memory usage: {memory.
|
26 |
print(f"Active processes: {len(psutil.pids())}")
|
27 |
|
28 |
# Print Gradio version for debugging
|
@@ -61,14 +61,15 @@ def clean_text(text):
|
|
61 |
"""Clean generated text by removing non-alphabetic characters and incomplete sentences."""
|
62 |
text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
|
63 |
text = re.sub(r'\s+', ' ', text).strip()
|
64 |
-
#
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
if
|
71 |
-
|
|
|
72 |
return text
|
73 |
|
74 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
@@ -88,10 +89,10 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
88 |
outputs = model.generate(
|
89 |
input_ids=inputs["input_ids"],
|
90 |
attention_mask=inputs["attention_mask"],
|
91 |
-
max_new_tokens=
|
92 |
min_length=10,
|
93 |
do_sample=True,
|
94 |
-
top_k=
|
95 |
top_p=0.9,
|
96 |
temperature=temperature,
|
97 |
no_repeat_ngram_size=2,
|
@@ -99,6 +100,7 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
99 |
)
|
100 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
101 |
print(f"Raw output: {generated_text}")
|
|
|
102 |
cleaned_text = clean_text(generated_text)
|
103 |
elapsed_time = time.time() - start_time
|
104 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|
|
|
18 |
with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
|
19 |
mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
|
20 |
except:
|
21 |
+
mem_limit = 16.0 # Fallback for HF Spaces free tier (16GB)
|
22 |
print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
|
23 |
print(f"Container memory limit: {mem_limit:.2f} GB")
|
24 |
print(f"CPU usage: {cpu_percent}%")
|
25 |
+
print(f"Memory usage: {min(memory.used / (mem_limit * 1e9) * 100, 100):.1f}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
|
26 |
print(f"Active processes: {len(psutil.pids())}")
|
27 |
|
28 |
# Print Gradio version for debugging
|
|
|
61 |
"""Clean generated text by removing non-alphabetic characters and incomplete sentences."""
|
62 |
text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
|
63 |
text = re.sub(r'\s+', ' ', text).strip()
|
64 |
+
# Keep only complete sentences (ending with punctuation)
|
65 |
+
sentences = re.split(r'(?<=[.!?])\s+', text)
|
66 |
+
complete_sentences = [s for s in sentences if re.search(r'[.!?]$', s)]
|
67 |
+
if complete_sentences:
|
68 |
+
text = ' '.join(complete_sentences)
|
69 |
+
else:
|
70 |
+
# Fallback: Keep until last valid word if no complete sentence
|
71 |
+
words = text.split()
|
72 |
+
text = ' '.join(words[:-1]) if len(words) > 1 else text
|
73 |
return text
|
74 |
|
75 |
def generate_text(prompt, max_length=50, temperature=0.9):
|
|
|
89 |
outputs = model.generate(
|
90 |
input_ids=inputs["input_ids"],
|
91 |
attention_mask=inputs["attention_mask"],
|
92 |
+
max_new_tokens=15, # Reduce for speed
|
93 |
min_length=10,
|
94 |
do_sample=True,
|
95 |
+
top_k=30, # Reduce for speed
|
96 |
top_p=0.9,
|
97 |
temperature=temperature,
|
98 |
no_repeat_ngram_size=2,
|
|
|
100 |
)
|
101 |
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
102 |
print(f"Raw output: {generated_text}")
|
103 |
+
print(f"Generated token count: {len(outputs[0])}")
|
104 |
cleaned_text = clean_text(generated_text)
|
105 |
elapsed_time = time.time() - start_time
|
106 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|