VietCat committed on
Commit
abe4cc3
·
1 Parent(s): 8d2936e

adjust generation time

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -18,11 +18,11 @@ def print_system_resources():
18
  with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
19
  mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
20
  except:
21
- mem_limit = memory.total / 1e9 # Fallback to psutil
22
  print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
23
  print(f"Container memory limit: {mem_limit:.2f} GB")
24
  print(f"CPU usage: {cpu_percent}%")
25
- print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
26
  print(f"Active processes: {len(psutil.pids())}")
27
 
28
  # Print Gradio version for debugging
@@ -61,14 +61,15 @@ def clean_text(text):
61
  """Clean generated text by removing non-alphabetic characters and incomplete sentences."""
62
  text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
63
  text = re.sub(r'\s+', ' ', text).strip()
64
- # Remove incomplete sentence (ends without punctuation)
65
- if not re.search(r'[.!?]$', text):
66
- last_period = text.rfind('.')
67
- last_exclam = text.rfind('!')
68
- last_question = text.rfind('?')
69
- last_punct = max(last_period, last_exclam, last_question)
70
- if last_punct != -1:
71
- text = text[:last_punct + 1]
 
72
  return text
73
 
74
  def generate_text(prompt, max_length=50, temperature=0.9):
@@ -88,10 +89,10 @@ def generate_text(prompt, max_length=50, temperature=0.9):
88
  outputs = model.generate(
89
  input_ids=inputs["input_ids"],
90
  attention_mask=inputs["attention_mask"],
91
- max_new_tokens=20, # Reduce for speed
92
  min_length=10,
93
  do_sample=True,
94
- top_k=40, # Reduce for speed
95
  top_p=0.9,
96
  temperature=temperature,
97
  no_repeat_ngram_size=2,
@@ -99,6 +100,7 @@ def generate_text(prompt, max_length=50, temperature=0.9):
99
  )
100
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
101
  print(f"Raw output: {generated_text}")
 
102
  cleaned_text = clean_text(generated_text)
103
  elapsed_time = time.time() - start_time
104
  print(f"Generation time: {elapsed_time:.2f} seconds")
 
18
  with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
19
  mem_limit = int(f.read().strip()) / 1e9 # Convert to GB
20
  except:
21
+ mem_limit = 16.0 # Fallback for HFS free (16GB)
22
  print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
23
  print(f"Container memory limit: {mem_limit:.2f} GB")
24
  print(f"CPU usage: {cpu_percent}%")
25
+ print(f"Memory usage: {min(memory.used / (mem_limit * 1e9) * 100, 100):.1f}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
26
  print(f"Active processes: {len(psutil.pids())}")
27
 
28
  # Print Gradio version for debugging
 
61
  """Clean generated text by removing non-alphabetic characters and incomplete sentences."""
62
  text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
63
  text = re.sub(r'\s+', ' ', text).strip()
64
+ # Keep only complete sentences (ending with punctuation)
65
+ sentences = re.split(r'(?<=[.!?])\s+', text)
66
+ complete_sentences = [s for s in sentences if re.search(r'[.!?]$', s)]
67
+ if complete_sentences:
68
+ text = ' '.join(complete_sentences)
69
+ else:
70
+ # Fallback: Keep until last valid word if no complete sentence
71
+ words = text.split()
72
+ text = ' '.join(words[:-1]) if len(words) > 1 else text
73
  return text
74
 
75
  def generate_text(prompt, max_length=50, temperature=0.9):
 
89
  outputs = model.generate(
90
  input_ids=inputs["input_ids"],
91
  attention_mask=inputs["attention_mask"],
92
+ max_new_tokens=15, # Reduce for speed
93
  min_length=10,
94
  do_sample=True,
95
+ top_k=30, # Reduce for speed
96
  top_p=0.9,
97
  temperature=temperature,
98
  no_repeat_ngram_size=2,
 
100
  )
101
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
102
  print(f"Raw output: {generated_text}")
103
+ print(f"Generated token count: {len(outputs[0])}")
104
  cleaned_text = clean_text(generated_text)
105
  elapsed_time = time.time() - start_time
106
  print(f"Generation time: {elapsed_time:.2f} seconds")