VietCat committed on
Commit
8d2936e
·
1 Parent(s): b638223

adjust generation time

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -13,9 +13,16 @@ import psutil
13
  def print_system_resources():
14
  memory = psutil.virtual_memory()
15
  cpu_percent = psutil.cpu_percent(interval=1)
16
- print(f"Total physical memory: {memory.total/1e9:.2f} GB")
 
 
 
 
 
 
 
17
  print(f"CPU usage: {cpu_percent}%")
18
- print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{memory.total/1e9:.2f} GB)")
19
  print(f"Active processes: {len(psutil.pids())}")
20
 
21
  # Print Gradio version for debugging
@@ -51,9 +58,17 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
51
  print_system_resources()
52
 
53
  def clean_text(text):
54
- """Clean generated text by removing non-alphabetic characters and extra spaces."""
55
  text = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
56
  text = re.sub(r'\s+', ' ', text).strip()
 
 
 
 
 
 
 
 
57
  return text
58
 
59
  def generate_text(prompt, max_length=50, temperature=0.9):
@@ -73,10 +88,10 @@ def generate_text(prompt, max_length=50, temperature=0.9):
73
  outputs = model.generate(
74
  input_ids=inputs["input_ids"],
75
  attention_mask=inputs["attention_mask"],
76
- max_new_tokens=30, # Increase for more content
77
  min_length=10,
78
- do_sample=True, # Enable sampling for diversity
79
- top_k=50,
80
  top_p=0.9,
81
  temperature=temperature,
82
  no_repeat_ngram_size=2,
@@ -110,4 +125,4 @@ demo = gr.Interface(
110
  )
111
 
112
  if __name__ == "__main__":
113
- demo.launch(server_name="0.0.0.0", server_port=7860) # Remove queue=False
 
13
def print_system_resources():
    """Print a snapshot of system resource usage for debugging.

    Reports host physical memory, the container memory limit when running
    under Docker/cgroups (v1 or v2), CPU utilisation over a 1-second
    sample, and the number of active processes. Output goes to stdout.
    """
    memory = psutil.virtual_memory()
    cpu_percent = psutil.cpu_percent(interval=1)
    # Default to the host total; overridden below if a cgroup limit exists.
    mem_limit = memory.total / 1e9  # GB
    # Probe cgroup v1 first, then the cgroup v2 unified-hierarchy path.
    for limit_path in ('/sys/fs/cgroup/memory/memory.limit_in_bytes',
                       '/sys/fs/cgroup/memory.max'):
        try:
            with open(limit_path, 'r') as f:
                raw = f.read().strip()
        except (OSError, ValueError):
            # File missing/unreadable on this host; try the next layout.
            continue
        # cgroup v2 writes the literal string 'max' when unlimited;
        # cgroup v1 writes a huge sentinel (~2**63) — ignore both and
        # keep the psutil fallback in those cases.
        if raw != 'max':
            limit_bytes = int(raw)
            if limit_bytes < (1 << 60):
                mem_limit = limit_bytes / 1e9
        break
    print(f"Total physical memory (psutil): {memory.total/1e9:.2f} GB")
    print(f"Container memory limit: {mem_limit:.2f} GB")
    print(f"CPU usage: {cpu_percent}%")
    print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{mem_limit:.2f} GB)")
    print(f"Active processes: {len(psutil.pids())}")
27
 
28
  # Print Gradio version for debugging
 
58
  print_system_resources()
59
 
60
def clean_text(text):
    """Normalise generated text and drop a trailing unfinished sentence.

    Keeps word characters, whitespace, basic punctuation and Vietnamese
    diacritics, collapses whitespace runs to single spaces, and — when
    the result does not already end on '.', '!' or '?' — truncates it at
    the last sentence-final mark. Text with no such mark is returned
    as-is after normalisation.
    """
    filtered = re.sub(r'[^\w\s.,!?àáâãèéêìíòóôõùúýăđĩũơưạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]', '', text)
    normalized = ' '.join(filtered.split())
    # Already ends on sentence punctuation: nothing more to trim.
    if normalized.endswith(('.', '!', '?')):
        return normalized
    # Otherwise cut back to the last sentence boundary, if any exists.
    cut = max(normalized.rfind(mark) for mark in '.!?')
    if cut != -1:
        normalized = normalized[:cut + 1]
    return normalized
73
 
74
  def generate_text(prompt, max_length=50, temperature=0.9):
 
88
  outputs = model.generate(
89
  input_ids=inputs["input_ids"],
90
  attention_mask=inputs["attention_mask"],
91
+ max_new_tokens=20, # Reduce for speed
92
  min_length=10,
93
+ do_sample=True,
94
+ top_k=40, # Reduce for speed
95
  top_p=0.9,
96
  temperature=temperature,
97
  no_repeat_ngram_size=2,
 
125
  )
126
 
127
  if __name__ == "__main__":
128
+ demo.launch(server_name="0.0.0.0", server_port=7860)