VietCat committed on
Commit
b638223
·
1 Parent(s): d78035c

adjust generation time

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -16,6 +16,10 @@ def print_system_resources():
16
  print(f"Total physical memory: {memory.total/1e9:.2f} GB")
17
  print(f"CPU usage: {cpu_percent}%")
18
  print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{memory.total/1e9:.2f} GB)")
 
 
 
 
19
 
20
  # Load model and tokenizer
21
  model_id = "NlpHUST/gpt2-vietnamese"
@@ -69,11 +73,12 @@ def generate_text(prompt, max_length=50, temperature=0.9):
69
  outputs = model.generate(
70
  input_ids=inputs["input_ids"],
71
  attention_mask=inputs["attention_mask"],
72
- max_new_tokens=25, # Slightly increase for more content
73
  min_length=10,
74
  do_sample=True, # Enable sampling for diversity
75
- top_k=50, # Limit to top 50 tokens
76
- top_p=0.9, # Nucleus sampling
 
77
  no_repeat_ngram_size=2,
78
  pad_token_id=tokenizer.pad_token_id
79
  )
@@ -81,7 +86,6 @@ def generate_text(prompt, max_length=50, temperature=0.9):
81
  print(f"Raw output: {generated_text}")
82
  cleaned_text = clean_text(generated_text)
83
  elapsed_time = time.time() - start_time
84
- print_system_resources()
85
  print(f"Generation time: {elapsed_time:.2f} seconds")
86
  return cleaned_text
87
  except Exception as e:
@@ -106,4 +110,4 @@ demo = gr.Interface(
106
  )
107
 
108
  if __name__ == "__main__":
109
- demo.launch(server_name="0.0.0.0", server_port=7860, queue=False)
 
16
  print(f"Total physical memory: {memory.total/1e9:.2f} GB")
17
  print(f"CPU usage: {cpu_percent}%")
18
  print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{memory.total/1e9:.2f} GB)")
19
+ print(f"Active processes: {len(psutil.pids())}")
20
+
21
+ # Print Gradio version for debugging
22
+ print(f"Gradio version: {gr.__version__}")
23
 
24
  # Load model and tokenizer
25
  model_id = "NlpHUST/gpt2-vietnamese"
 
73
  outputs = model.generate(
74
  input_ids=inputs["input_ids"],
75
  attention_mask=inputs["attention_mask"],
76
+ max_new_tokens=30, # Increase for more content
77
  min_length=10,
78
  do_sample=True, # Enable sampling for diversity
79
+ top_k=50,
80
+ top_p=0.9,
81
+ temperature=temperature,
82
  no_repeat_ngram_size=2,
83
  pad_token_id=tokenizer.pad_token_id
84
  )
 
86
  print(f"Raw output: {generated_text}")
87
  cleaned_text = clean_text(generated_text)
88
  elapsed_time = time.time() - start_time
 
89
  print(f"Generation time: {elapsed_time:.2f} seconds")
90
  return cleaned_text
91
  except Exception as e:
 
110
  )
111
 
112
  if __name__ == "__main__":
113
+ demo.launch(server_name="0.0.0.0", server_port=7860) # Remove queue=False