Spaces:
Running
Running
adjust generation time
Browse files
app.py
CHANGED
@@ -16,6 +16,10 @@ def print_system_resources():
|
|
16 |
print(f"Total physical memory: {memory.total/1e9:.2f} GB")
|
17 |
print(f"CPU usage: {cpu_percent}%")
|
18 |
print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{memory.total/1e9:.2f} GB)")
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Load model and tokenizer
|
21 |
model_id = "NlpHUST/gpt2-vietnamese"
|
@@ -69,11 +73,12 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
69 |
outputs = model.generate(
|
70 |
input_ids=inputs["input_ids"],
|
71 |
attention_mask=inputs["attention_mask"],
|
72 |
-
max_new_tokens=
|
73 |
min_length=10,
|
74 |
do_sample=True, # Enable sampling for diversity
|
75 |
-
top_k=50,
|
76 |
-
top_p=0.9,
|
|
|
77 |
no_repeat_ngram_size=2,
|
78 |
pad_token_id=tokenizer.pad_token_id
|
79 |
)
|
@@ -81,7 +86,6 @@ def generate_text(prompt, max_length=50, temperature=0.9):
|
|
81 |
print(f"Raw output: {generated_text}")
|
82 |
cleaned_text = clean_text(generated_text)
|
83 |
elapsed_time = time.time() - start_time
|
84 |
-
print_system_resources()
|
85 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|
86 |
return cleaned_text
|
87 |
except Exception as e:
|
@@ -106,4 +110,4 @@ demo = gr.Interface(
|
|
106 |
)
|
107 |
|
108 |
if __name__ == "__main__":
|
109 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, queue=False)
|
|
|
16 |
print(f"Total physical memory: {memory.total/1e9:.2f} GB")
|
17 |
print(f"CPU usage: {cpu_percent}%")
|
18 |
print(f"Memory usage: {memory.percent}% ({memory.used/1e9:.2f}/{memory.total/1e9:.2f} GB)")
|
19 |
+
print(f"Active processes: {len(psutil.pids())}")
|
20 |
+
|
21 |
+
# Print Gradio version for debugging
|
22 |
+
print(f"Gradio version: {gr.__version__}")
|
23 |
|
24 |
# Load model and tokenizer
|
25 |
model_id = "NlpHUST/gpt2-vietnamese"
|
|
|
73 |
outputs = model.generate(
|
74 |
input_ids=inputs["input_ids"],
|
75 |
attention_mask=inputs["attention_mask"],
|
76 |
+
max_new_tokens=30, # Increase for more content
|
77 |
min_length=10,
|
78 |
do_sample=True, # Enable sampling for diversity
|
79 |
+
top_k=50,
|
80 |
+
top_p=0.9,
|
81 |
+
temperature=temperature,
|
82 |
no_repeat_ngram_size=2,
|
83 |
pad_token_id=tokenizer.pad_token_id
|
84 |
)
|
|
|
86 |
print(f"Raw output: {generated_text}")
|
87 |
cleaned_text = clean_text(generated_text)
|
88 |
elapsed_time = time.time() - start_time
|
|
|
89 |
print(f"Generation time: {elapsed_time:.2f} seconds")
|
90 |
return cleaned_text
|
91 |
except Exception as e:
|
|
|
110 |
)
|
111 |
|
112 |
if __name__ == "__main__":
|
113 |
+
demo.launch(server_name="0.0.0.0", server_port=7860) # Remove queue=False
|