prithivMLmods committed
Commit 0ed1602 · verified · 1 parent: 7f06ad0

Update app.py

Files changed (1): app.py (+4, -2)
app.py CHANGED
@@ -89,12 +89,14 @@ SYSTEM_PROMPT = load_system_prompt(MODEL_ID, "SYSTEM_PROMPT.txt")
 # If you prefer a hardcoded system prompt, you can use:
 # SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, and ends with an ASCII cat."
 
-# Set the device explicitly (vLLM requires an explicit device specification)
+# Set the device explicitly
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Initialize the Mistral LLM via vllm.
 # Note: Running this model on GPU may require very high VRAM.
-llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device)
+# The 'enforce_eager=True' parameter disables asynchronous output,
+# which avoids the NotImplementedError on platforms that do not support it.
+llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device, enforce_eager=True)
 
 # -----------------------------------------------------------------------------
 # Main Generation Function
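
For context, a minimal, self-contained sketch of the initialization pattern this commit lands. It is an illustration rather than the app's actual code: the MODEL_ID value, the prompt, and the sampling settings below are placeholder assumptions.

import torch
from vllm import LLM, SamplingParams

# Placeholder model id for illustration; the real app resolves its own MODEL_ID.
MODEL_ID = "mistralai/Mistral-Small-Instruct-2409"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Per the commit's comment, enforce_eager=True avoids the NotImplementedError
# raised on platforms that do not support the default output-processing path.
llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device, enforce_eager=True)

sampling = SamplingParams(temperature=0.7, max_tokens=128)
outputs = llm.generate(["Say hello."], sampling)
print(outputs[0].outputs[0].text)

The trade-off is that eager mode skips vLLM's CUDA-graph optimization, so GPU throughput may drop somewhat in exchange for broader platform compatibility.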