Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -89,12 +89,14 @@ SYSTEM_PROMPT = load_system_prompt(MODEL_ID, "SYSTEM_PROMPT.txt")
 # If you prefer a hardcoded system prompt, you can use:
 # SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, and ends with an ASCII cat."
 
-# Set the device explicitly
+# Set the device explicitly
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Initialize the Mistral LLM via vllm.
 # Note: Running this model on GPU may require very high VRAM.
-
+# The 'enforce_eager=True' parameter disables asynchronous output,
+# which avoids the NotImplementedError on platforms that do not support it.
+llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device, enforce_eager=True)
 
 # -----------------------------------------------------------------------------
 # Main Generation Function
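
For context, here is a minimal, self-contained sketch of how the patched initialization might be exercised with vllm. The MODEL_ID value, sampling parameters, and chat messages below are illustrative assumptions, not values taken from the Space's app.py:

import torch
from vllm import LLM, SamplingParams

# Placeholder model id; the Space defines its own MODEL_ID and system prompt.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Per the commit's comment, enforce_eager=True disables asynchronous output
# processing, avoiding the NotImplementedError on unsupported platforms.
llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device, enforce_eager=True)

# Illustrative sampling settings and a single chat turn.
sampling_params = SamplingParams(temperature=0.7, max_tokens=256)
messages = [
    {"role": "system", "content": "You answer straight to the point."},
    {"role": "user", "content": "What does enforce_eager change at runtime?"},
]

outputs = llm.chat(messages, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)

Since enforce_eager=True also skips CUDA graph capture in favor of eager-mode PyTorch, the change trades some throughput for compatibility, which is usually an acceptable trade-off for a Zero-GPU demo Space.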