# Core model loading and inference stack.
# All entries are minimum versions only (no upper bounds).
transformers>=4.38.0
accelerate>=0.27.2
torch>=2.1.0
huggingface-hub>=0.22.2

# Video frame sampling.
# decord is pinned to an exact version; imageio only has a minimum version.
decord==0.6.0
imageio>=2.31.1

# Gradio UI and Spaces integration.
gradio>=4.17.0
spaces>=0.20.0

# Pillow provides the PIL imaging backend (imageio requires it).
pillow>=10.0.0

# Optional: to avoid tokenizer warnings.
sentencepiece>=0.1.99

# Additional model dependencies (unpinned).
# NOTE(review): presumably required by the model's own code — confirm which
# of these are actually imported at runtime.
einops
timm

# NOTE(review): flash-attn is typically compiled against the already-installed
# torch/CUDA toolchain; confirm it installs cleanly from this file in the
# target environment.
flash-attn