thecollabagepatch committed on
Commit
de5d1e6
·
1 Parent(s): 87147f5

env update

Browse files
Files changed (1) hide show
  1. Dockerfile +6 -0
Dockerfile CHANGED
@@ -29,6 +29,12 @@ RUN set -eux; \
29
  # (optional) preload workaround if still needed
30
  ENV LD_PRELOAD=/usr/local/cuda/lib64/libcusparse.so.12:/usr/local/cuda/lib64/libcublas.so.12:/usr/local/cuda/lib64/libcublasLt.so.12:/usr/local/cuda/lib64/libcufft.so.11:/usr/local/cuda/lib64/libcusolver.so.11
31
 
 
 
 
 
 
 
32
  ENV DEBIAN_FRONTEND=noninteractive \
33
  PYTHONUNBUFFERED=1 \
34
  PIP_NO_CACHE_DIR=1 \
 
29
  # (optional) preload workaround if still needed
30
  ENV LD_PRELOAD=/usr/local/cuda/lib64/libcusparse.so.12:/usr/local/cuda/lib64/libcublas.so.12:/usr/local/cuda/lib64/libcublasLt.so.12:/usr/local/cuda/lib64/libcufft.so.11:/usr/local/cuda/lib64/libcusolver.so.11
31
 
32
+ # Better allocator (less fragmentation than BFC during XLA autotune)
33
+ ENV TF_GPU_ALLOCATOR=cuda_malloc_async
34
+
35
+ # Let cuBLAS use TF32 fast path on Ada (L40S) for big GEMMs
36
+ ENV TF_ENABLE_CUBLAS_TF32=1 NVIDIA_TF32_OVERRIDE=1
37
+
38
  ENV DEBIAN_FRONTEND=noninteractive \
39
  PYTHONUNBUFFERED=1 \
40
  PIP_NO_CACHE_DIR=1 \