toolboc/nv-llama
NVIDIA Jetson-accelerated build of llama.cpp (https://github.com/ggerganov/llama.cpp)
76
docker run --rm -it --name llama --net=host --gpus all -v ~/src/llama.cpp/models:/models toolboc/nv-llama:r35.2.1 --run --model /models/13B/llama-13b.ggmlv3.q6_K.bin --n-predict 512 --n-gpu-layers 43 --repeat_penalty 1.0 --color --interactive-first
docker pull toolboc/nv-llama