Skip to Main Content
Oracle APEX and DGX Spark Clusters — gpt-oss-120b-mxfp4

Create Llama Server Service

Llama
Create Llama Server Service

Commands:

# Reload unit definitions after editing /etc/systemd/system/llama-server.service
sudo systemctl daemon-reload
# Clear the start-limit-hit state after repeated failed starts
sudo systemctl reset-failed llama-server.service
sudo systemctl start llama-server.service
sudo systemctl stop llama-server.service
sudo systemctl restart llama-server.service
sudo systemctl status llama-server.service
# Show the last 120 journal lines for the unit without paging
sudo journalctl -u llama-server.service -n 120 --no-pager

File name: /etc/default/llama-server.profile

sudo touch /etc/default/llama-server.profile
# Fixed: chown on a root-owned file under /etc needs sudo; the original
# omitted it and would fail with "Operation not permitted" for a normal user.
sudo chown sysadmin:sysadmin /etc/default/llama-server.profile

sudo vi /etc/default/llama-server.profile
# PASTE ENTRIES BELOW
# Path to binary
LLAMA_BIN=/usr/local/bin/llama-server
# Device/model/runtime args
LLAMA_DEVICE=CUDA0
# First shard of a multi-part GGUF; llama.cpp loads the remaining shards
# from the same directory automatically.
LLAMA_MODEL=/usr/local/models/openai/gpt-oss-120b-mxfp4-00001-of-00003.gguf
LLAMA_HOST=0.0.0.0
LLAMA_PORT=10000
# Extra args (keep as one string)
LLAMA_EXTRA_ARGS="--n-gpu-layers 99 --no-mmap --cont-batching --batch-size 4096 --ubatch-size 1024 --ctx-size 64000"

File name: /etc/systemd/system/llama-server.service

sudo touch /etc/systemd/system/llama-server.service
# Fixed: chown on a file under /etc needs sudo; the original omitted it.
# NOTE(review): systemd unit files are conventionally left root-owned;
# sysadmin ownership works but confirm it is intended.
sudo chown sysadmin:sysadmin /etc/systemd/system/llama-server.service

# Fixed: the original opened /etc/default/llama-server.profile here, which
# would overwrite the env profile with unit-file content instead of creating
# the service unit.
sudo vi /etc/systemd/system/llama-server.service
# PASTE ENTRIES BELOW
[Unit]
Description=LLAMA Server
# Wait for full network availability before starting (pairs with After= below).
Wants=network-online.target
After=network-online.target docker.service openwebui.service
# Hard dependency: stopping docker or openwebui also stops this unit.
# NOTE(review): if openwebui.service in turn depends on llama-server this
# creates a dependency cycle — confirm the intended ordering between the two.
Requires=docker.service openwebui.service

# Still part of [Unit]: a blank line does not end a systemd section.
# Allow up to 10 restart attempts within 300 s before the unit is marked
# failed; recover with `systemctl reset-failed` (see command list above).
StartLimitIntervalSec=300
StartLimitBurst=10

[Service]
Type=simple
User=sysadmin
Group=sysadmin
# Grants docker-socket access without making docker the primary group.
SupplementaryGroups=docker
Environment=LLAMA_LOG_COLORS=1
Environment=LLAMA_LOG_PREFIX=1
Environment=LLAMA_LOG_TIMESTAMPS=1
# Loads the LLAMA_* variables defined in /etc/default/llama-server.profile.
EnvironmentFile=/etc/default/llama-server.profile
# NOTE(review): wrapper script not shown in this file — presumably it builds
# the command line from the LLAMA_* variables; verify it execs the server so
# systemd tracks the real main process (Type=simple assumes it stays in the
# foreground).
ExecStart=/usr/local/bin/llama-server-start
# Let systemd stop the main process cleanly (SIGTERM below).
# ExecStop is optional; keep only if it actually performs shutdown logic.
# ExecStop=/home/sysadmin/bin/llama.server.stop.sh
Restart=always
RestartSec=10
KillSignal=SIGTERM
TimeoutStopSec=60
# mixed: SIGTERM goes to the main process only; SIGKILL on timeout hits the
# whole control group.
KillMode=mixed
# Append stdout/stderr to a flat log file (in addition to journald capture
# configured by the journal; this path must be writable by User= — see
# LogsDirectory/ReadWritePaths below).
StandardOutput=append:/var/log/llama/llama-server.log
StandardError=append:/var/log/llama/llama-server.log
# Creates /var/log/llama owned by User/Group before the service starts.
LogsDirectory=llama

# Hardening
NoNewPrivileges=true
PrivateTmp=true
# full: /usr and /boot (and /etc, per systemd.exec) are read-only to the unit.
ProtectSystem=full
ProtectHome=false
# Redundant with LogsDirectory= but harmless; keeps the log path writable.
ReadWritePaths=/var/log/llama

[Install]
WantedBy=multi-user.target