From 659a9d43c0261dcb37ed471622b57315d5efad8d Mon Sep 17 00:00:00 2001
From: Sergio Lopez
Date: Wed, 9 Oct 2024 15:50:17 +0200
Subject: [PATCH] Add a CLI option to enable GPU offload

Add a "--gpu" flag that allows users to request that the workload be
offloaded to the GPU. This works natively on macOS using Metal, and in
containers using Vulkan with llama.cpp's Kompute backend.

Signed-off-by: Sergio Lopez
---
 ramalama/cli.py   | 9 ++++++++-
 ramalama/model.py | 9 +++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/ramalama/cli.py b/ramalama/cli.py
index 0e85df6..2a52594 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -62,6 +62,13 @@ def init_cli():
         action="store_false",
         help="do not run RamaLama in the default container",
     )
+    parser.add_argument(
+        "--gpu",
+        dest="gpu",
+        default=False,
+        action="store_true",
+        help="offload the workload to the GPU",
+    )
     parser.add_argument(
         "--runtime",
         default="llama.cpp",
@@ -517,7 +524,7 @@ def run_container(args):
     if hasattr(args, "port"):
         conman_args += ["-p", f"{args.port}:{args.port}"]
 
-    if os.path.exists("/dev/dri"):
+    if args.gpu and (os.path.exists("/dev/dri") or sys.platform == "darwin"):
         conman_args += ["--device", "/dev/dri"]
 
     if os.path.exists("/dev/kfd"):
diff --git a/ramalama/model.py b/ramalama/model.py
index 533ce9b..09cdbff 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -1,5 +1,6 @@
 import os
 import sys
+from pathlib import Path
 
 from ramalama.common import container_manager, exec_cmd, default_image
 
@@ -13,8 +14,6 @@ class Model:
 
     def __init__(self, model):
         self.model = model
-        if sys.platform == "darwin":
-            self.common_params += ["-ngl", "99"]
 
     def login(self, args):
         raise NotImplementedError(f"ramalama login for {self.type} not implemented")
@@ -100,6 +99,12 @@ def run(self, args):
         if not args.ARGS:
             exec_args.append("-cnv")
 
+        if args.gpu:
+            if sys.platform == "darwin" or (sys.platform == "linux" and Path("/dev/dri").exists()):
+                exec_args.extend(["-ngl", "99"])
+            else:
+                print("GPU offload was requested but is not available on this system")
+
         exec_cmd(exec_args, False)
 
     def serve(self, args):
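
Usage note (illustrative, not part of the patch): since "--gpu" is added to
the top-level parser in init_cli(), offload would be requested as, e.g.,
"ramalama --gpu run <model>". The sketch below restates the offload decision
the patch adds to Model.run() as a standalone helper, under the same platform
checks; the "llama-cli" binary name and model path are placeholders, not taken
from the patch.

    # Illustrative sketch of the GPU-offload decision added to Model.run();
    # not part of the patch itself.
    import sys
    from pathlib import Path

    def gpu_offload_args(gpu_requested):
        """Return extra llama.cpp arguments when GPU offload is usable.

        "-ngl 99" asks llama.cpp to offload up to 99 model layers to the
        GPU: Metal natively on macOS, or Vulkan (Kompute backend) through
        a /dev/dri render node on Linux, including inside a container.
        """
        if not gpu_requested:
            return []
        if sys.platform == "darwin" or (sys.platform == "linux" and Path("/dev/dri").exists()):
            return ["-ngl", "99"]
        print("GPU offload was requested but is not available on this system")
        return []

    # Hypothetical invocation; binary name and model path are placeholders.
    exec_args = ["llama-cli", "-m", "/path/to/model.gguf"]
    exec_args += gpu_offload_args(gpu_requested=True)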