 misc/llama-cpp/Makefile              | 10 +++-------
 misc/llama-cpp/files/llama-server.in | 17 ++++++++---------
 misc/llama-cpp/pkg-message           |  9 +++++++++
 3 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/misc/llama-cpp/Makefile b/misc/llama-cpp/Makefile
index cde8ceb39f81..d56bdbf3dd40 100644
--- a/misc/llama-cpp/Makefile
+++ b/misc/llama-cpp/Makefile
@@ -1,6 +1,7 @@
PORTNAME= llama-cpp
DISTVERSIONPREFIX= b
DISTVERSION= 8895
+PORTREVISION= 1
CATEGORIES= misc # machine-learning
MAINTAINER= yuri@FreeBSD.org
@@ -36,8 +37,8 @@ CMAKE_TESTING_ON= LLAMA_BUILD_TESTS
USER= nobody
SUB_LIST= USER=${USER}
-OPTIONS_DEFINE= CURL EXAMPLES VULKAN
-OPTIONS_DEFAULT= CURL VULKAN
+OPTIONS_DEFINE= CURL EXAMPLES
+OPTIONS_DEFAULT= CURL EXAMPLES
OPTIONS_SUB= yes
CURL_DESCR= Use libcurl to download model from an URL
@@ -47,11 +48,6 @@ CURL_LIB_DEPENDS= libcurl.so:ftp/curl
EXAMPLES_CMAKE_BOOL= LLAMA_BUILD_EXAMPLES
-VULKAN_CMAKE_BOOL= GGML_VULKAN
-VULKAN_BUILD_DEPENDS= glslc:graphics/shaderc \
- vulkan-headers>0:graphics/vulkan-headers
-VULKAN_LIB_DEPENDS= libvulkan.so:graphics/vulkan-loader
-
BINARY_ALIAS= git=false \
python=${PYTHON_CMD} # for tests
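
With the VULKAN option dropped above, the port now exposes only the CURL and
EXAMPLES knobs. A quick way to confirm this from a ports tree is the standard
showconfig target (a minimal sketch; the exact output depends on your tree):

    # List the option knobs the port still offers after this change
    make -C /usr/ports/misc/llama-cpp showconfig
    # Vulkan acceleration is now selected in misc/ggml instead (see pkg-message)
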
diff --git a/misc/llama-cpp/files/llama-server.in b/misc/llama-cpp/files/llama-server.in
index 6eec15953978..82992f661ca3 100644
--- a/misc/llama-cpp/files/llama-server.in
+++ b/misc/llama-cpp/files/llama-server.in
@@ -13,7 +13,7 @@
# llama_server_user (str): llama_server daemon user
# Default: %%USER%%
# llama_server_model (str): AI model that llama-server will use
-# Default: "" (required)
+# Default: "" (not required)
# llama_server_args (str): Additional arguments for llama-server
# Default: "" (optional)
# llama_server_log (str): Log file that llama-server will write log to
@@ -38,20 +38,19 @@ run_command="%%PREFIX%%/bin/llama-server"
procname="${run_command}"
pidfile=${llama_server_pidfile}
command=/usr/sbin/daemon
-command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args}"
+command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} ${llama_server_args}"
start_precmd="llama_server_precmd"
llama_server_chdir=/tmp
llama_server_precmd()
{
# check model
-if [ -z "${llama_server_model}" ]; then
-	echo "llama_server_model isn't set, it is required"
-	exit 1
-fi
-if [ ! -f "${llama_server_model}" ]; then
-	echo "llama_server_model isn't a file"
-	exit 1
+if [ -n "${llama_server_model}" ]; then
+	if [ ! -f "${llama_server_model}" ]; then
+		echo "llama_server_model isn't a file"
+		exit 1
+	fi
+	command_args="${command_args} -m ${llama_server_model}"
fi
# ensure that the log file exists and has right permissions
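
The rc script change above makes llama_server_model optional: when it is set,
the precmd still validates it and appends "-m"; when it is empty, only
llama_server_args is passed to llama-server. A minimal rc.conf sketch under
those assumptions (the llama_server_enable convention and the --models-preset
flag from the pkg-message below are illustrative):

    # Single-model setup: the precmd appends "-m ${llama_server_model}" itself
    llama_server_enable="YES"
    llama_server_model="/path/to/llama-2-7b-chat.Q4_K_M.gguf"

    # Multi-model setup: leave llama_server_model unset and route everything
    # through llama_server_args instead
    #llama_server_args="--models-preset /path/to/models.ini"

Then start it with "service llama_server start" (assuming the script's rc name
is llama_server).
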
diff --git a/misc/llama-cpp/pkg-message b/misc/llama-cpp/pkg-message
index 157a4db6ea78..15534c8fd534 100644
--- a/misc/llama-cpp/pkg-message
+++ b/misc/llama-cpp/pkg-message
@@ -22,6 +22,15 @@ and navigate to http://localhost:8080:
> llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
> llama_server_args="--device Vulkan0 -ngl 27"
+In order to use the multi-model feature, do not set llama_server_model.
+Instead, pass the argument "--models-preset /path/to/models.ini" via
+llama_server_args. Add pre-downloaded models to models.ini, for example:
+[Qwen3.5-35B-A3B-Uncensored]
+model = /path/to/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf
+
+You can switch to CPU-only operation by setting the port option
+VULKAN=OFF in misc/ggml (not in llama-cpp).
+
EOM
}
]
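
As the pkg-message note above says, the CPU-only switch now lives in the ggml
port rather than here. A minimal sketch of flipping it with the standard ports
targets (paths assume the default /usr/ports tree):

    # Untick VULKAN in the options dialog for misc/ggml, then rebuild it
    make -C /usr/ports/misc/ggml config
    make -C /usr/ports/misc/ggml reinstall clean

llama-cpp itself should not need a rebuild for this, since the backend
selection now happens in ggml.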