 misc/llama-cpp/Makefile              | 10 +++-------
 misc/llama-cpp/files/llama-server.in | 17 ++++++++---------
 misc/llama-cpp/pkg-message           |  9 +++++++++
 3 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/misc/llama-cpp/Makefile b/misc/llama-cpp/Makefile
index cde8ceb39f81..d56bdbf3dd40 100644
--- a/misc/llama-cpp/Makefile
+++ b/misc/llama-cpp/Makefile
@@ -1,6 +1,7 @@
 PORTNAME=	llama-cpp
 DISTVERSIONPREFIX=	b
 DISTVERSION=	8895
+PORTREVISION=	1
 CATEGORIES=	misc # machine-learning
 
 MAINTAINER=	yuri@FreeBSD.org
@@ -36,8 +37,8 @@ CMAKE_TESTING_ON=	LLAMA_BUILD_TESTS
 USER=		nobody
 SUB_LIST=	USER=${USER}
 
-OPTIONS_DEFINE=		CURL EXAMPLES VULKAN
-OPTIONS_DEFAULT=	CURL VULKAN
+OPTIONS_DEFINE=		CURL EXAMPLES
+OPTIONS_DEFAULT=	CURL EXAMPLES
 OPTIONS_SUB=		yes
 
 CURL_DESCR=		Use libcurl to download model from an URL
@@ -47,11 +48,6 @@ CURL_LIB_DEPENDS=	libcurl.so:ftp/curl
 
 EXAMPLES_CMAKE_BOOL=	LLAMA_BUILD_EXAMPLES
 
-VULKAN_CMAKE_BOOL=	GGML_VULKAN
-VULKAN_BUILD_DEPENDS=	glslc:graphics/shaderc \
-			vulkan-headers>0:graphics/vulkan-headers
-VULKAN_LIB_DEPENDS=	libvulkan.so:graphics/vulkan-loader
-
 BINARY_ALIAS=	git=false \
 		python=${PYTHON_CMD} # for tests
diff --git a/misc/llama-cpp/files/llama-server.in b/misc/llama-cpp/files/llama-server.in
index 6eec15953978..82992f661ca3 100644
--- a/misc/llama-cpp/files/llama-server.in
+++ b/misc/llama-cpp/files/llama-server.in
@@ -13,7 +13,7 @@
 # llama_server_user (str):	llama_server daemon user
 #				Default: %%USER%%
 # llama_server_model (str):	AI model that llama-server will use
-#				Default: "" (required)
+#				Default: "" (not required)
 # llama_server_args (str):	Additional arguments for llama-server
 #				Default: "" (optional)
 # llama_server_log (str):	Log file that llama-server will write log to
@@ -38,20 +38,19 @@ run_command="%%PREFIX%%/bin/llama-server"
 procname="${run_command}"
 pidfile=${llama_server_pidfile}
 command=/usr/sbin/daemon
-command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args}"
+command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} ${llama_server_args}"
 start_precmd="llama_server_precmd"
 llama_server_chdir=/tmp
 
 llama_server_precmd()
 {
 	# check model
-	if [ -z "${llama_server_model}" ]; then
-		echo "llama_server_model isn't set, it is required"
-		exit 1
-	fi
-	if [ ! -f "${llama_server_model}" ]; then
-		echo "llama_server_model isn't a file"
-		exit 1
+	if [ -n "${llama_server_model}" ]; then
+		if [ ! -f "${llama_server_model}" ]; then
+			echo "llama_server_model isn't a file"
+			exit 1
+		fi
+		command_args="${command_args} -m ${llama_server_model}"
 	fi
 
 	# ensure that the log file exists and has right permissions
diff --git a/misc/llama-cpp/pkg-message b/misc/llama-cpp/pkg-message
index 157a4db6ea78..15534c8fd534 100644
--- a/misc/llama-cpp/pkg-message
+++ b/misc/llama-cpp/pkg-message
@@ -22,6 +22,15 @@ and navigate to http://localhost:8080:
 > llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
 > llama_server_args="--device Vulkan0 -ngl 27"
 
+In order to use the multi-model feature do not use llama_server_model.
+Instead add the argument "--models-preset /path/to/models.ini"
+Add pre-downloaded models into models.ini, for example:
+[Qwen3.5-35B-A3B-Uncensored]
+model = /path/to/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf
+
+You can switch to the CPU-only operation by choosing the port option
+VULKAN=OFF in misc/ggml (not in llama-cpp).
+
 EOM
}
]
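For reference, a minimal sketch of the two configurations the updated rc
script supports, following the pkg-message's own notation; all paths and
the model name below are placeholders, not shipped defaults.

Single-model mode: set llama_server_model in /etc/rc.conf and the rc
script appends -m to command_args itself:

> llama_server_enable="YES"
> llama_server_model=/path/to/model.gguf

Multi-model mode: leave llama_server_model unset and point llama-server
at a preset file via llama_server_args instead:

> llama_server_enable="YES"
> llama_server_args="--models-preset /path/to/models.ini"

models.ini lists one section per pre-downloaded model, e.g.:

[example-model]
model = /path/to/example-model.Q4_K_M.gguf

[another-model]
model = /path/to/another-model.Q4_K_M.gguf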
