commit c497b19b2070fc878c609ef1de94e58c2c9f6180 Author: Green Sky Date: Mon Jan 22 21:14:33 2024 +0100 simple llama.cpp server api usage works diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..56f48bf --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +.vs/ +*.o +*.swp +~* +*~ +.idea/ +cmake-build-debug/ +cmake-build-debugandtest/ +cmake-build-release/ +*.stackdump +*.coredump +compile_commands.json +/build* +/result* +.clangd +.cache + +.DS_Store +.AppleDouble +.LSOverride + +CMakeLists.txt.user* +CMakeCache.txt + +*.tox +imgui.ini diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..2047f18 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,75 @@ +cmake_minimum_required(VERSION 3.24 FATAL_ERROR) + +# cmake setup begin +project(solanaceae_llama-cpp-web) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON) + # why the f do i need this >:( + set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF) +else() + set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF) + set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON) +endif() +message("II SOLANACEAE_LLAMA-CPP-WEB_STANDALONE " ${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE}) + +option(SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS "Build the llama-cpp-web plugins" ${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE}) + +if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + + # defaulting to debug mode, if not specified + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug") + endif() + + # setup my vim ycm :D + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + + # more paths + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") +endif() + +# external libs +add_subdirectory(./external EXCLUDE_FROM_ALL) # before increasing warn levels, sad :( + +if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE) + set(CMAKE_CXX_EXTENSIONS OFF) + + 
# bump up warning levels appropriately for clang, gcc & msvc + if (${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + add_compile_options( + -Wall -Wextra # Reasonable and standard + -Wpedantic # Warn if non-standard C++ is used + -Wunused # Warn on anything being unused + #-Wconversion # Warn on type conversions that may lose data + #-Wsign-conversion # Warn on sign conversions + -Wshadow # Warn if a variable declaration shadows one from a parent context + ) + + if (NOT WIN32) + #link_libraries(-fsanitize=address) + #link_libraries(-fsanitize=address,undefined) + #link_libraries(-fsanitize-address-use-after-scope) + #link_libraries(-fsanitize=undefined) + endif() + elseif (${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC") + if (CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + endif() + endif() + +endif() + +# cmake setup end + +add_subdirectory(./src) + +if (SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS) + add_subdirectory(./plugins) +endif() + diff --git a/README.md b/README.md new file mode 100644 index 0000000..11a2dd1 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +used llama embedded webserver api for easier portability diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt new file mode 100644 index 0000000..d08025a --- /dev/null +++ b/external/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.24 FATAL_ERROR) + +include(FetchContent) + +# TODO: move entt dep into solanaceae_contact +if (NOT TARGET EnTT::EnTT) + FetchContent_Declare(EnTT + GIT_REPOSITORY https://github.com/skypjack/entt.git + GIT_TAG v3.12.2 + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(EnTT) +endif() + +if (NOT TARGET solanaceae_util) + FetchContent_Declare(solanaceae_util + GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_util.git + GIT_TAG master + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(solanaceae_util) 
+endif() + +if (NOT TARGET solanaceae_contact) + FetchContent_Declare(solanaceae_contact + GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_contact.git + GIT_TAG master + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(solanaceae_contact) +endif() + +if (NOT TARGET solanaceae_message3) + FetchContent_Declare(solanaceae_message3 + GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_message3.git + GIT_TAG master + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(solanaceae_message3) +endif() + +if (NOT TARGET solanaceae_plugin) + FetchContent_Declare(solanaceae_plugin + GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_plugin.git + GIT_TAG master + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(solanaceae_plugin) +endif() + +#if (NOT TARGET oatpp) + #set(OATPP_INSTALL OFF CACHE BOOL "" FORCE) + #set(OATPP_BUILD_TESTS OFF CACHE BOOL "" FORCE) + #set(OATPP_LINK_TEST_LIBRARY OFF CACHE BOOL "" FORCE) + #FetchContent_Declare(oatpp + #GIT_REPOSITORY https://github.com/oatpp/oatpp.git + #GIT_TAG master + #EXCLUDE_FROM_ALL + #) + #FetchContent_MakeAvailable(oatpp) +#endif() + +if (NOT TARGET httplib::httplib) + FetchContent_Declare(httplib + GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git + GIT_TAG master + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(httplib) +endif() + +if (NOT TARGET nlohmann_json::nlohmann_json) + FetchContent_Declare(json + URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz + URL_HASH SHA256=d6c65aca6b1ed68e7a182f4757257b107ae403032760ed6ef121c9d55e81757d + EXCLUDE_FROM_ALL + ) + FetchContent_MakeAvailable(json) +endif() + diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt new file mode 100644 index 0000000..1fcfd32 --- /dev/null +++ b/plugins/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.14...3.24 FATAL_ERROR) + +add_library(plugin_llama-cpp-web SHARED + ./plugin_llama-cpp-web.cpp +) + +target_link_libraries(plugin_llama-cpp-web PUBLIC + solanaceae_plugin + 
solanaceae_llama-cpp-web +) + diff --git a/plugins/plugin_llama-cpp-web.cpp b/plugins/plugin_llama-cpp-web.cpp new file mode 100644 index 0000000..878d449 --- /dev/null +++ b/plugins/plugin_llama-cpp-web.cpp @@ -0,0 +1,62 @@ +#include + +#include + +#include +#include +#include + +static std::unique_ptr g_lcw = nullptr; + +constexpr const char* plugin_name = "llama-cpp-web"; + +extern "C" { + +SOLANA_PLUGIN_EXPORT const char* solana_plugin_get_name(void) { + return plugin_name; +} + +SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_get_version(void) { + return SOLANA_PLUGIN_VERSION; +} + +SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_start(struct SolanaAPI* solana_api) { + std::cout << "PLUGIN " << plugin_name << " START()\n"; + + if (solana_api == nullptr) { + return 1; + } + + try { + //auto* conf = PLUG_RESOLVE_INSTANCE(ConfigModelI); + + // static store, could be anywhere tho + // construct with fetched dependencies + g_lcw = std::make_unique(); + + // register types + PLUG_PROVIDE_INSTANCE(LlamaCppWeb, plugin_name, g_lcw.get()); + PLUG_PROVIDE_INSTANCE(LlamaCppWebI, plugin_name, g_lcw.get()); + } catch (const ResolveException& e) { + std::cerr << "PLUGIN " << plugin_name << " " << e.what() << "\n"; + return 2; + } + + return 0; + } + +SOLANA_PLUGIN_EXPORT void solana_plugin_stop(void) { + std::cout << "PLUGIN " << plugin_name << " STOP()\n"; + + g_lcw.reset(); +} + +SOLANA_PLUGIN_EXPORT float solana_plugin_tick(float delta) { + (void)delta; + //g_ircc->iterate(); // TODO: return interval, respect dcc etc + + return std::numeric_limits::max(); +} + +} // extern C + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..2fd73f2 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.9...3.24 FATAL_ERROR) + +project(solanaceae) + +add_library(solanaceae_llama-cpp-web + ./solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp + ./solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp + 
./solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp +) + +target_include_directories(solanaceae_llama-cpp-web PUBLIC .) +target_compile_features(solanaceae_llama-cpp-web PRIVATE cxx_std_20) +target_compile_features(solanaceae_llama-cpp-web INTERFACE cxx_std_17) +target_link_libraries(solanaceae_llama-cpp-web PUBLIC + httplib::httplib + nlohmann_json::nlohmann_json + + solanaceae_util + solanaceae_message3 +) + +######################################## + +add_executable(test1 + test1.cpp +) + +target_link_libraries(test1 PUBLIC + solanaceae_llama-cpp-web +) + diff --git a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp new file mode 100644 index 0000000..d68f99a --- /dev/null +++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp @@ -0,0 +1,152 @@ +#include "./llama_cpp_web_impl.hpp" + +#include + +#include + +#include + +// TODO: variant that strips unicode? +static std::string convertToSafeGrammarString(std::string_view input) { + std::string res; + for (const char c : input) { + res += "\\x"; + res += bin2hex({static_cast(c)}); + } + return res; +} + +LlamaCppWeb::~LlamaCppWeb(void) { +} + +bool LlamaCppWeb::isHealthy(void) { + auto res = _cli.Get("/health"); + if ( + res.error() != httplib::Error::Success || + res->status != 200 || + res->body.empty() || + res->get_header_value("Content-Type") != "application/json" + ) { + return false; + } + + //std::cout << "/health code: " << res->status << " body: " << res->body << "\n"; + //std::cout << "Content-Type: " << res->get_header_value("Content-Type") << "\n"; + + const auto response_body_j = nlohmann::json::parse(res->body, nullptr, false); + + const std::string status = response_body_j.value("status", std::string{"value-not-found"}); + if (status != "ok") { + std::cerr << "status not ok: " << status << "\n"; + return false; + } + + return true; // healthy endpoint +} + +int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::vector& 
possible) { + if (possible.empty()) { + return -1; + } + if (possible.size() == 1) { + return 0; + } + + // see + // https://github.com/ggerganov/llama.cpp/tree/master/grammars#example + std::string grammar {"root ::= "}; + bool first = true; + for (const auto& it : possible) { + if (first) { + first = false; + } else { + grammar += "| "; + } + grammar += "\""; + //grammar += it; + grammar += convertToSafeGrammarString(it); + grammar += "\" "; + } + //grammar += ")"; + + //std::cout << "generated grammar:\n" << grammar << "\n"; + + auto ret = complete(nlohmann::json{ + {"prompt", prompt}, + {"grammar", grammar}, + {"min_p", 0.1}, // model dependent + {"repeat_penalty", 1.0}, // deactivate + {"temperature", 0.9}, // depends 1.0 for chat models + {"top_k", 60}, + {"top_p", 1.0}, // disable + {"n_predict", 256}, // unlikely to ever be so high + {"seed", _rng()}, + }); + + if (ret.empty()) { + return -2; + } + + if (!ret.count("content")) { + return -3; + } + + std::string selected = ret.at("content"); + if (selected.empty()) { + return -4; + } + + for (int64_t i = 0; i < (int64_t)possible.size(); i++) { + if (selected == possible[i]) { + return i; + } + } + + std::cerr << "complete failed j:'" << ret.dump() << "'\n"; + return -5; +} + +std::string LlamaCppWeb::completeLine(const std::string_view prompt) { + auto ret = complete(nlohmann::json{ + {"prompt", prompt}, + {"min_p", 0.1}, // model dependent + {"repeat_penalty", 1.0}, // deactivate + {"temperature", 0.9}, // depends 1.0 for chat models + {"top_k", 60}, + {"top_p", 1.0}, // disable + {"n_predict", 1000}, + {"seed", _rng()}, + {"stop", {"\n"}}, + }); + + return ret.dump(); +} + +nlohmann::json LlamaCppWeb::complete(const nlohmann::json& request_j) { + if (!isHealthy()) { + return {}; + } + + // completions can take very long + // steaming instead would be better + _cli.set_read_timeout(std::chrono::minutes(10)); + + //std::cout << "j dump: '" << request_j.dump(-1, ' ', true) << "'\n"; + + auto res = 
_cli.Post("/completion", request_j.dump(-1, ' ', true), "application/json"); + + //std::cerr << "res.error():" << res.error() << "\n"; + + if ( + res.error() != httplib::Error::Success || + res->status != 200 + //res->body.empty() || + //res->get_header_value("Content-Type") != "application/json" + ) { + std::cerr << "error posting\n"; + return {}; + } + + return nlohmann::json::parse(res->body, nullptr, false); +} + diff --git a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp new file mode 100644 index 0000000..ed9391b --- /dev/null +++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include "./llama_cpp_web_interface.hpp" + +#include +#include + +#include + +struct LlamaCppWeb : public LlamaCppWebI { + httplib::Client _cli{"http://localhost:8080"}; + std::minstd_rand _rng{std::random_device{}()}; + + ~LlamaCppWeb(void); + + bool isHealthy(void) override; + int64_t completeSelect(const std::string_view prompt, const std::vector& possible) override; + std::string completeLine(const std::string_view prompt) override; + + // TODO: expose? 
+ nlohmann::json complete(const nlohmann::json& request_j); +}; + diff --git a/src/solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp b/src/solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp new file mode 100644 index 0000000..7805604 --- /dev/null +++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include + +struct LlamaCppWebI { +virtual ~LlamaCppWebI(void) {} + +virtual bool isHealthy(void) = 0; + +// TODO: add more complex api + +virtual int64_t completeSelect(const std::string_view prompt, const std::vector& possible) = 0; + +// stops at newlines +// (and limit of 1000 and eos) +virtual std::string completeLine(const std::string_view prompt) = 0; +}; + diff --git a/src/test1.cpp b/src/test1.cpp new file mode 100644 index 0000000..139afad --- /dev/null +++ b/src/test1.cpp @@ -0,0 +1,56 @@ +#include + +#include + +#include +#include +#include +#include +#include + +int main(void) { + LlamaCppWeb lcw; + + if (!lcw.isHealthy()) { + std::cerr << lcw._cli.host() << " " << lcw._cli.port() << " endpoint not healthy\n"; + return 1; + } + std::cerr << lcw._cli.host() << " " << lcw._cli.port() << " endpoint healthy\n"; + + std::cout << "The meaning of life is to" + << lcw.complete(nlohmann::json{ + {"prompt", "The meaning of life is to"}, + {"min_p", 0.1}, // model dependent + {"repeat_penalty", 1.0}, // deactivate + {"temperature", 0.9}, // depends 1.0 for chat models + {"top_k", 60}, + {"top_p", 1.0}, // disable + {"n_predict", 16}, + {"stop", {".", "\n"}}, + {"grammar", ""} + }) + << "\n"; + + std::cout << "-------------------------\n"; + + std::cout << "complete from select:\n"; + std::vector possible { + " die", + " die.", + " live", + " love", + " exercise", + " Hi", + }; + for (size_t i = 0; i < 10; i++) { + std::cout << "The meaning of life is to"; + auto res = lcw.completeSelect("The meaning of life is to", possible); + if (res < 0) { + std::cout << " error--\n"; + } else { + std::cout 
<< possible[res] << "\n"; + } + } + + return 0; +}