simple llama.cpp server api usage works

This commit is contained in:
Green Sky 2024-01-22 21:14:33 +01:00
commit c497b19b20
No known key found for this signature in database
11 changed files with 537 additions and 0 deletions

26
.gitignore vendored Normal file
View File

@ -0,0 +1,26 @@
.vs/
*.o
*.swp
~*
*~
.idea/
cmake-build-debug/
cmake-build-debugandtest/
cmake-build-release/
*.stackdump
*.coredump
compile_commands.json
/build*
/result*
.clangd
.cache
.DS_Store
.AppleDouble
.LSOverride
CMakeLists.txt.user*
CMakeCache.txt
*.tox
imgui.ini

75
CMakeLists.txt Normal file
View File

@ -0,0 +1,75 @@
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)

# cmake setup begin
project(solanaceae_llama-cpp-web)

# standalone when built directly, OFF when pulled in via add_subdirectory()
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON)
    # negated copy kept around for option() defaults
    set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF)
else()
    set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF)
    set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON)
endif()
message("II SOLANACEAE_LLAMA-CPP-WEB_STANDALONE " "${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE}")

option(SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS "Build the llama-cpp-web plugins" ${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE})

if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE)
    set(CMAKE_POSITION_INDEPENDENT_CODE ON)

    # defaulting to debug mode, if not specified
    if(NOT CMAKE_BUILD_TYPE)
        set(CMAKE_BUILD_TYPE "Debug")
    endif()

    # setup my vim ycm :D
    set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

    # more paths
    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
endif()

# external libs
add_subdirectory(./external EXCLUDE_FROM_ALL) # before increasing warn levels, sad :(

if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE)
    set(CMAKE_CXX_EXTENSIONS OFF)

    # bump up warning levels appropriately for clang, gcc & msvc
    # fixed: compare the variable by name instead of expanding it with ${};
    # an expanded empty/odd value triggers if()'s double-dereference footgun
    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
        add_compile_options(
            -Wall -Wextra # Reasonable and standard
            -Wpedantic # Warn if non-standard C++ is used
            -Wunused # Warn on anything being unused
            #-Wconversion # Warn on type conversions that may lose data
            #-Wsign-conversion # Warn on sign conversions
            -Wshadow # Warn if a variable declaration shadows one from a parent context
        )
        if (NOT WIN32)
            # sanitizer toggles kept for development
            #link_libraries(-fsanitize=address)
            #link_libraries(-fsanitize=address,undefined)
            #link_libraries(-fsanitize-address-use-after-scope)
            #link_libraries(-fsanitize=undefined)
        endif()
    elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        if (CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
            string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
        else()
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
        endif()
    endif()
endif()
# cmake setup end

add_subdirectory(./src)

if (SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS)
    add_subdirectory(./plugins)
endif()

1
README.md Normal file
View File

@ -0,0 +1 @@
Uses the llama.cpp embedded web server API for easier portability.

80
external/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,80 @@
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
include(FetchContent)
# Fetch every dependency that the parent/superbuild has not already provided;
# each block is guarded by an if(NOT TARGET ...) so a vendored or system copy wins.
# NOTE(review): the solanaceae_* deps and httplib track `master`, which is not
# reproducible — consider pinning to commit hashes or tags.
# TODO: move entt dep into solanaceae_contact
if (NOT TARGET EnTT::EnTT)
FetchContent_Declare(EnTT
GIT_REPOSITORY https://github.com/skypjack/entt.git
GIT_TAG v3.12.2
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(EnTT)
endif()
if (NOT TARGET solanaceae_util)
FetchContent_Declare(solanaceae_util
GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_util.git
GIT_TAG master
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(solanaceae_util)
endif()
if (NOT TARGET solanaceae_contact)
FetchContent_Declare(solanaceae_contact
GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_contact.git
GIT_TAG master
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(solanaceae_contact)
endif()
if (NOT TARGET solanaceae_message3)
FetchContent_Declare(solanaceae_message3
GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_message3.git
GIT_TAG master
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(solanaceae_message3)
endif()
if (NOT TARGET solanaceae_plugin)
FetchContent_Declare(solanaceae_plugin
GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_plugin.git
GIT_TAG master
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(solanaceae_plugin)
endif()
# oatpp was evaluated as an http client and abandoned in favor of cpp-httplib
#if (NOT TARGET oatpp)
#set(OATPP_INSTALL OFF CACHE BOOL "" FORCE)
#set(OATPP_BUILD_TESTS OFF CACHE BOOL "" FORCE)
#set(OATPP_LINK_TEST_LIBRARY OFF CACHE BOOL "" FORCE)
#FetchContent_Declare(oatpp
#GIT_REPOSITORY https://github.com/oatpp/oatpp.git
#GIT_TAG master
#EXCLUDE_FROM_ALL
#)
#FetchContent_MakeAvailable(oatpp)
#endif()
if (NOT TARGET httplib::httplib)
FetchContent_Declare(httplib
GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
GIT_TAG master
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(httplib)
endif()
# nlohmann_json via release tarball; URL_HASH makes this one reproducible
if (NOT TARGET nlohmann_json::nlohmann_json)
FetchContent_Declare(json
URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz
URL_HASH SHA256=d6c65aca6b1ed68e7a182f4757257b107ae403032760ed6ef121c9d55e81757d
EXCLUDE_FROM_ALL
)
FetchContent_MakeAvailable(json)
endif()

11
plugins/CMakeLists.txt Normal file
View File

@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.14...3.24 FATAL_ERROR)

# Shared-library plugin that exposes the llama-cpp-web backend to the
# solanaceae plugin loader.
set(PLUGIN_TARGET plugin_llama-cpp-web)

add_library(${PLUGIN_TARGET} SHARED
    ./plugin_llama-cpp-web.cpp
)

target_link_libraries(${PLUGIN_TARGET} PUBLIC
    solanaceae_plugin
    solanaceae_llama-cpp-web
)

View File

@ -0,0 +1,62 @@
// Solanaceae plugin entry points: exposes a process-global LlamaCppWeb
// instance to the plugin host via the solana_plugin_v1 C ABI.
#include <solanaceae/plugin/solana_plugin_v1.h>
#include <solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp>
#include <memory>
#include <iostream>
#include <limits>
// plugin-global instance; created in solana_plugin_start(), torn down in stop()
static std::unique_ptr<LlamaCppWeb> g_lcw = nullptr;
constexpr const char* plugin_name = "llama-cpp-web";
extern "C" {
// Human-readable plugin identifier reported to the host.
SOLANA_PLUGIN_EXPORT const char* solana_plugin_get_name(void) {
return plugin_name;
}
// ABI version so the host can reject incompatible plugins.
SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_get_version(void) {
return SOLANA_PLUGIN_VERSION;
}
// Construct the backend and register it with the host's instance registry.
// Returns 0 on success, non-zero error codes otherwise.
SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_start(struct SolanaAPI* solana_api) {
std::cout << "PLUGIN " << plugin_name << " START()\n";
if (solana_api == nullptr) {
return 1;
}
try {
//auto* conf = PLUG_RESOLVE_INSTANCE(ConfigModelI);
// static store, could be anywhere tho
// construct with fetched dependencies
g_lcw = std::make_unique<LlamaCppWeb>();
// register types
// registered under both the concrete and the interface type so consumers
// can resolve either
PLUG_PROVIDE_INSTANCE(LlamaCppWeb, plugin_name, g_lcw.get());
PLUG_PROVIDE_INSTANCE(LlamaCppWebI, plugin_name, g_lcw.get());
} catch (const ResolveException& e) {
// NOTE(review): `e.what` without parentheses — assumes ResolveException::what
// is a data member, not std::exception::what(); confirm against solanaceae_plugin
std::cerr << "PLUGIN " << plugin_name << " " << e.what << "\n";
return 2;
}
return 0;
}
// Destroy the instance; the host must not use the registered pointers afterwards.
SOLANA_PLUGIN_EXPORT void solana_plugin_stop(void) {
std::cout << "PLUGIN " << plugin_name << " STOP()\n";
g_lcw.reset();
}
// Periodic tick; this plugin has no background work, so ask for the
// longest possible interval.
SOLANA_PLUGIN_EXPORT float solana_plugin_tick(float delta) {
(void)delta;
//g_ircc->iterate(); // TODO: return interval, respect dcc etc
return std::numeric_limits<float>::max();
}
} // extern C

31
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,31 @@
cmake_minimum_required(VERSION 3.9...3.24 FATAL_ERROR)

# fixed: declare the language explicitly; a bare project() also enables and
# probes a C toolchain this C++-only tree never uses
project(solanaceae LANGUAGES CXX)

# client library for the llama.cpp server http api
add_library(solanaceae_llama-cpp-web
    ./solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp
    ./solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
    ./solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp
)
target_include_directories(solanaceae_llama-cpp-web PUBLIC .)
# built with c++20; consumers of the interface header only need c++17
target_compile_features(solanaceae_llama-cpp-web PRIVATE cxx_std_20)
target_compile_features(solanaceae_llama-cpp-web INTERFACE cxx_std_17)
target_link_libraries(solanaceae_llama-cpp-web PUBLIC
    httplib::httplib
    nlohmann_json::nlohmann_json
    solanaceae_util
    solanaceae_message3
)

########################################

# manual smoke-test executable (requires a running llama.cpp server)
add_executable(test1
    test1.cpp
)
target_link_libraries(test1 PUBLIC
    solanaceae_llama-cpp-web
)

View File

@ -0,0 +1,152 @@
#include "./llama_cpp_web_impl.hpp"
#include <solanaceae/util/utils.hpp>
#include <nlohmann/json.hpp>
#include <chrono>
#include <iostream> // std::cout/std::cerr are used below; was only reachable transitively
// TODO: variant that strips unicode?
// Escape every byte of `input` as a "\xNN" hex sequence so arbitrary text can
// be embedded verbatim inside a gbnf grammar string literal.
static std::string convertToSafeGrammarString(std::string_view input) {
	std::string escaped;
	escaped.reserve(input.size() * 4); // each input byte expands to "\xNN"
	for (size_t i = 0; i < input.size(); i++) {
		escaped += "\\x";
		escaped += bin2hex({static_cast<uint8_t>(input[i])});
	}
	return escaped;
}
// Out-of-line destructor; members (_cli, _rng) clean themselves up.
LlamaCppWeb::~LlamaCppWeb(void) {
}
// Probe the llama.cpp server's /health endpoint.
// Returns true only for a 200 json response whose "status" field is "ok".
bool LlamaCppWeb::isHealthy(void) {
	auto res = _cli.Get("/health");
	if (
		res.error() != httplib::Error::Success ||
		res->status != 200 ||
		res->body.empty() ||
		res->get_header_value("Content-Type") != "application/json"
	) {
		return false;
	}

	const auto response_body_j = nlohmann::json::parse(res->body, nullptr, false);
	// fixed: parse() with exceptions disabled signals failure via a discarded
	// value; calling accessors on it is undefined behavior, so bail out first
	if (response_body_j.is_discarded()) {
		std::cerr << "health response is not valid json\n";
		return false;
	}

	const std::string status = response_body_j.value("status", std::string{"value-not-found"});
	if (status != "ok") {
		std::cerr << "status not ok: " << status << "\n";
		return false;
	}

	return true; // healthy endpoint
}
// Ask the model to pick one of `possible` as the continuation of `prompt`,
// by constraining generation with a grammar that only admits the candidates.
// Returns the chosen index, or a negative error code:
//  -1 no candidates, -2 request failed, -3 no "content" field,
//  -4 empty completion, -5 completion not in the candidate set.
int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) {
	if (possible.empty()) {
		return -1;
	}
	if (possible.size() == 1) {
		return 0; // single candidate, no need to ask the model
	}

	// build a gbnf alternation over the escaped candidates, see
	// https://github.com/ggerganov/llama.cpp/tree/master/grammars#example
	std::string grammar {"root ::= "};
	for (size_t idx = 0; idx < possible.size(); idx++) {
		if (idx != 0) {
			grammar += "| ";
		}
		grammar += "\"";
		grammar += convertToSafeGrammarString(possible[idx]);
		grammar += "\" ";
	}

	auto ret = complete(nlohmann::json{
		{"prompt", prompt},
		{"grammar", grammar},
		{"min_p", 0.1}, // model dependent
		{"repeat_penalty", 1.0}, // deactivate
		{"temperature", 0.9}, // depends 1.0 for chat models
		{"top_k", 60},
		{"top_p", 1.0}, // disable
		{"n_predict", 256}, // unlikely to ever be so high
		{"seed", _rng()},
	});
	if (ret.empty()) {
		return -2;
	}
	if (!ret.count("content")) {
		return -3;
	}

	const std::string selected = ret.at("content");
	if (selected.empty()) {
		return -4;
	}

	// map the generated text back onto a candidate index
	for (int64_t i = 0; i < (int64_t)possible.size(); i++) {
		if (selected == possible[i]) {
			return i;
		}
	}

	std::cerr << "complete failed j:'" << ret.dump() << "'\n";
	return -5;
}
// Complete a single line: generation stops at the first newline
// (or after 1000 predicted tokens / end-of-sequence).
// NOTE(review): returns the dump of the *entire* response json rather than
// the generated "content" field — callers receive serialized json text;
// confirm this is intended and not leftover debugging.
std::string LlamaCppWeb::completeLine(const std::string_view prompt) {
auto ret = complete(nlohmann::json{
{"prompt", prompt},
{"min_p", 0.1}, // model dependent
{"repeat_penalty", 1.0}, // deactivate
{"temperature", 0.9}, // depends 1.0 for chat models
{"top_k", 60},
{"top_p", 1.0}, // disable
{"n_predict", 1000},
{"seed", _rng()},
{"stop", {"\n"}},
});
return ret.dump();
}
// POST `request_j` to the server's /completion endpoint and return the parsed
// response json. Returns an empty json object on any failure (unhealthy
// endpoint, transport error, non-200 status, unparsable body).
nlohmann::json LlamaCppWeb::complete(const nlohmann::json& request_j) {
	if (!isHealthy()) {
		return {};
	}

	// completions can take very long
	// streaming instead would be better
	_cli.set_read_timeout(std::chrono::minutes(10));

	auto res = _cli.Post("/completion", request_j.dump(-1, ' ', true), "application/json");
	if (
		res.error() != httplib::Error::Success ||
		res->status != 200
	) {
		std::cerr << "error posting\n";
		return {};
	}

	// fixed: with exceptions disabled, a parse failure yields a *discarded*
	// value; handing that to callers (who then call empty()/count() on it)
	// is undefined behavior — normalize to an empty json instead
	auto response_j = nlohmann::json::parse(res->body, nullptr, false);
	if (response_j.is_discarded()) {
		std::cerr << "error parsing response\n";
		return {};
	}
	return response_j;
}

View File

@ -0,0 +1,23 @@
#pragma once
#include "./llama_cpp_web_interface.hpp"
#include <httplib.h>
#include <nlohmann/json_fwd.hpp>
#include <random>
// Concrete LlamaCppWebI backed by a llama.cpp server reached over http
// via cpp-httplib.
struct LlamaCppWeb : public LlamaCppWebI {
// blocking http client
// NOTE(review): endpoint is hardcoded — consider making it configurable
httplib::Client _cli{"http://localhost:8080"};
// rng used to generate a fresh seed per completion request
std::minstd_rand _rng{std::random_device{}()};
~LlamaCppWeb(void);
bool isHealthy(void) override;
int64_t completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) override;
std::string completeLine(const std::string_view prompt) override;
// low-level request: POSTs `request_j` to /completion, returns parsed response
// TODO: expose?
nlohmann::json complete(const nlohmann::json& request_j);
};

View File

@ -0,0 +1,20 @@
#pragma once
#include <string>
#include <string_view>
#include <vector>
// Abstract interface for text completion against a llama.cpp web server.
struct LlamaCppWebI {
virtual ~LlamaCppWebI(void) {}
// true if the remote endpoint reports itself healthy
virtual bool isHealthy(void) = 0;
// TODO: add more complex api
// pick one of `possible` as the continuation of `prompt`;
// returns the chosen index or a negative error code
virtual int64_t completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) = 0;
// stops at newlines
// (and limit of 1000 and eos)
virtual std::string completeLine(const std::string_view prompt) = 0;
};

56
src/test1.cpp Normal file
View File

@ -0,0 +1,56 @@
#include <solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp>
#include <nlohmann/json.hpp>
#include <iostream>
#include <random>
#include <vector>
#include <chrono>
#include <cstdint>
// Manual smoke test: requires a llama.cpp server listening on localhost:8080.
int main(void) {
	LlamaCppWeb lcw;
	if (!lcw.isHealthy()) {
		std::cerr << lcw._cli.host() << " " << lcw._cli.port() << " endpoint not healthy\n";
		return 1;
	}

	std::cerr << lcw._cli.host() << " " << lcw._cli.port() << " endpoint healthy\n";

	// free-form completion (json response is streamed via nlohmann's operator<<)
	std::cout << "The meaning of life is to"
		<< lcw.complete(nlohmann::json{
			{"prompt", "The meaning of life is to"},
			{"min_p", 0.1}, // model dependent
			{"repeat_penalty", 1.0}, // deactivate
			{"temperature", 0.9}, // depends 1.0 for chat models
			{"top_k", 60},
			{"top_p", 1.0}, // disable
			{"n_predict", 16},
			{"stop", {".", "\n"}},
			{"grammar", ""} // fixed: key was misspelled "gramar", so the server silently ignored it
		})
		<< "\n";

	std::cout << "-------------------------\n";
	std::cout << "complete from select:\n";

	std::vector<std::string_view> possible {
		" die",
		" die.",
		" live",
		" love",
		" excersize",
		" Hi",
	};

	for (size_t i = 0; i < 10; i++) {
		std::cout << "The meaning of life is to";
		auto res = lcw.completeSelect("The meaning of life is to", possible);
		if (res < 0) {
			std::cout << " error--\n";
		} else {
			std::cout << possible[res] << "\n";
		}
	}

	return 0;
}