simple llama.cpp server api usage works
This commit is contained in:
commit
c497b19b20
26
.gitignore
vendored
Normal file
26
.gitignore
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
.vs/
|
||||
*.o
|
||||
*.swp
|
||||
~*
|
||||
*~
|
||||
.idea/
|
||||
cmake-build-debug/
|
||||
cmake-build-debugandtest/
|
||||
cmake-build-release/
|
||||
*.stackdump
|
||||
*.coredump
|
||||
compile_commands.json
|
||||
/build*
|
||||
/result*
|
||||
.clangd
|
||||
.cache
|
||||
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
CMakeLists.txt.user*
|
||||
CMakeCache.txt
|
||||
|
||||
*.tox
|
||||
imgui.ini
|
75
CMakeLists.txt
Normal file
75
CMakeLists.txt
Normal file
@ -0,0 +1,75 @@
|
||||
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)

# cmake setup begin
project(solanaceae_llama-cpp-web)

# "Standalone" means this directory is the root of the whole build, i.e. the
# project was not pulled in by another project. Start from the embedded
# defaults and flip both flags for a top-level build. The NOT_ variable is the
# explicit inverse of the standalone flag.
set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF)
set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON)
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
	set(SOLANACEAE_LLAMA-CPP-WEB_STANDALONE ON)
	set(NOT_SOLANACEAE_LLAMA-CPP-WEB_STANDALONE OFF)
endif()
message("II SOLANACEAE_LLAMA-CPP-WEB_STANDALONE " ${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE})

# Plugins are built by default only for a standalone build.
option(SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS "Build the llama-cpp-web plugins" ${SOLANACEAE_LLAMA-CPP-WEB_STANDALONE})

# Global build defaults are only touched when we own the build tree.
if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE)
	set(CMAKE_POSITION_INDEPENDENT_CODE ON)

	# fall back to a debug build when no build type was requested
	if (NOT CMAKE_BUILD_TYPE)
		set(CMAKE_BUILD_TYPE "Debug")
	endif()

	# emit compile_commands.json (used by editor tooling such as vim ycm)
	set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

	# output layout: static archives in lib/, everything else in bin/
	set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
	foreach(artifact_kind IN ITEMS LIBRARY RUNTIME)
		set(CMAKE_${artifact_kind}_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
	endforeach()
endif()

# external libs
# Added before the warning levels are raised so third-party code is not
# compiled with our stricter flags.
add_subdirectory(./external EXCLUDE_FROM_ALL)
|
||||
|
||||
# Compiler strictness is only configured for a standalone build, so a parent
# project keeps control over its own flags.
if (SOLANACEAE_LLAMA-CPP-WEB_STANDALONE)
	set(CMAKE_CXX_EXTENSIONS OFF)

	# bump up warning levels appropriately for clang, gcc & msvc
	# The compiler id is expanded inside quotes so an empty id cannot produce a
	# malformed if(); AppleClang is listed explicitly because it is reported
	# as its own compiler id, distinct from "Clang".
	if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(GNU|Clang|AppleClang)$")
		add_compile_options(
			-Wall -Wextra # Reasonable and standard
			-Wpedantic # Warn if non-standard C++ is used
			-Wunused # Warn on anything being unused
			#-Wconversion # Warn on type conversions that may lose data
			#-Wsign-conversion # Warn on sign conversions
			-Wshadow # Warn if a variable declaration shadows one from a parent context
		)

		# sanitizers are currently disabled; uncomment one of the lines to enable
		if (NOT WIN32)
			#link_libraries(-fsanitize=address)
			#link_libraries(-fsanitize=address,undefined)
			#link_libraries(-fsanitize-address-use-after-scope)
			#link_libraries(-fsanitize=undefined)
		endif()
	elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
		# replace an existing /W<n> level with /W4, or append /W4 if none is set
		if (CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
			string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
		else()
			set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
		endif()
	endif()

endif()

# cmake setup end

add_subdirectory(./src)

if (SOLANACEAE_LLAMA-CPP-WEB_BUILD_PLUGINS)
	add_subdirectory(./plugins)
endif()
|
||||
|
1
README.md
Normal file
1
README.md
Normal file
@ -0,0 +1 @@
|
||||
Uses the llama.cpp embedded web server API for easier portability.
|
80
external/CMakeLists.txt
vendored
Normal file
80
external/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)

include(FetchContent)

# Each dependency is fetched only when its target does not exist yet, so a
# parent project that already provides the target wins.
# NOTE(review): most entries track the `master` branch, so builds are not
# reproducible until they are pinned to a tag or commit.

# TODO: move entt dep into solanaceae_contact
if (NOT TARGET EnTT::EnTT)
	FetchContent_Declare(EnTT
		GIT_REPOSITORY https://github.com/skypjack/entt.git
		GIT_TAG v3.12.2
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(EnTT)
endif()

# --- solanaceae components ---

if (NOT TARGET solanaceae_util)
	FetchContent_Declare(solanaceae_util
		GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_util.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(solanaceae_util)
endif()

if (NOT TARGET solanaceae_contact)
	FetchContent_Declare(solanaceae_contact
		GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_contact.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(solanaceae_contact)
endif()

if (NOT TARGET solanaceae_message3)
	FetchContent_Declare(solanaceae_message3
		GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_message3.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(solanaceae_message3)
endif()

if (NOT TARGET solanaceae_plugin)
	FetchContent_Declare(solanaceae_plugin
		GIT_REPOSITORY https://github.com/Green-Sky/solanaceae_plugin.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(solanaceae_plugin)
endif()

# --- http / json ---

#[[ oatpp is currently disabled, kept for reference:
if (NOT TARGET oatpp)
	set(OATPP_INSTALL OFF CACHE BOOL "" FORCE)
	set(OATPP_BUILD_TESTS OFF CACHE BOOL "" FORCE)
	set(OATPP_LINK_TEST_LIBRARY OFF CACHE BOOL "" FORCE)
	FetchContent_Declare(oatpp
		GIT_REPOSITORY https://github.com/oatpp/oatpp.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(oatpp)
endif()
]]

if (NOT TARGET httplib::httplib)
	FetchContent_Declare(httplib
		GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
		GIT_TAG master
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(httplib)
endif()

# nlohmann json comes from a release archive verified by its SHA256 hash
if (NOT TARGET nlohmann_json::nlohmann_json)
	FetchContent_Declare(json
		URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz
		URL_HASH SHA256=d6c65aca6b1ed68e7a182f4757257b107ae403032760ed6ef121c9d55e81757d
		EXCLUDE_FROM_ALL
	)
	FetchContent_MakeAvailable(json)
endif()
|
||||
|
11
plugins/CMakeLists.txt
Normal file
11
plugins/CMakeLists.txt
Normal file
@ -0,0 +1,11 @@
|
||||
cmake_minimum_required(VERSION 3.14...3.24 FATAL_ERROR)

# Shared library exposing the llama-cpp-web implementation as a solanaceae plugin.
add_library(plugin_llama-cpp-web SHARED)

target_sources(plugin_llama-cpp-web PRIVATE
	./plugin_llama-cpp-web.cpp
)

# solanaceae_plugin provides the plugin API header,
# solanaceae_llama-cpp-web the implementation being exported.
target_link_libraries(plugin_llama-cpp-web PUBLIC
	solanaceae_plugin
	solanaceae_llama-cpp-web
)
|
||||
|
62
plugins/plugin_llama-cpp-web.cpp
Normal file
62
plugins/plugin_llama-cpp-web.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <solanaceae/plugin/solana_plugin_v1.h>
|
||||
|
||||
#include <solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
static std::unique_ptr<LlamaCppWeb> g_lcw = nullptr;
|
||||
|
||||
constexpr const char* plugin_name = "llama-cpp-web";
|
||||
|
||||
extern "C" {
|
||||
|
||||
SOLANA_PLUGIN_EXPORT const char* solana_plugin_get_name(void) {
|
||||
return plugin_name;
|
||||
}
|
||||
|
||||
SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_get_version(void) {
|
||||
return SOLANA_PLUGIN_VERSION;
|
||||
}
|
||||
|
||||
SOLANA_PLUGIN_EXPORT uint32_t solana_plugin_start(struct SolanaAPI* solana_api) {
|
||||
std::cout << "PLUGIN " << plugin_name << " START()\n";
|
||||
|
||||
if (solana_api == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
//auto* conf = PLUG_RESOLVE_INSTANCE(ConfigModelI);
|
||||
|
||||
// static store, could be anywhere tho
|
||||
// construct with fetched dependencies
|
||||
g_lcw = std::make_unique<LlamaCppWeb>();
|
||||
|
||||
// register types
|
||||
PLUG_PROVIDE_INSTANCE(LlamaCppWeb, plugin_name, g_lcw.get());
|
||||
PLUG_PROVIDE_INSTANCE(LlamaCppWebI, plugin_name, g_lcw.get());
|
||||
} catch (const ResolveException& e) {
|
||||
std::cerr << "PLUGIN " << plugin_name << " " << e.what << "\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SOLANA_PLUGIN_EXPORT void solana_plugin_stop(void) {
|
||||
std::cout << "PLUGIN " << plugin_name << " STOP()\n";
|
||||
|
||||
g_lcw.reset();
|
||||
}
|
||||
|
||||
SOLANA_PLUGIN_EXPORT float solana_plugin_tick(float delta) {
|
||||
(void)delta;
|
||||
//g_ircc->iterate(); // TODO: return interval, respect dcc etc
|
||||
|
||||
return std::numeric_limits<float>::max();
|
||||
}
|
||||
|
||||
} // extern C
|
||||
|
31
src/CMakeLists.txt
Normal file
31
src/CMakeLists.txt
Normal file
@ -0,0 +1,31 @@
|
||||
cmake_minimum_required(VERSION 3.9...3.24 FATAL_ERROR)

project(solanaceae)

# Library wrapping the llama.cpp web server api.
# The headers are listed next to the implementation file.
add_library(solanaceae_llama-cpp-web
	./solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp
	./solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
	./solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp
)

# consumers include as <solanaceae/llama-cpp-web/...>
target_include_directories(solanaceae_llama-cpp-web PUBLIC .)

# the library itself is built as c++20, consumers only need c++17
target_compile_features(solanaceae_llama-cpp-web PRIVATE cxx_std_20)
target_compile_features(solanaceae_llama-cpp-web INTERFACE cxx_std_17)

target_link_libraries(solanaceae_llama-cpp-web PUBLIC
	# http client and json
	httplib::httplib
	nlohmann_json::nlohmann_json

	# solanaceae
	solanaceae_util
	solanaceae_message3
)

########################################

# small manual test program, talks to a running server
add_executable(test1
	test1.cpp
)

target_link_libraries(test1 PUBLIC
	solanaceae_llama-cpp-web
)
|
||||
|
152
src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp
Normal file
152
src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp
Normal file
@ -0,0 +1,152 @@
|
||||
#include "./llama_cpp_web_impl.hpp"
|
||||
|
||||
#include <solanaceae/util/utils.hpp>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <chrono>
|
||||
|
||||
// TODO: variant that strips unicode?
|
||||
static std::string convertToSafeGrammarString(std::string_view input) {
|
||||
std::string res;
|
||||
for (const char c : input) {
|
||||
res += "\\x";
|
||||
res += bin2hex({static_cast<uint8_t>(c)});
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// Nothing to clean up explicitly; defined out of line in this translation unit.
LlamaCppWeb::~LlamaCppWeb(void) = default;
|
||||
|
||||
bool LlamaCppWeb::isHealthy(void) {
|
||||
auto res = _cli.Get("/health");
|
||||
if (
|
||||
res.error() != httplib::Error::Success ||
|
||||
res->status != 200 ||
|
||||
res->body.empty() ||
|
||||
res->get_header_value("Content-Type") != "application/json"
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//std::cout << "/health code: " << res->status << " body: " << res->body << "\n";
|
||||
//std::cout << "Content-Type: " << res->get_header_value("Content-Type") << "\n";
|
||||
|
||||
const auto response_body_j = nlohmann::json::parse(res->body, nullptr, false);
|
||||
|
||||
const std::string status = response_body_j.value("status", std::string{"value-not-found"});
|
||||
if (status != "ok") {
|
||||
std::cerr << "status not ok: " << status << "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true; // healthy endpoint
|
||||
}
|
||||
|
||||
int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) {
|
||||
if (possible.empty()) {
|
||||
return -1;
|
||||
}
|
||||
if (possible.size() == 1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// see
|
||||
// https://github.com/ggerganov/llama.cpp/tree/master/grammars#example
|
||||
std::string grammar {"root ::= "};
|
||||
bool first = true;
|
||||
for (const auto& it : possible) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
grammar += "| ";
|
||||
}
|
||||
grammar += "\"";
|
||||
//grammar += it;
|
||||
grammar += convertToSafeGrammarString(it);
|
||||
grammar += "\" ";
|
||||
}
|
||||
//grammar += ")";
|
||||
|
||||
//std::cout << "generated grammar:\n" << grammar << "\n";
|
||||
|
||||
auto ret = complete(nlohmann::json{
|
||||
{"prompt", prompt},
|
||||
{"grammar", grammar},
|
||||
{"min_p", 0.1}, // model dependent
|
||||
{"repeat_penalty", 1.0}, // deactivate
|
||||
{"temperature", 0.9}, // depends 1.0 for chat models
|
||||
{"top_k", 60},
|
||||
{"top_p", 1.0}, // disable
|
||||
{"n_predict", 256}, // unlikely to ever be so high
|
||||
{"seed", _rng()},
|
||||
});
|
||||
|
||||
if (ret.empty()) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (!ret.count("content")) {
|
||||
return -3;
|
||||
}
|
||||
|
||||
std::string selected = ret.at("content");
|
||||
if (selected.empty()) {
|
||||
return -4;
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < (int64_t)possible.size(); i++) {
|
||||
if (selected == possible[i]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
std::cerr << "complete failed j:'" << ret.dump() << "'\n";
|
||||
return -5;
|
||||
}
|
||||
|
||||
std::string LlamaCppWeb::completeLine(const std::string_view prompt) {
|
||||
auto ret = complete(nlohmann::json{
|
||||
{"prompt", prompt},
|
||||
{"min_p", 0.1}, // model dependent
|
||||
{"repeat_penalty", 1.0}, // deactivate
|
||||
{"temperature", 0.9}, // depends 1.0 for chat models
|
||||
{"top_k", 60},
|
||||
{"top_p", 1.0}, // disable
|
||||
{"n_predict", 1000},
|
||||
{"seed", _rng()},
|
||||
{"stop", {"\n"}},
|
||||
});
|
||||
|
||||
return ret.dump();
|
||||
}
|
||||
|
||||
// Posts `request_j` to the /completion endpoint and returns the parsed
// response body. An empty (null) json is returned if the endpoint is not
// healthy or the request did not come back with status 200.
// The body is parsed without exceptions.
nlohmann::json LlamaCppWeb::complete(const nlohmann::json& request_j) {
	if (!isHealthy()) {
		return {};
	}

	// completions can take very long
	// streaming instead would be better
	_cli.set_read_timeout(std::chrono::minutes(10));

	// serialized compact, with non-ascii chars escaped
	const std::string payload = request_j.dump(-1, ' ', true);
	//std::cout << "j dump: '" << payload << "'\n";

	auto res = _cli.Post("/completion", payload, "application/json");

	//std::cerr << "res.error():" << res.error() << "\n";

	// res is only dereferenced once the transport reported success
	const bool request_ok =
		res.error() == httplib::Error::Success &&
		res->status == 200
		//&& !res->body.empty()
		//&& res->get_header_value("Content-Type") == "application/json"
	;
	if (!request_ok) {
		std::cerr << "error posting\n";
		return {};
	}

	return nlohmann::json::parse(res->body, nullptr, false);
}
|
||||
|
23
src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
Normal file
23
src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include "./llama_cpp_web_interface.hpp"
|
||||
|
||||
#include <httplib.h>
|
||||
#include <nlohmann/json_fwd.hpp>
|
||||
|
||||
#include <random>
|
||||
|
||||
struct LlamaCppWeb : public LlamaCppWebI {
|
||||
httplib::Client _cli{"http://localhost:8080"};
|
||||
std::minstd_rand _rng{std::random_device{}()};
|
||||
|
||||
~LlamaCppWeb(void);
|
||||
|
||||
bool isHealthy(void) override;
|
||||
int64_t completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) override;
|
||||
std::string completeLine(const std::string_view prompt) override;
|
||||
|
||||
// TODO: expose?
|
||||
nlohmann::json complete(const nlohmann::json& request_j);
|
||||
};
|
||||
|
20
src/solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp
Normal file
20
src/solanaceae/llama-cpp-web/llama_cpp_web_interface.hpp
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>
|
||||
|
||||
// Abstract interface for text completion backed by a llama.cpp web server.
struct LlamaCppWebI {
	virtual ~LlamaCppWebI(void) = default;

	// Whether the backend can currently be used.
	virtual bool isHealthy(void) = 0;

	// TODO: add more complex api

	// Selects one entry of `possible` as the continuation of `prompt`.
	// Returns the index into `possible`; the LlamaCppWeb implementation
	// returns a negative value on failure.
	virtual int64_t completeSelect(const std::string_view prompt, const std::vector<std::string_view>& possible) = 0;

	// Completes `prompt`.
	// stops at newlines
	// (and limit of 1000 and eos)
	virtual std::string completeLine(const std::string_view prompt) = 0;
};
|
||||
|
56
src/test1.cpp
Normal file
56
src/test1.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
#include <solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
|
||||
// Manual smoke test against a running llama.cpp server on the default
// endpoint: checks /health, runs one free-form completion, then ten
// grammar-constrained selections.
// Returns 1 if the endpoint is not healthy, 0 otherwise.
int main(void) {
	LlamaCppWeb lcw;

	const bool healthy = lcw.isHealthy();
	std::cerr
		<< lcw._cli.host() << " " << lcw._cli.port()
		<< (healthy ? " endpoint healthy\n" : " endpoint not healthy\n");
	if (!healthy) {
		return 1;
	}

	const char* const prompt = "The meaning of life is to";

	// free-form completion, prints the raw json response
	std::cout << prompt
		<< lcw.complete(nlohmann::json{
			{"prompt", prompt},
			{"min_p", 0.1}, // model dependent
			{"repeat_penalty", 1.0}, // deactivate
			{"temperature", 0.9}, // depends 1.0 for chat models
			{"top_k", 60},
			{"top_p", 1.0}, // disable
			{"n_predict", 16},
			{"stop", {".", "\n"}},
			{"grammar", ""} // key was misspelled ("gramar") and therefore ignored by the server
		})
		<< "\n";

	std::cout << "-------------------------\n";

	std::cout << "complete from select:\n";
	const std::vector<std::string_view> possible {
		" die",
		" die.",
		" live",
		" love",
		" excersize",
		" Hi",
	};
	for (size_t i = 0; i < 10; i++) {
		std::cout << prompt;
		const auto res = lcw.completeSelect(prompt, possible);
		if (res < 0) {
			std::cout << " error--\n";
		} else {
			std::cout << possible[static_cast<size_t>(res)] << "\n";
		}
	}

	return 0;
}
|
Loading…
x
Reference in New Issue
Block a user