From 55e3a8a6f8afa23faafaaba1433d389f01530e54 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Sun, 6 Oct 2024 11:44:19 +0200 Subject: [PATCH] forgotten wip changes --- src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp | 13 +++++++++---- src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp | 2 +- src/test1.cpp | 12 ++++++------ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp index 45678ca..1a94505 100644 --- a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp +++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.cpp @@ -77,7 +77,7 @@ int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::ve } //grammar += ")"; - //std::cout << "generated grammar:\n" << grammar << "\n"; + std::cerr << "generated grammar:\n" << grammar << "\n"; auto ret = complete(nlohmann::json{ {"prompt", prompt}, @@ -89,19 +89,23 @@ int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::ve {"top_p", 1.0}, // disable {"n_predict", 256}, // unlikely to ever be so high {"seed", _rng()}, + {"ignore_eos", true}, {"cache_prompt", static_cast(_use_server_cache)}, }); if (ret.empty()) { + assert("ret empty" && false); return -2; } if (!ret.count("content")) { + assert("no content" && false); return -3; } std::string selected = ret.at("content"); if (selected.empty()) { + assert("content empty" && false); return -4; } @@ -111,6 +115,7 @@ int64_t LlamaCppWeb::completeSelect(const std::string_view prompt, const std::ve } } + std::cerr << "content does not contain match\n"; std::cerr << "complete failed j:'" << ret.dump() << "'\n"; return -5; } @@ -125,7 +130,7 @@ std::string LlamaCppWeb::completeLine(const std::string_view prompt) { {"top_p", 1.0}, // disable {"n_predict", 400}, {"seed", _rng()}, - {"stop", {"\n"}}, + {"stop", nlohmann::json::array({"\n"})}, {"cache_prompt", static_cast(_use_server_cache)}, }); @@ -147,7 +152,7 @@ nlohmann::json LlamaCppWeb::complete(const nlohmann::json& request_j) { // steaming instead would be better _cli.set_read_timeout(std::chrono::minutes(10)); - //std::cout << "j dump: '" << request_j.dump(-1, ' ', true) << "'\n"; + std::cerr << "j dump: '" << request_j.dump(-1, ' ', true) << "'\n"; auto res = _cli.Post("/completion", request_j.dump(-1, ' ', true), "application/json"); @@ -159,7 +164,7 @@ nlohmann::json LlamaCppWeb::complete(const nlohmann::json& request_j) { //res->body.empty() || //res->get_header_value("Content-Type") != "application/json" ) { - std::cerr << "error posting\n"; + std::cerr << "error posting: '" << res->body << "'\n"; return {}; } diff --git a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp index 1485874..893e337 100644 --- a/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp +++ b/src/solanaceae/llama-cpp-web/llama_cpp_web_impl.hpp @@ -19,7 +19,7 @@ struct LlamaCppWeb : public TextCompletionI { // this is a bad idea static std::minstd_rand thread_local _rng; - std::atomic _use_server_cache {true}; + std::atomic _use_server_cache {false}; LlamaCppWeb( ConfigModelI& conf diff --git a/src/test1.cpp b/src/test1.cpp index 293dab1..dd72335 100644 --- a/src/test1.cpp +++ b/src/test1.cpp @@ -20,7 +20,7 @@ int main(void) { } std::cerr << lcw._cli.host() << " " << lcw._cli.port() << " endpoint healthy\n"; - std::cout << "The meaning of life is to" + std::cerr << "The meaning of life is to" << lcw.complete(nlohmann::json{ {"prompt", "The meaning of life is to"}, {"min_p", 0.1}, // model dependent @@ -34,9 +34,9 @@ int main(void) { }) << "\n"; - std::cout << "-------------------------\n"; + std::cerr << "-------------------------\n"; - std::cout << "complete from select:\n"; + std::cerr << "complete from select:\n"; std::vector possible { " die", " die.", @@ -46,12 +46,12 @@ int main(void) { " Hi", }; for (size_t i = 0; i < 10; i++) { - std::cout << "The meaning of life is to"; + std::cerr << "The meaning of life is to"; auto res = lcw.completeSelect("The meaning of life is to", possible); if (res < 0) { - std::cout << " error--\n"; + std::cerr << " error\n"; } else { - std::cout << possible[res] << "\n"; + std::cerr << possible[res] << "\n"; } }