diff --git a/autoload/llama.vim b/autoload/llama.vim
index 3ad3459..9625f29 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -7,6 +7,7 @@ highlight default llama_hl_info guifg=#77ff2f ctermfg=119
 "
 "   endpoint:         llama.cpp server endpoint
 "   api_key:          llama.cpp server api key (optional)
+"   model:            model name to use when multiple models are loaded (optional)
 "   n_prefix:         number of lines before the cursor location to include in the local prefix
 "   n_suffix:         number of lines after the cursor location to include in the local suffix
 "   n_predict:        max number of tokens to predict
@@ -46,6 +47,7 @@ highlight default llama_hl_info guifg=#77ff2f ctermfg=119
 let s:default_config = {
     \ 'endpoint':         'http://127.0.0.1:8012/infill',
     \ 'api_key':          '',
+    \ 'model':            '',
     \ 'n_prefix':         256,
     \ 'n_suffix':         64,
     \ 'n_predict':        128,
@@ -385,7 +387,7 @@ function! s:ring_update()
     endfor
 
     " no samplers needed here
-    let l:request = json_encode({
+    let l:request = {
         \ 'input_prefix':     "",
         \ 'input_suffix':     "",
         \ 'input_extra':      l:extra_context,
@@ -398,31 +400,37 @@ function! s:ring_update()
         \ 't_max_prompt_ms':  1,
         \ 't_max_predict_ms': 1,
         \ 'response_fields':  [""]
-        \ })
+        \ }
 
     let l:curl_command = [
         \ "curl",
         \ "--silent",
         \ "--no-buffer",
+        \ "--include",
         \ "--request", "POST",
         \ "--url", g:llama_config.endpoint,
        \ "--header", "Content-Type: application/json",
         \ "--data", "@-",
         \ ]
 
+    if exists("g:llama_config.model") && len(g:llama_config.model) > 0
+        let l:request['model'] = g:llama_config.model
+    endif
+
     if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0
         call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
     endif
 
     " no callbacks because we don't need to process the response
+    let l:request_json = json_encode(l:request)
     if s:ghost_text_nvim
         let jobid = jobstart(l:curl_command, {})
-        call chansend(jobid, l:request)
+        call chansend(jobid, l:request_json)
         call chanclose(jobid, 'stdin')
     elseif s:ghost_text_vim
         let jobid = job_start(l:curl_command, {})
         let channel = job_getchannel(jobid)
-        call ch_sendraw(channel, l:request)
+        call ch_sendraw(channel, l:request_json)
         call ch_close_in(channel)
     endif
 endfunction
@@ -622,7 +630,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
             \ })
     endfor
 
-    let l:request = json_encode({
+    let l:request = {
         \ 'input_prefix':     l:prefix,
         \ 'input_suffix':     l:suffix,
         \ 'input_extra':      l:extra_ctx,
@@ -650,18 +658,23 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
         \     "truncated",
         \     "tokens_cached",
         \ ],
-        \ })
+        \ }
 
     let l:curl_command = [
         \ "curl",
         \ "--silent",
         \ "--no-buffer",
+        \ "--include",
         \ "--request", "POST",
         \ "--url", g:llama_config.endpoint,
         \ "--header", "Content-Type: application/json",
         \ "--data", "@-",
         \ ]
 
+    if exists("g:llama_config.model") && len(g:llama_config.model) > 0
+        let l:request['model'] = g:llama_config.model
+    endif
+
     if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0
         call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
     endif
@@ -675,13 +688,14 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
     endif
 
     " send the request asynchronously
+    let l:request_json = json_encode(l:request)
     if s:ghost_text_nvim
         let s:current_job = jobstart(l:curl_command, {
             \ 'on_stdout': function('s:fim_on_response', [l:hashes]),
             \ 'on_exit':   function('s:fim_on_exit'),
             \ 'stdout_buffered': v:true
             \ })
-        call chansend(s:current_job, l:request)
+        call chansend(s:current_job, l:request_json)
         call chanclose(s:current_job, 'stdin')
     elseif s:ghost_text_vim
         let s:current_job = job_start(l:curl_command, {
@@ -690,7 +704,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
             \ })
 
         let channel = job_getchannel(s:current_job)
-        call ch_sendraw(channel, l:request)
+        call ch_sendraw(channel, l:request_json)
         call ch_close_in(channel)
     endif
 
@@ -726,19 +740,31 @@ function! s:fim_on_response(hashes, job_id, data, event = v:null)
         return
     endif
+    let l:http_resp = s:curl_stdout_to_response(l:raw)
+
+    if l:http_resp['code'] >= 400
+        echohl WarningMsg
+        echo "HTTP error: " . l:http_resp['code'] . " " . l:http_resp['message']
+        echohl None
+        echon l:http_resp['body']
+        return
+    endif
+
+    let l:body = l:http_resp['body']
+
     " ensure the response is valid JSON, starting with a fast check before full decode
-    if l:raw !~# '^\s*{' || l:raw !~# '\v"content"\s*:"'
+    if l:body !~# '^\s*{' || l:body !~# '\v"content"\s*:"'
         return
     endif
 
     try
-        let l:response = json_decode(l:raw)
+        let l:response = json_decode(l:body)
     catch
         return
     endtry
 
     " put the response in the cache
     for l:hash in a:hashes
-        call s:cache_insert(l:hash, l:raw)
+        call s:cache_insert(l:hash, l:body)
     endfor
 
     " if nothing is currently displayed - show the hint directly
@@ -758,6 +784,52 @@ function! s:fim_on_exit(job_id, exit_code, event = v:null)
     let s:current_job = v:null
 endfunction
 
+" converts curl output (produced with --include) into a response dict
+"
+" Example input:
+"   HTTP/1.1 200 OK
+"   Access-Control-Allow-Origin:
+"   Content-Length: 906
+"   Content-Type: application/json; charset=utf-8
+"   Keep-Alive: timeout=5, max=100
+"   Server: llama.cpp
+"   Date: Fri, 29 Aug 2025 16:03:46 GMT
+"
+"   {"choices":[{"finish_reason":"stop", ....
+"
+" Result:
+"   status  - 'HTTP/1.1 200 OK'
+"   message - 'OK'
+"   code    - 200
+"   headers - ['Access-Control-Allow-Origin:', ....]
+"   body    - '{"choices": ....'
+function! s:curl_stdout_to_response(raw)
+    let l:parts = split(a:raw, "\r\n\r\n")
+    if len(l:parts) < 2
+        throw "curl output is missing the response head, run curl with the --include flag"
+    endif
+
+    let l:head = remove(l:parts, 0)
+    let l:body = join(l:parts, "\r\n\r\n")
+
+    let l:headers = split(l:head, "\r\n")
+    let l:status  = remove(l:headers, 0)
+
+    let l:status_parts = split(l:status, ' ')
+    let l:http_ver = remove(l:status_parts, 0)
+    let l:code     = remove(l:status_parts, 0)
+    let l:message  = join(l:status_parts, ' ')
+
+    return {
+        \ 'status':  l:status,
+        \ 'message': l:message,
+        \ 'code':    str2nr(l:code),
+        \ 'headers': l:headers,
+        \ 'body':    l:body,
+        \ }
+endfunction
+
+
 function! s:on_move()
     let s:t_last_move = reltime()
 
diff --git a/doc/llama.txt b/doc/llama.txt
index 67e7412..1068c00 100644
--- a/doc/llama.txt
+++ b/doc/llama.txt
@@ -86,6 +86,7 @@ Currently the default config is:
   let s:default_config = {
     \ 'endpoint':         'http://127.0.0.1:8012/infill',
     \ 'api_key':          '',
+    \ 'model':            '',
     \ 'n_prefix':         256,
     \ 'n_suffix':         64,
     \ 'n_predict':        128,
@@ -112,6 +113,9 @@ Currently the default config is:
 
 - {api_key}             llama.cpp server api key (optional)
 
+- {model}               model name to use when multiple models are
+                        loaded (optional)
+
 - {n_prefix}            number of lines before the cursor location to
                         include in the local prefix
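
Usage sketch (illustrative, not part of the patch): assuming user settings are merged over
s:default_config the same way the existing endpoint and api_key options are, the new model
option can be set from the vimrc like this. The model name below is only a placeholder for
whatever name the llama.cpp server reports for the model you want to use:

    " select a specific model on a multi-model server (model name is a placeholder)
    let g:llama_config = {
        \ 'endpoint': 'http://127.0.0.1:8012/infill',
        \ 'model':    'qwen2.5-coder-7b',
        \ }

When 'model' is left at its default of '', no model field is added to the /infill request.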