diff --git a/autoload/llama.vim b/autoload/llama.vim
index 3ad3459..9625f29 100644
--- a/autoload/llama.vim
+++ b/autoload/llama.vim
@@ -7,6 +7,7 @@ highlight default llama_hl_info guifg=#77ff2f ctermfg=119
 "
 "   endpoint:         llama.cpp server endpoint
 "   api_key:          llama.cpp server api key (optional)
+"   model:            model name to use when multiple models are loaded (optional)
 "   n_prefix:         number of lines before the cursor location to include in the local prefix
 "   n_suffix:         number of lines after the cursor location to include in the local suffix
 "   n_predict:        max number of tokens to predict
@@ -46,6 +47,7 @@ highlight default llama_hl_info guifg=#77ff2f ctermfg=119
 let s:default_config = {
     \ 'endpoint':         'http://127.0.0.1:8012/infill',
     \ 'api_key':          '',
+    \ 'model':            '',
     \ 'n_prefix':         256,
     \ 'n_suffix':         64,
     \ 'n_predict':        128,
@@ -385,7 +387,7 @@ function! s:ring_update()
     endfor
 
     " no samplers needed here
-    let l:request = json_encode({
+    let l:request = {
         \ 'input_prefix':     "",
         \ 'input_suffix':     "",
         \ 'input_extra':      l:extra_context,
@@ -398,31 +400,37 @@ function! s:ring_update()
         \ 't_max_prompt_ms':  1,
         \ 't_max_predict_ms': 1,
         \ 'response_fields':  [""]
-        \ })
+        \ }
 
     let l:curl_command = [
         \ "curl",
         \ "--silent",
         \ "--no-buffer",
+        \ "--include",
         \ "--request", "POST",
         \ "--url", g:llama_config.endpoint,
        \ "--header", "Content-Type: application/json",
         \ "--data", "@-",
         \ ]
 
+    if exists("g:llama_config.model") && len(g:llama_config.model) > 0
+        let l:request['model'] = g:llama_config.model
+    endif
+
     if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0
         call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
     endif
 
     " no callbacks because we don't need to process the response
+    let l:request_json = json_encode(l:request)
     if s:ghost_text_nvim
         let jobid = jobstart(l:curl_command, {})
-        call chansend(jobid, l:request)
+        call chansend(jobid, l:request_json)
         call chanclose(jobid, 'stdin')
     elseif s:ghost_text_vim
         let jobid = job_start(l:curl_command, {})
         let channel = job_getchannel(jobid)
-        call ch_sendraw(channel, l:request)
+        call ch_sendraw(channel, l:request_json)
         call ch_close_in(channel)
     endif
 endfunction
@@ -622,7 +630,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
             \ })
     endfor
 
-    let l:request = json_encode({
+    let l:request = {
         \ 'input_prefix':     l:prefix,
         \ 'input_suffix':     l:suffix,
         \ 'input_extra':      l:extra_ctx,
@@ -650,18 +658,23 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
         \     "truncated",
         \     "tokens_cached",
         \ ],
-        \ })
+        \ }
 
     let l:curl_command = [
         \ "curl",
         \ "--silent",
         \ "--no-buffer",
+        \ "--include",
         \ "--request", "POST",
         \ "--url", g:llama_config.endpoint,
         \ "--header", "Content-Type: application/json",
         \ "--data", "@-",
         \ ]
 
+    if exists("g:llama_config.model") && len(g:llama_config.model) > 0
+        let l:request['model'] = g:llama_config.model
+    endif
+
     if exists ("g:llama_config.api_key") && len("g:llama_config.api_key") > 0
         call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
     endif
@@ -675,13 +688,14 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
     endif
 
     " send the request asynchronously
+    let l:request_json = json_encode(l:request)
     if s:ghost_text_nvim
         let s:current_job = jobstart(l:curl_command, {
             \ 'on_stdout': function('s:fim_on_response', [l:hashes]),
             \ 'on_exit':   function('s:fim_on_exit'),
             \ 'stdout_buffered': v:true
             \ })
-        call chansend(s:current_job, l:request)
+        call chansend(s:current_job, l:request_json)
         call chanclose(s:current_job, 'stdin')
     elseif s:ghost_text_vim
         let s:current_job = job_start(l:curl_command, {
@@ -690,7 +704,7 @@ function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
             \ })
 
         let channel = job_getchannel(s:current_job)
-        call ch_sendraw(channel, l:request)
+        call ch_sendraw(channel, l:request_json)
         call ch_close_in(channel)
     endif
 
@@ -726,19 +740,31 @@ function! s:fim_on_response(hashes, job_id, data, event = v:null)
         return
     endif
+    let l:http_resp = s:curl_stdout_to_response(l:raw)
+
+    if l:http_resp['code'] >= 400
+        echohl WarningMsg
+        echo "HTTP error: " . l:http_resp['code'] . " " . l:http_resp['message']
+        echohl None
+        echon l:http_resp['body']
+        return
+    endif
+
+    let l:body = l:http_resp['body']
+
     " ensure the response is valid JSON, starting with a fast check before full decode
-    if l:raw !~# '^\s*{' || l:raw !~# '\v"content"\s*:"'
+    if l:body !~# '^\s*{' || l:body !~# '\v"content"\s*:"'
         return
     endif
 
     try
-        let l:response = json_decode(l:raw)
+        let l:response = json_decode(l:body)
     catch
         return
     endtry
 
     " put the response in the cache
     for l:hash in a:hashes
-        call s:cache_insert(l:hash, l:raw)
+        call s:cache_insert(l:hash, l:body)
     endfor
 
     " if nothing is currently displayed - show the hint directly
@@ -758,6 +784,52 @@ function! s:fim_on_exit(job_id, exit_code, event = v:null)
     let s:current_job = v:null
 endfunction
 
+" converts curl output (produced with --include) into a response dict
+"
+" Example input:
+"   HTTP/1.1 200 OK
+"   Access-Control-Allow-Origin:
+"   Content-Length: 906
+"   Content-Type: application/json; charset=utf-8
+"   Keep-Alive: timeout=5, max=100
+"   Server: llama.cpp
+"   Date: Fri, 29 Aug 2025 16:03:46 GMT
+"
+"   {"choices":[{"finish_reason":"stop", ....
+"
+" Result:
+"   status  - 'HTTP/1.1 200 OK'
+"   message - 'OK'
+"   code    - 200
+"   headers - ['Access-Control-Allow-Origin:', ....]
+"   body    - '{"choices": ....'
+function! s:curl_stdout_to_response(raw)
+    let l:parts = split(a:raw, "\r\n\r\n")
+    if len(l:parts) < 2
+        throw "curl output is missing the response head, run curl with the --include flag"
+    endif
+
+    let l:head = remove(l:parts, 0)
+    let l:body = join(l:parts, "\r\n\r\n")
+
+    let l:headers = split(l:head, "\r\n")
+    let l:status  = remove(l:headers, 0)
+
+    let l:status_parts = split(l:status, ' ')
+    let l:http_ver = remove(l:status_parts, 0)
+    let l:code     = remove(l:status_parts, 0)
+    let l:message  = join(l:status_parts, ' ')
+
+    return {
+        \ 'status':  l:status,
+        \ 'message': l:message,
+        \ 'code':    str2nr(l:code),
+        \ 'headers': l:headers,
+        \ 'body':    l:body,
+        \ }
+endfunction
+
+
 function! s:on_move()
     let s:t_last_move = reltime()
 
diff --git a/doc/llama.txt b/doc/llama.txt
index 67e7412..1068c00 100644
--- a/doc/llama.txt
+++ b/doc/llama.txt
@@ -86,6 +86,7 @@ Currently the default config is:
   let s:default_config = {
     \ 'endpoint':         'http://127.0.0.1:8012/infill',
     \ 'api_key':          '',
+    \ 'model':            '',
     \ 'n_prefix':         256,
     \ 'n_suffix':         64,
     \ 'n_predict':        128,
@@ -112,6 +113,9 @@ Currently the default config is:
 
 - {api_key}             llama.cpp server api key (optional)
 
+- {model}               model name to use when multiple models are
+                        loaded (optional)
+
 - {n_prefix}            number of lines before the cursor location to
                         include in the local prefix
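
Usage sketch (illustrative, not part of the patch): assuming user settings are merged over
s:default_config the same way the existing endpoint and api_key options are, the new model
option can be set from the vimrc like this. The model name below is only a placeholder for
whatever name the llama.cpp server reports for the model you want to use:

    " select a specific model on a multi-model server (model name is a placeholder)
    let g:llama_config = {
        \ 'endpoint': 'http://127.0.0.1:8012/infill',
        \ 'model':    'qwen2.5-coder-7b',
        \ }

When 'model' is left at its default of '', no model field is added to the /infill request.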