From: ELPA Syncer
Subject: [elpa] externals/llm fe064b2bc3 04/10: Create streaming method and implement it for vertex
Date: Sun, 1 Oct 2023 18:58:35 -0400 (EDT)
branch: externals/llm
commit fe064b2bc386d070c34a73d58b61ad6d3d9b8ac2
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Create streaming method and implement it for vertex
Also create a tester for streaming
---
llm-tester.el | 13 +++--
llm-vertex.el | 173 +++++++++++++++++++++++++++++++++++++---------------------
llm.el | 19 ++++++-
3 files changed, 136 insertions(+), 69 deletions(-)
diff --git a/llm-tester.el b/llm-tester.el
index e5c6eaa02a..bbfe5faea1 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -111,7 +111,7 @@
(defun llm-tester-chat-streaming (provider)
"Test that PROVIDER can stream back LLM chat responses."
(message "Testing provider %s for streaming chat" (type-of provider))
- (let ((accum)
+ (let ((streamed)
(counter 0))
(llm-chat-streaming
provider
@@ -123,11 +123,12 @@
:temperature 0.5
:max-tokens 200)
(lambda (text)
- (if text (progn (message "Chunk retrieved")
- (cl-incf counter)
- (setq accum text))
- (message "SUCCESS: Provider %s provided a response %s in %d parts"
- (type-of provider) accum counter)))
+ (cl-incf counter)
+ (setq streamed text))
+ (lambda (text)
+ (message "SUCCESS: Provider %s provided a streamed response %s in %d
parts, complete text is: %s" (type-of provider) streamed counter text)
+ (if (= 0 counter)
+ (message "ERROR: Provider %s streaming request never happened!"
(type-of provider))))
(lambda (type message)
(message "ERROR: Provider %s returned an error of type %s with message
%s" (type-of provider) type message)))))
diff --git a/llm-vertex.el b/llm-vertex.el
index b878e92fbb..775d51317f 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -118,6 +118,14 @@ KEY-GENTIME keeps track of when the key was generated, because the key must be r
'error
(llm-vertex--error-message data)))))
+(cl-defmethod llm-embedding ((provider llm-vertex) string)
+ (llm-vertex-refresh-key provider)
+ (llm-vertex--handle-response
+ (llm-request-sync (llm-vertex--embedding-url provider)
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
+ :data `(("instances" . [(("content" . ,string))])))
+ #'llm-vertex--embedding-extract-response))
+
(defun llm-vertex--parameters-ui (prompt)
"Return a alist setting parameters, appropriate for the ui API.
If nothing needs to be set, return nil."
@@ -126,31 +134,54 @@ If nothing needs to be set, return nil."
(push `("temperature" . (("float_val" . ,(llm-chat-prompt-temperature
prompt)))) param-struct-alist))
(when (llm-chat-prompt-max-tokens prompt)
(push `("maxOutputTokens" . (("int_val" . ,(llm-chat-prompt-max-tokens
prompt)))) param-struct-alist))
- ;; Wrap in the "parameters" and "struct_val" keys
+ ;; Wrap in the "parameters" and "struct_val" keys
(if param-struct-alist
`(("parameters" . (("struct_val" . ,param-struct-alist)))))))
-(defun llm-vertex--parameters-v1 (prompt)
- "Return an alist setting parameters, appropriate for the v1 API.
-If nothing needs to be set, return nil."
- (let ((param-struct-alist))
- (when (llm-chat-prompt-temperature prompt)
- (push `("temperature" . ,(llm-chat-prompt-temperature prompt))
param-struct-alist))
- (when (llm-chat-prompt-max-tokens prompt)
- (push `("maxOutputTokens" . ,(llm-chat-prompt-max-tokens prompt))
param-struct-alist))
- ;; Wrap in the "parameters" and "struct_val" keys
- (if param-struct-alist
- `(("parameters" . ,param-struct-alist)))))
+(defun llm-vertex--get-chat-response-ui (response)
+ "Return the actual response from the RESPONSE struct returned."
+ (pcase (type-of response)
+ ('vector (mapconcat #'llm-vertex--get-chat-response-ui
+ response ""))
+ ('cons (let* ((outputs (cdr (assoc 'outputs response)))
+ (structVal-list (cdr (assoc 'structVal (aref outputs 0))))
+ (candidates (cdr (assoc 'candidates structVal-list)))
+ (listVal (cdr (assoc 'listVal candidates)))
+ (structVal (cdr (assoc 'structVal (aref listVal 0))))
+ (content (cdr (assoc 'content structVal)))
+ (stringVal (aref (cdr (assoc 'stringVal content)) 0)))
+ stringVal))))
-(cl-defmethod llm-embedding ((provider llm-vertex) string)
- (llm-vertex-refresh-key provider)
- (llm-vertex--handle-response
- (llm-request-sync (llm-vertex--embedding-url provider)
- :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data `(("instances" . [(("content" . ,string))])))
- #'llm-vertex--embedding-extract-response))
+(defun llm--vertex--get-partial-chat-ui-repsonse (response)
+ "Return the partial response from as much of RESPONSE as we can parse.
+If the response is not parseable, return nil."
+ (with-temp-buffer
+ (insert response)
+ (let ((start (point-min))
+ (end-of-valid-chunk
+ (save-excursion
+ (goto-char (point-max))
+ (search-backward "\n," nil t)
+ (point))))
+ (when (and start end-of-valid-chunk)
+ ;; It'd be nice if our little algorithm always worked, but it doesn't, so let's
+ ;; just ignore when it fails. As long as it mostly succeeds, it should be fine.
+ (condition-case nil
+ (when-let
+ ((json (ignore-errors
+ (json-read-from-string
+ (concat
+ (buffer-substring-no-properties
+ start end-of-valid-chunk)
+ ;; Close off the json
+ "]")))))
+ (llm-vertex--get-chat-response-ui json))
+ (error (message "Unparseable buffer saved to
*llm-vertex-unparseable*")
+ (with-current-buffer (get-buffer-create
"*llm-vertex-unparseable*")
+ (erase-buffer)
+ (insert response))))))))
-(defun llm-vertex--input-ui (prompt)
+(defun llm-vertex--chat-request-ui (prompt)
"Return an alist with chat input, appropriate for ui API.
PROMPT contains the input to the call to the chat API."
(let ((system-prompt))
@@ -161,38 +192,41 @@ PROMPT contains the input to the call to the chat API."
(mapconcat (lambda (example)
(concat "User:\n" (car example)
"\nAssistant:\n" (cdr example)))
(llm-chat-prompt-examples prompt) "\n"))
- system-prompt)))
- `(("inputs" . ((("struct_val" .
- (("messages" .
- (("list_val" .
- ,(mapcar (lambda (interaction)
- `(("struct_val" . (("content" .
- (("string_val" .
- (,(format "'\"%s\"'"
- (llm-chat-prompt-interaction-content
- interaction))))))
- ("author" .
- (("string_val" .
- ,(format "'\"%s\"'"
- (pcase (llm-chat-prompt-interaction-role interaction)
- ('user "user")
- ('system "system")
- ('assistant "assistant"))))))))))
- (if system-prompt
- (cons (make-llm-chat-prompt-interaction
- :role 'system
- :content (mapconcat #'identity (nreverse system-prompt) "\n"))
- (llm-chat-prompt-interactions prompt))
- (llm-chat-prompt-interactions prompt))))))))))))))
-
-(defun llm-vertex--input-v1 (prompt)
- "Return an alist with chat input, appropriate for v1 API.
-PROMPT contains the input to the call to the chat API."
- (let ((param-alist))
+ system-prompt))
+ (append
+ `(("inputs" . ((("struct_val" .
+ (("messages" .
+ (("list_val" .
+ ,(mapcar (lambda (interaction)
+ `(("struct_val" . (("content" .
+ (("string_val" .
+ (,(format "'\"%s\"'"
+ (llm-chat-prompt-interaction-content
+ interaction))))))
+ ("author" .
+ (("string_val" .
+ ,(format "'\"%s\"'"
+ (pcase (llm-chat-prompt-interaction-role interaction)
+ ('user "user")
+ ('system "system")
+ ('assistant "assistant"))))))))))
+ (if system-prompt
+ (cons (make-llm-chat-prompt-interaction
+ :role 'system
+ :content (mapconcat #'identity (nreverse system-prompt) "\n"))
+ (llm-chat-prompt-interactions prompt))
+ (llm-chat-prompt-interactions prompt))))))))))))
+ (llm-vertex--parameters-ui prompt))))
+
+(defun llm-vertex--chat-request-v1 (prompt)
+ "From PROMPT, create the data to in the vertex chat request."
+ (let ((prompt-alist)
+ (params-alist))
(when (llm-chat-prompt-context prompt)
- (push `("context" . ,(llm-chat-prompt-context prompt)) param-alist))
+ (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
(when (llm-chat-prompt-examples prompt)
- (push `("examples" . ,(mapcar (lambda (example)
+ (push `("examples" . ,(apply #'vector
+ (mapcar (lambda (example)
`(("input" . (("content" . ,(car
example))))
("output" . (("content" . ,(cdr
example))))))
(llm-chat-prompt-examples prompt))))
@@ -214,13 +248,16 @@ PROMPT contains the input to the call to the chat API."
`(("instances" . [,prompt-alist])
("parameters" . ,params-alist))))
-(defun llm-vertex--chat-url (provider)
- "Return the correct url to use for PROVIDER."
- (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
- llm-vertex-gcloud-region
- (llm-vertex-project provider)
- llm-vertex-gcloud-region
- (or (llm-vertex-chat-model provider) "chat-bison")))
+(defun llm-vertex--chat-url (provider streaming)
+"Return the correct url to use for PROVIDER.
+If STREAMING is non-nil, use the URL for the streaming API."
+ (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:%s"
+ llm-vertex-gcloud-region
+ (if streaming "ui" "v1")
+ (llm-vertex-project provider)
+ llm-vertex-gcloud-region
+ (or (llm-vertex-chat-model provider) "chat-bison")
+ (if streaming "serverStreamingPredict" "predict")))
(defun llm-vertex--chat-extract-response (response)
"Return the chat response contained in the server RESPONSE."
@@ -228,9 +265,9 @@ PROMPT contains the input to the call to the chat API."
(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback error-callback)
(llm-vertex-refresh-key provider)
- (llm-request-async (llm-vertex--chat-url provider)
+ (llm-request-async (llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request prompt)
+ :data (llm-vertex--chat-request-v1 prompt)
:on-success (lambda (data)
(funcall response-callback
(llm-vertex--chat-extract-response data)))
:on-error (lambda (_ data)
@@ -240,11 +277,25 @@ PROMPT contains the input to the call to the chat API."
(cl-defmethod llm-chat ((provider llm-vertex) prompt)
(llm-vertex-refresh-key provider)
(llm-vertex--handle-response
- (llm-request-sync (llm-vertex--chat-url provider)
+ (llm-request-sync (llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request prompt))
+ :data (llm-vertex--chat-request-v1 prompt))
#'llm-vertex--chat-extract-response))
+(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt partial-callback response-callback error-callback)
+ (llm-vertex-refresh-key provider)
+ (llm-request-async (llm-vertex--chat-url provider t)
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
+ :data (llm-vertex--chat-request-ui prompt)
+ :on-partial (lambda (partial)
+ (when-let ((response (llm--vertex--get-partial-chat-ui-repsonse partial)))
+ (funcall partial-callback response)))
+ :on-success (lambda (data)
+ (funcall response-callback (llm-vertex--get-chat-response-ui data)))
+ :on-error (lambda (_ data)
+ (funcall error-callback 'error
+ (llm-vertex--error-message data)))))
+
(provide 'llm-vertex)
;;; llm-vertex.el ends here
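
For reference, the partial-parse trick used by llm--vertex--get-partial-chat-ui-repsonse above can be illustrated in isolation: truncate the streamed text at the last element separator ("\n,") and close off the JSON array, so the complete chunks received so far parse cleanly. The following is a minimal sketch of that idea, not part of the patch; the input string is made up, and real vertex streaming output has the same array-of-chunks shape but different contents.

(require 'json)

(let ((streamed "[{\"n\": 1}\n,{\"n\": 2}\n,{\"n\": 3"))
  (with-temp-buffer
    (insert streamed)
    (goto-char (point-max))
    ;; Cut at the last complete element, as the patch does with search-backward.
    (when (search-backward "\n," nil t)
      (json-read-from-string
       (concat (buffer-substring-no-properties (point-min) (point))
               ;; Close off the json, matching the comment in the patch.
               "]")))))
;; => [((n . 1)) ((n . 2))]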
diff --git a/llm.el b/llm.el
index d10222aa1e..2f5dea6f74 100644
--- a/llm.el
+++ b/llm.el
@@ -129,14 +129,29 @@ ERROR-CALLBACK receives the error response."
(ignore provider prompt response-callback error-callback)
(signal 'not-implemented nil))
-(cl-defgeneric llm-chat-streaming (provider prompt response-callback error-callback)
+(cl-defgeneric llm-chat-streaming (provider prompt partial-callback response-callback error-callback)
"Stream a response to PROMPT from PROVIDER.
PROMPT is a `llm-chat-prompt'.
+
+PARTIAL-CALLBACK is called with the string response as it is
+built up. Each call receives the entire response that has been
+received so far, as it is streamed back.
+
RESPONSE-CALLBACK receives each piece of the string response.
+
ERROR-CALLBACK receives the error response."
- (ignore provider prompt response-callback error-callback)
+ (ignore provider prompt partial-callback response-callback error-callback)
(signal 'not-implemented nil))
+(cl-defmethod llm-chat-streaming ((_ (eql nil)) _ _ _ _)
+ "Catch trivial configuration mistake."
+ (error "LLM provider was nil. Please set the provider in the application
you are using"))
+
+(cl-defmethod llm-chat-streaming :before (provider _ _ _ _)
+ "Issue a warning if the LLM is non-free."
+ (when-let (info (llm-nonfree-message-info provider))
+ (llm--warn-on-nonfree (car info) (cdr info))))
+
(cl-defmethod llm-chat-async ((_ (eql nil)) _ _ _)
"Catch trivial configuration mistake."
(error "LLM provider was nil. Please set the provider in the application
you are using"))