From: ELPA Syncer
Subject: [elpa] externals/llm fe064b2bc3 04/10: Create streaming method and implement it for vertex
Date: Sun, 1 Oct 2023 18:58:35 -0400 (EDT)
branch: externals/llm
commit fe064b2bc386d070c34a73d58b61ad6d3d9b8ac2
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Create streaming method and implement it for vertex
Also create a tester for streaming
---
llm-tester.el | 13 +++--
llm-vertex.el | 173 +++++++++++++++++++++++++++++++++++++---------------------
llm.el | 19 ++++++-
3 files changed, 136 insertions(+), 69 deletions(-)
diff --git a/llm-tester.el b/llm-tester.el
index e5c6eaa02a..bbfe5faea1 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -111,7 +111,7 @@
(defun llm-tester-chat-streaming (provider)
"Test that PROVIDER can stream back LLM chat responses."
(message "Testing provider %s for streaming chat" (type-of provider))
- (let ((accum)
+ (let ((streamed)
(counter 0))
(llm-chat-streaming
provider
@@ -123,11 +123,12 @@
:temperature 0.5
:max-tokens 200)
(lambda (text)
- (if text (progn (message "Chunk retrieved")
- (cl-incf counter)
- (setq accum text))
- (message "SUCCESS: Provider %s provided a response %s in %d parts"
- (type-of provider) accum counter)))
+ (cl-incf counter)
+ (setq streamed text))
+ (lambda (text)
+ (message "SUCCESS: Provider %s provided a streamed response %s in %d
parts, complete text is: %s" (type-of provider) streamed counter text)
+ (if (= 0 counter)
+ (message "ERROR: Provider %s streaming request never happened!"
(type-of provider))))
(lambda (type message)
(message "ERROR: Provider %s returned an error of type %s with message
%s" (type-of provider) type message)))))
diff --git a/llm-vertex.el b/llm-vertex.el
index b878e92fbb..775d51317f 100644
--- a/llm-vertex.el
+++ b/llm-vertex.el
@@ -118,6 +118,14 @@ KEY-GENTIME keeps track of when the key was generated, because the key must be r
'error
(llm-vertex--error-message data)))))
+(cl-defmethod llm-embedding ((provider llm-vertex) string)
+ (llm-vertex-refresh-key provider)
+ (llm-vertex--handle-response
+ (llm-request-sync (llm-vertex--embedding-url provider)
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
+ :data `(("instances" . [(("content" . ,string))])))
+ #'llm-vertex--embedding-extract-response))
+
(defun llm-vertex--parameters-ui (prompt)
"Return a alist setting parameters, appropriate for the ui API.
If nothing needs to be set, return nil."
@@ -126,31 +134,54 @@ If nothing needs to be set, return nil."
(push `("temperature" . (("float_val" . ,(llm-chat-prompt-temperature
prompt)))) param-struct-alist))
(when (llm-chat-prompt-max-tokens prompt)
(push `("maxOutputTokens" . (("int_val" . ,(llm-chat-prompt-max-tokens
prompt)))) param-struct-alist))
- ;; Wrap in the "parameters" and "struct_val" keys
+ ;; Wrap in the "parameters" and "struct_val" keys
(if param-struct-alist
`(("parameters" . (("struct_val" . ,param-struct-alist)))))))
-(defun llm-vertex--parameters-v1 (prompt)
- "Return an alist setting parameters, appropriate for the v1 API.
-If nothing needs to be set, return nil."
- (let ((param-struct-alist))
- (when (llm-chat-prompt-temperature prompt)
- (push `("temperature" . ,(llm-chat-prompt-temperature prompt))
param-struct-alist))
- (when (llm-chat-prompt-max-tokens prompt)
- (push `("maxOutputTokens" . ,(llm-chat-prompt-max-tokens prompt))
param-struct-alist))
- ;; Wrap in the "parameters" and "struct_val" keys
- (if param-struct-alist
- `(("parameters" . ,param-struct-alist)))))
+(defun llm-vertex--get-chat-response-ui (response)
+ "Return the actual response from the RESPONSE struct returned."
+ (pcase (type-of response)
+ ('vector (mapconcat #'llm-vertex--get-chat-response-ui
+ response ""))
+ ('cons (let* ((outputs (cdr (assoc 'outputs response)))
+ (structVal-list (cdr (assoc 'structVal (aref outputs 0))))
+ (candidates (cdr (assoc 'candidates structVal-list)))
+ (listVal (cdr (assoc 'listVal candidates)))
+ (structVal (cdr (assoc 'structVal (aref listVal 0))))
+ (content (cdr (assoc 'content structVal)))
+ (stringVal (aref (cdr (assoc 'stringVal content)) 0)))
+ stringVal))))
-(cl-defmethod llm-embedding ((provider llm-vertex) string)
- (llm-vertex-refresh-key provider)
- (llm-vertex--handle-response
- (llm-request-sync (llm-vertex--embedding-url provider)
- :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data `(("instances" . [(("content" . ,string))])))
- #'llm-vertex--embedding-extract-response))
+(defun llm--vertex--get-partial-chat-ui-repsonse (response)
+ "Return the partial response from as much of RESPONSE as we can parse.
+If the response is not parseable, return nil."
+ (with-temp-buffer
+ (insert response)
+ (let ((start (point-min))
+ (end-of-valid-chunk
+ (save-excursion
+ (goto-char (point-max))
+ (search-backward "\n," nil t)
+ (point))))
+ (when (and start end-of-valid-chunk)
+ ;; It'd be nice if our little algorithm always worked, but it doesn't, so let's
+ ;; just ignore when it fails. As long as it mostly succeeds, it should be fine.
+ (condition-case nil
+ (when-let
+ ((json (ignore-errors
+ (json-read-from-string
+ (concat
+ (buffer-substring-no-properties
+ start end-of-valid-chunk)
+ ;; Close off the json
+ "]")))))
+ (llm-vertex--get-chat-response-ui json))
+ (error (message "Unparseable buffer saved to
*llm-vertex-unparseable*")
+ (with-current-buffer (get-buffer-create
"*llm-vertex-unparseable*")
+ (erase-buffer)
+ (insert response))))))))
-(defun llm-vertex--input-ui (prompt)
+(defun llm-vertex--chat-request-ui (prompt)
"Return an alist with chat input, appropriate for ui API.
PROMPT contains the input to the call to the chat API."
(let ((system-prompt))
@@ -161,38 +192,41 @@ PROMPT contains the input to the call to the chat API."
(mapconcat (lambda (example)
(concat "User:\n" (car example)
"\nAssistant:\n" (cdr example)))
(llm-chat-prompt-examples prompt) "\n"))
- system-prompt)))
- `(("inputs" . ((("struct_val" .
- (("messages" .
- (("list_val" .
- ,(mapcar (lambda (interaction)
- `(("struct_val" . (("content" .
- (("string_val" .
- (,(format "'\"%s\"'"
- (llm-chat-prompt-interaction-content
- interaction))))))
- ("author" .
- (("string_val" .
- ,(format "'\"%s\"'"
- (pcase (llm-chat-prompt-interaction-role interaction)
- ('user "user")
- ('system "system")
- ('assistant "assistant"))))))))))
- (if system-prompt
- (cons (make-llm-chat-prompt-interaction
- :role 'system
- :content (mapconcat #'identity (nreverse system-prompt) "\n"))
- (llm-chat-prompt-interactions prompt))
- (llm-chat-prompt-interactions prompt))))))))))))))
-
-(defun llm-vertex--input-v1 (prompt)
- "Return an alist with chat input, appropriate for v1 API.
-PROMPT contains the input to the call to the chat API."
- (let ((param-alist))
+ system-prompt))
+ (append
+ `(("inputs" . ((("struct_val" .
+ (("messages" .
+ (("list_val" .
+ ,(mapcar (lambda (interaction)
+ `(("struct_val" . (("content" .
+ (("string_val" .
+ (,(format "'\"%s\"'"
+ (llm-chat-prompt-interaction-content
+ interaction))))))
+ ("author" .
+ (("string_val" .
+ ,(format "'\"%s\"'"
+ (pcase (llm-chat-prompt-interaction-role interaction)
+ ('user "user")
+ ('system "system")
+ ('assistant "assistant"))))))))))
+ (if system-prompt
+ (cons (make-llm-chat-prompt-interaction
+ :role 'system
+ :content (mapconcat #'identity (nreverse system-prompt) "\n"))
+ (llm-chat-prompt-interactions prompt))
+ (llm-chat-prompt-interactions prompt))))))))))))
+ (llm-vertex--parameters-ui prompt))))
+
+(defun llm-vertex--chat-request-v1 (prompt)
+ "From PROMPT, create the data to in the vertex chat request."
+ (let ((prompt-alist)
+ (params-alist))
(when (llm-chat-prompt-context prompt)
- (push `("context" . ,(llm-chat-prompt-context prompt)) param-alist))
+ (push `("context" . ,(llm-chat-prompt-context prompt)) prompt-alist))
(when (llm-chat-prompt-examples prompt)
- (push `("examples" . ,(mapcar (lambda (example)
+ (push `("examples" . ,(apply #'vector
+ (mapcar (lambda (example)
`(("input" . (("content" . ,(car
example))))
("output" . (("content" . ,(cdr
example))))))
(llm-chat-prompt-examples prompt))))
@@ -214,13 +248,16 @@ PROMPT contains the input to the call to the chat API."
`(("instances" . [,prompt-alist])
("parameters" . ,params-alist))))
-(defun llm-vertex--chat-url (provider)
- "Return the correct url to use for PROVIDER."
- (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
- llm-vertex-gcloud-region
- (llm-vertex-project provider)
- llm-vertex-gcloud-region
- (or (llm-vertex-chat-model provider) "chat-bison")))
+(defun llm-vertex--chat-url (provider streaming)
+"Return the correct url to use for PROVIDER.
+If STREAMING is non-nil, use the URL for the streaming API."
+ (format "https://%s-aiplatform.googleapis.com/%s/projects/%s/locations/%s/publishers/google/models/%s:%s"
+ llm-vertex-gcloud-region
+ (if streaming "ui" "v1")
+ (llm-vertex-project provider)
+ llm-vertex-gcloud-region
+ (or (llm-vertex-chat-model provider) "chat-bison")
+ (if streaming "serverStreamingPredict" "predict")))
(defun llm-vertex--chat-extract-response (response)
"Return the chat response contained in the server RESPONSE."
@@ -228,9 +265,9 @@ PROMPT contains the input to the call to the chat API."
(cl-defmethod llm-chat-async ((provider llm-vertex) prompt response-callback error-callback)
(llm-vertex-refresh-key provider)
- (llm-request-async (llm-vertex--chat-url provider)
+ (llm-request-async (llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request prompt)
+ :data (llm-vertex--chat-request-v1 prompt)
:on-success (lambda (data)
(funcall response-callback
(llm-vertex--chat-extract-response data)))
:on-error (lambda (_ data)
@@ -240,11 +277,25 @@ PROMPT contains the input to the call to the chat API."
(cl-defmethod llm-chat ((provider llm-vertex) prompt)
(llm-vertex-refresh-key provider)
(llm-vertex--handle-response
- (llm-request-sync (llm-vertex--chat-url provider)
+ (llm-request-sync (llm-vertex--chat-url provider nil)
:headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
- :data (llm-vertex--chat-request prompt))
+ :data (llm-vertex--chat-request-v1 prompt))
#'llm-vertex--chat-extract-response))
+(cl-defmethod llm-chat-streaming ((provider llm-vertex) prompt partial-callback response-callback error-callback)
+ (llm-vertex-refresh-key provider)
+ (llm-request-async (llm-vertex--chat-url provider t)
+ :headers `(("Authorization" . ,(format "Bearer %s" (llm-vertex-key provider))))
+ :data (llm-vertex--chat-request-ui prompt)
+ :on-partial (lambda (partial)
+ (when-let ((response (llm--vertex--get-partial-chat-ui-repsonse partial)))
+ (funcall partial-callback response)))
+ :on-success (lambda (data)
+ (funcall response-callback (llm-vertex--get-chat-response-ui data)))
+ :on-error (lambda (_ data)
+ (funcall error-callback 'error
+ (llm-vertex--error-message data)))))
+
(provide 'llm-vertex)
;;; llm-vertex.el ends here
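
For reference, the partial-parse trick used by llm--vertex--get-partial-chat-ui-repsonse above can be illustrated in isolation: truncate the streamed text at the last element separator ("\n,") and close off the JSON array, so the complete chunks received so far parse cleanly. The following is a minimal sketch of that idea, not part of the patch; the input string is made up, and real vertex streaming output has the same array-of-chunks shape but different contents.

(require 'json)

(let ((streamed "[{\"n\": 1}\n,{\"n\": 2}\n,{\"n\": 3"))
  (with-temp-buffer
    (insert streamed)
    (goto-char (point-max))
    ;; Cut at the last complete element, as the patch does with search-backward.
    (when (search-backward "\n," nil t)
      (json-read-from-string
       (concat (buffer-substring-no-properties (point-min) (point))
               ;; Close off the json, matching the comment in the patch.
               "]")))))
;; => [((n . 1)) ((n . 2))]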
diff --git a/llm.el b/llm.el
index d10222aa1e..2f5dea6f74 100644
--- a/llm.el
+++ b/llm.el
@@ -129,14 +129,29 @@ ERROR-CALLBACK receives the error response."
(ignore provider prompt response-callback error-callback)
(signal 'not-implemented nil))
-(cl-defgeneric llm-chat-streaming (provider prompt response-callback error-callback)
+(cl-defgeneric llm-chat-streaming (provider prompt partial-callback response-callback error-callback)
"Stream a response to PROMPT from PROVIDER.
PROMPT is a `llm-chat-prompt'.
+
+PARTIAL-CALLBACK is called with the string response as it is
+built up. Each call receives the entire response that has been
+received so far, as it is streamed back.
+
RESPONSE-CALLBACK receives each piece of the string response.
+
ERROR-CALLBACK receives the error response."
- (ignore provider prompt response-callback error-callback)
+ (ignore provider prompt partial-callback response-callback error-callback)
(signal 'not-implemented nil))
+(cl-defmethod llm-chat-streaming ((_ (eql nil)) _ _ _ _)
+ "Catch trivial configuration mistake."
+ (error "LLM provider was nil. Please set the provider in the application
you are using"))
+
+(cl-defmethod llm-chat-streaming :before (provider _ _ _ _)
+ "Issue a warning if the LLM is non-free."
+ (when-let (info (llm-nonfree-message-info provider))
+ (llm--warn-on-nonfree (car info) (cdr info))))
+
(cl-defmethod llm-chat-async ((_ (eql nil)) _ _ _)
"Catch trivial configuration mistake."
(error "LLM provider was nil. Please set the provider in the application
you are using"))