emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el [gnus-5_10-branch]


From: Andreas Schwab
Subject: [Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el [gnus-5_10-branch]
Date: Thu, 22 Jul 2004 13:12:00 -0400

Index: emacs/lisp/gnus/nnweb.el
diff -c /dev/null emacs/lisp/gnus/nnweb.el:1.11.2.1
*** /dev/null   Thu Jul 22 16:47:20 2004
--- emacs/lisp/gnus/nnweb.el    Thu Jul 22 16:45:51 2004
***************
*** 0 ****
--- 1,591 ----
+ ;;; nnweb.el --- retrieving articles via web search engines
+ ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+ ;;        Free Software Foundation, Inc.
+ 
+ ;; Author: Lars Magne Ingebrigtsen <address@hidden>
+ ;; Keywords: news
+ 
+ ;; This file is part of GNU Emacs.
+ 
+ ;; GNU Emacs is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ 
+ ;; GNU Emacs is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ ;; GNU General Public License for more details.
+ 
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU Emacs; see the file COPYING.  If not, write to the
+ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+ 
+ ;;; Commentary:
+ 
+ ;; Note: You need to have `w3' installed for some functions to work.
+ 
+ ;;; Code:
+ 
+ (eval-when-compile (require 'cl))
+ 
+ (require 'nnoo)
+ (require 'message)
+ (require 'gnus-util)
+ (require 'gnus)
+ (require 'nnmail)
+ (require 'mm-util)
+ (require 'mm-url)
+ (eval-and-compile
+   (ignore-errors
+     (require 'url)))
+ (autoload 'w3-parse-buffer "w3-parse")
+ 
+ (nnoo-declare nnweb)
+ 
+ (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
+   "Where nnweb will save its files.")
+ 
+ (defvoo nnweb-type 'google
+   "What search engine type is being used.
+ Valid types include `google', `dejanews', and `gmane'.")
+ 
+ (defvar nnweb-type-definition
+   '((google
+      (article . ignore)
+      (id . "http://groups.google.com/groups?selm=%s&output=gplain";)
+      (reference . identity)
+      (map . nnweb-google-create-mapping)
+      (search . nnweb-google-search)
+      (address . "http://groups.google.com/groups";)
+      (identifier . nnweb-google-identity))
+     (dejanews ;; alias of google
+      (article . ignore)
+      (id . "http://groups.google.com/groups?selm=%s&output=gplain";)
+      (reference . identity)
+      (map . nnweb-google-create-mapping)
+      (search . nnweb-google-search)
+      (address . "http://groups.google.com/groups";)
+      (identifier . nnweb-google-identity))
+     (gmane
+      (article . nnweb-gmane-wash-article)
+      (id . "http://gmane.org/view.php?group=%s";)
+      (reference . identity)
+      (map . nnweb-gmane-create-mapping)
+      (search . nnweb-gmane-search)
+      (address . "http://gmane.org/";)
+      (identifier . nnweb-gmane-identity)))
+   "Type-definition alist.")
+ 
+ (defvoo nnweb-search nil
+   "Search string to feed to Google.")
+ 
+ (defvoo nnweb-max-hits 999
+   "Maximum number of hits to display.")
+ 
+ (defvoo nnweb-ephemeral-p nil
+   "Whether this nnweb server is ephemeral.")
+ 
+ ;;; Internal variables
+ 
+ (defvoo nnweb-articles nil)
+ (defvoo nnweb-buffer nil)
+ (defvoo nnweb-group-alist nil)
+ (defvoo nnweb-group nil)
+ (defvoo nnweb-hashtb nil)
+ 
+ ;;; Interface functions
+ 
+ (nnoo-define-basics nnweb)
+ 
+ (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old)
+   (nnweb-possibly-change-server group server)
+   (save-excursion
+     (set-buffer nntp-server-buffer)
+     (erase-buffer)
+     (let (article header)
+       (mm-with-unibyte-current-buffer
+       (while (setq article (pop articles))
+         (when (setq header (cadr (assq article nnweb-articles)))
+           (nnheader-insert-nov header))))
+       'nov)))
+ 
+ (deffoo nnweb-request-scan (&optional group server)
+   (nnweb-possibly-change-server group server)
+   (if nnweb-ephemeral-p
+       (setq nnweb-hashtb (gnus-make-hashtable 4095)))
+   (funcall (nnweb-definition 'map))
+   (unless nnweb-ephemeral-p
+     (nnweb-write-active)
+     (nnweb-write-overview group)))
+ 
+ (deffoo nnweb-request-group (group &optional server dont-check)
+   (nnweb-possibly-change-server nil server)
+   (when (and group
+            (not (equal group nnweb-group))
+            (not nnweb-ephemeral-p))
+     (setq nnweb-group group
+         nnweb-articles nil)
+     (let ((info (assoc group nnweb-group-alist)))
+       (when info
+       (setq nnweb-type (nth 2 info))
+       (setq nnweb-search (nth 3 info))
+       (unless dont-check
+         (nnweb-read-overview group)))))
+   (cond
+    ((not nnweb-articles)
+     (nnheader-report 'nnweb "No matching articles"))
+    (t
+     (let ((active (if nnweb-ephemeral-p
+                     (cons (caar nnweb-articles)
+                           (caar (last nnweb-articles)))
+                   (cadr (assoc group nnweb-group-alist)))))
+       (nnheader-report 'nnweb "Opened group %s" group)
+       (nnheader-insert
+        "211 %d %d %d %s\n" (length nnweb-articles)
+        (car active) (cdr active) group)))))
+ 
+ (deffoo nnweb-close-group (group &optional server)
+   (nnweb-possibly-change-server group server)
+   (when (gnus-buffer-live-p nnweb-buffer)
+     (save-excursion
+       (set-buffer nnweb-buffer)
+       (set-buffer-modified-p nil)
+       (kill-buffer nnweb-buffer)))
+   t)
+ 
+ (deffoo nnweb-request-article (article &optional group server buffer)
+   (nnweb-possibly-change-server group server)
+   (save-excursion
+     (set-buffer (or buffer nntp-server-buffer))
+     (let* ((header (cadr (assq article nnweb-articles)))
+          (url (and header (mail-header-xref header))))
+       (when (or (and url
+                    (mm-with-unibyte-current-buffer
+                      (mm-url-insert url)))
+               (and (stringp article)
+                    (nnweb-definition 'id t)
+                    (let ((fetch (nnweb-definition 'id))
+                          art active)
+                      (when (string-match "^<\\(.*\\)>$" article)
+                        (setq art (match-string 1 article)))
+                      (when (and fetch art)
+                        (setq url (format fetch art))
+                        (mm-with-unibyte-current-buffer
+                          (mm-url-insert url))
+                        (if (nnweb-definition 'reference t)
+                            (setq article
+                                  (funcall (nnweb-definition
+                                            'reference) article)))))))
+       (unless nnheader-callback-function
+         (funcall (nnweb-definition 'article)))
+       (nnheader-report 'nnweb "Fetched article %s" article)
+       (cons group (and (numberp article) article))))))
+ 
+ (deffoo nnweb-close-server (&optional server)
+   (when (and (nnweb-server-opened server)
+            (gnus-buffer-live-p nnweb-buffer))
+     (save-excursion
+       (set-buffer nnweb-buffer)
+       (set-buffer-modified-p nil)
+       (kill-buffer nnweb-buffer)))
+   (nnoo-close-server 'nnweb server))
+ 
+ (deffoo nnweb-request-list (&optional server)
+   (nnweb-possibly-change-server nil server)
+   (save-excursion
+     (set-buffer nntp-server-buffer)
+     (nnmail-generate-active nnweb-group-alist)
+     t))
+ 
+ (deffoo nnweb-request-update-info (group info &optional server)
+   (nnweb-possibly-change-server group server))
+ 
+ (deffoo nnweb-asynchronous-p ()
+   nil)
+ 
+ (deffoo nnweb-request-create-group (group &optional server args)
+   (nnweb-possibly-change-server nil server)
+   (nnweb-request-delete-group group)
+   (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist)
+   (nnweb-write-active)
+   t)
+ 
+ (deffoo nnweb-request-delete-group (group &optional force server)
+   (nnweb-possibly-change-server group server)
+   (gnus-pull group nnweb-group-alist t)
+   (nnweb-write-active)
+   (gnus-delete-file (nnweb-overview-file group))
+   t)
+ 
+ (nnoo-define-skeleton nnweb)
+ 
+ ;;; Internal functions
+ 
+ (defun nnweb-read-overview (group)
+   "Read the overview of GROUP and build the map."
+   (when (file-exists-p (nnweb-overview-file group))
+     (mm-with-unibyte-buffer
+       (nnheader-insert-file-contents (nnweb-overview-file group))
+       (goto-char (point-min))
+       (let (header)
+       (while (not (eobp))
+         (setq header (nnheader-parse-nov))
+         (forward-line 1)
+         (push (list (mail-header-number header)
+                     header (mail-header-xref header))
+               nnweb-articles)
+         (nnweb-set-hashtb header (car nnweb-articles)))))))
+ 
+ (defun nnweb-write-overview (group)
+   "Write the overview file for GROUP."
+   (with-temp-file (nnweb-overview-file group)
+     (let ((articles nnweb-articles))
+       (while articles
+       (nnheader-insert-nov (cadr (pop articles)))))))
+ 
+ (defun nnweb-set-hashtb (header data)
+   (gnus-sethash (nnweb-identifier (mail-header-xref header))
+               data nnweb-hashtb))
+ 
+ (defun nnweb-get-hashtb (url)
+   (gnus-gethash (nnweb-identifier url) nnweb-hashtb))
+ 
+ (defun nnweb-identifier (ident)
+   (funcall (nnweb-definition 'identifier) ident))
+ 
+ (defun nnweb-overview-file (group)
+   "Return the name of the overview file of GROUP."
+   (nnheader-concat nnweb-directory group ".overview"))
+ 
+ (defun nnweb-write-active ()
+   "Save the active file."
+   (gnus-make-directory nnweb-directory)
+   (with-temp-file (nnheader-concat nnweb-directory "active")
+     (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer))))
+ 
+ (defun nnweb-read-active ()
+   "Read the active file."
+   (load (nnheader-concat nnweb-directory "active") t t t))
+ 
+ (defun nnweb-definition (type &optional noerror)
+   "Return the definition of TYPE."
+   (let ((def (cdr (assq type (assq nnweb-type nnweb-type-definition)))))
+     (when (and (not def)
+              (not noerror))
+       (error "Undefined definition %s" type))
+     def))
+ 
+ (defun nnweb-possibly-change-server (&optional group server)
+   (nnweb-init server)
+   (when server
+     (unless (nnweb-server-opened server)
+       (nnweb-open-server server)))
+   (unless nnweb-group-alist
+     (nnweb-read-active))
+   (unless nnweb-hashtb
+     (setq nnweb-hashtb (gnus-make-hashtable 4095)))
+   (when group
+     (when (and (not nnweb-ephemeral-p)
+              (equal group nnweb-group))
+       (nnweb-request-group group nil t))))
+ 
+ (defun nnweb-init (server)
+   "Initialize buffers and such."
+   (unless (gnus-buffer-live-p nnweb-buffer)
+     (setq nnweb-buffer
+         (save-excursion
+           (mm-with-unibyte
+             (nnheader-set-temp-buffer
+              (format " *nnweb %s %s %s*"
+                      nnweb-type nnweb-search server))
+             (current-buffer))))))
+ 
+ ;;;
+ ;;; Deja bought by google.com
+ ;;;
+ 
+ (defun nnweb-google-wash-article ()
+   (let ((case-fold-search t) url)
+     (goto-char (point-min))
+     (re-search-forward "^<pre>" nil t)
+     (narrow-to-region (point-min) (point))
+     (search-backward "<table " nil t 2)
+     (delete-region (point-min) (point))
+     (if (re-search-forward "Search Result [0-9]+" nil t)
+       (replace-match ""))
+     (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
+       (replace-match ""))
+     (goto-char (point-min))
+     (while (search-forward "<br>" nil t)
+       (replace-match "\n"))
+     (mm-url-remove-markup)
+     (goto-char (point-min))
+     (while (re-search-forward "^[ \t]*\n" nil t)
+       (replace-match ""))
+     (goto-char (point-max))
+     (insert "\n")
+     (widen)
+     (narrow-to-region (point) (point-max))
+     (search-forward "</pre>" nil t)
+     (delete-region (point) (point-max))
+     (mm-url-remove-markup)
+     (widen)))
+ 
+ (defun nnweb-google-parse-1 (&optional Message-ID)
+   (let ((i 0)
+       (case-fold-search t)
+       (active (cadr (assoc nnweb-group nnweb-group-alist)))
+       Subject Score Date Newsgroups From
+       map url mid)
+     (unless active
+       (push (list nnweb-group (setq active (cons 1 0))
+                 nnweb-type nnweb-search)
+           nnweb-group-alist))
+     ;; Go through all the article hits on this page.
+     (goto-char (point-min))
+     (while (re-search-forward
+           "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+       (setq mid (match-string 2)
+           url (format
+                "http://groups.google.com/groups?selm=%s&output=gplain"; mid))
+       (narrow-to-region (search-forward ">" nil t)
+                       (search-forward "</a>" nil t))
+       (mm-url-remove-markup)
+       (mm-url-decode-entities)
+       (setq Subject (buffer-string))
+       (goto-char (point-max))
+       (widen)
+       (forward-line 2)
+       (when (looking-at "<br><font[^>]+>")
+       (goto-char (match-end 0)))
+       (if (not (looking-at "<a[^>]+>"))
+         (skip-chars-forward " \t")
+       (narrow-to-region (point)
+                         (search-forward "</a>" nil t))
+       (mm-url-remove-markup)
+       (mm-url-decode-entities)
+       (setq Newsgroups (buffer-string))
+       (goto-char (point-max))
+       (widen)
+       (skip-chars-forward "- \t"))
+       (when (looking-at
+            "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ 
\t]*\\([^<]*\\) - <a")
+       (setq From (match-string 4)
+             Date (format "%s %s 00:00:00 %s"
+                          (match-string 2) (match-string 1)
+                          (match-string 3))))
+       (forward-line 1)
+       (incf i)
+       (unless (nnweb-get-hashtb url)
+       (push
+        (list
+         (incf (cdr active))
+         (make-full-mail-header
+          (cdr active) (if Newsgroups
+                           (concat  "(" Newsgroups ") " Subject)
+                         Subject)
+          From Date (or Message-ID mid)
+          nil 0 0 url))
+        map)
+       (nnweb-set-hashtb (cadar map) (car map))))
+     map))
+ 
+ (defun nnweb-google-reference (id)
+   (let ((map (nnweb-google-parse-1 id)) header)
+     (setq nnweb-articles
+         (nconc nnweb-articles map))
+     (when (setq header (cadar map))
+       (mm-with-unibyte-current-buffer
+       (mm-url-insert (mail-header-xref header)))
+       (caar map))))
+ 
+ (defun nnweb-google-create-mapping ()
+   "Perform the search and create a number-to-url alist."
+   (save-excursion
+     (set-buffer nnweb-buffer)
+     (erase-buffer)
+     (when (funcall (nnweb-definition 'search) nnweb-search)
+       (let ((more t)
+             (i 0))
+         (while more
+           (setq nnweb-articles
+                 (nconc nnweb-articles (nnweb-google-parse-1)))
+           ;; Check if there are more articles to fetch
+           (goto-char (point-min))
+           (incf i 100)
+           (if (or (not (re-search-forward
+                         "<td nowrap><a href=\\([^>]+\\).*<span 
class=b>Next</span>" nil t))
+                   (>= i nnweb-max-hits))
+               (setq more nil)
+             ;; Yup, there are more articles
+             (setq more (concat "http://groups.google.com"; (match-string 1)))
+           (when more
+             (erase-buffer)
+             (mm-url-insert more))))
+         ;; Return the articles in the right order.
+         (setq nnweb-articles
+               (sort nnweb-articles 'car-less-than-car))))))
+ 
+ (defun nnweb-google-search (search)
+   (mm-url-insert
+    (concat
+     (nnweb-definition 'address)
+     "?"
+     (mm-url-encode-www-form-urlencoded
+      `(("q" . ,search)
+        ("num". "100")
+        ("hq" . "")
+        ("hl" . "")
+        ("lr" . "")
+        ("safe" . "off")
+        ("sites" . "groups")))))
+   t)
+ 
+ (defun nnweb-google-identity (url)
+   "Return an unique identifier based on URL."
+   (if (string-match "selm=\\([^ &>]+\\)" url)
+       (match-string 1 url)
+     url))
+ 
+ ;;;
+ ;;; gmane.org
+ ;;;
+ (defun nnweb-gmane-create-mapping ()
+   "Perform the search and create a number-to-url alist."
+   (save-excursion
+     (set-buffer nnweb-buffer)
+     (erase-buffer)
+     (when (funcall (nnweb-definition 'search) nnweb-search)
+       (let ((more t)
+           (case-fold-search t)
+           (active (or (cadr (assoc nnweb-group nnweb-group-alist))
+                       (cons 1 0)))
+           subject group url
+           map)
+         ;; Remove stuff from the beginning of results
+       (goto-char (point-min))
+       (search-forward "Search Results</h1><ul>" nil t)
+       (delete-region (point-min) (point))
+       (goto-char (point-min))
+       ;; Iterate over the actual hits
+       (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t)
+           (setq url (concat "http://gmane.org/"; (match-string 1)))
+           (setq subject (match-string 2))
+         (unless (nnweb-get-hashtb url)
+           (push
+            (list
+             (incf (cdr active))
+             (make-full-mail-header
+              (cdr active) (concat  "(" group ") " subject) nil nil
+              nil nil 0 0 url))
+            map)
+           (nnweb-set-hashtb (cadar map) (car map))))
+       ;; Return the articles in the right order.
+       (setq nnweb-articles
+             (sort (nconc nnweb-articles map) 'car-less-than-car))))))
+ 
+ (defun nnweb-gmane-wash-article ()
+   (let ((case-fold-search t))
+     (goto-char (point-min))
+     (re-search-forward "<!--X-Head-of-Message-->" nil t)
+     (delete-region (point-min) (point))
+     (goto-char (point-min))
+     (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
+       (replace-match "\\1\\2" t)
+       (forward-line 1))
+     (mm-url-remove-markup)))
+ 
+ (defun nnweb-gmane-search (search)
+   (mm-url-insert
+    (concat
+     (nnweb-definition 'address)
+     "?"
+     (mm-url-encode-www-form-urlencoded
+      `(("query" . ,search)))))
+   (setq buffer-file-name nil)
+   t)
+ 
+ 
+ (defun nnweb-gmane-identity (url)
+   "Return a unique identifier based on URL."
+   (if (string-match "group=\\(.+\\)" url)
+       (match-string 1 url)
+     url))
+ 
+ ;;;
+ ;;; General web/w3 interface utility functions
+ ;;;
+ 
+ (defun nnweb-insert-html (parse)
+   "Insert HTML based on a w3 parse tree."
+   (if (stringp parse)
+       (insert (nnheader-string-as-multibyte parse))
+     (insert "<" (symbol-name (car parse)) " ")
+     (insert (mapconcat
+            (lambda (param)
+              (concat (symbol-name (car param)) "="
+                      (prin1-to-string
+                       (if (consp (cdr param))
+                           (cadr param)
+                         (cdr param)))))
+            (nth 1 parse)
+            " "))
+     (insert ">\n")
+     (mapcar 'nnweb-insert-html (nth 2 parse))
+     (insert "</" (symbol-name (car parse)) ">\n")))
+ 
+ (defun nnweb-parse-find (type parse &optional maxdepth)
+   "Find the element of TYPE in PARSE."
+   (catch 'found
+     (nnweb-parse-find-1 type parse maxdepth)))
+ 
+ (defun nnweb-parse-find-1 (type contents maxdepth)
+   (when (or (null maxdepth)
+           (not (zerop maxdepth)))
+     (when (consp contents)
+       (when (eq (car contents) type)
+       (throw 'found contents))
+       (when (listp (cdr contents))
+       (dolist (element contents)
+         (when (consp element)
+           (nnweb-parse-find-1 type element
+                               (and maxdepth (1- maxdepth)))))))))
+ 
+ (defun nnweb-parse-find-all (type parse)
+   "Find all elements of TYPE in PARSE."
+   (catch 'found
+     (nnweb-parse-find-all-1 type parse)))
+ 
+ (defun nnweb-parse-find-all-1 (type contents)
+   (let (result)
+     (when (consp contents)
+       (if (eq (car contents) type)
+         (push contents result)
+       (when (listp (cdr contents))
+         (dolist (element contents)
+           (when (consp element)
+             (setq result
+                   (nconc result (nnweb-parse-find-all-1 type element))))))))
+     result))
+ 
+ (defvar nnweb-text)
+ (defun nnweb-text (parse)
+   "Return a list of text contents in PARSE."
+   (let ((nnweb-text nil))
+     (nnweb-text-1 parse)
+     (nreverse nnweb-text)))
+ 
+ (defun nnweb-text-1 (contents)
+   (dolist (element contents)
+     (if (stringp element)
+       (push element nnweb-text)
+       (when (and (consp element)
+                (listp (cdr element)))
+       (nnweb-text-1 element)))))
+ 
+ (provide 'nnweb)
+ 
+ ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697
+ ;;; nnweb.el ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]