[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el [gnus-5_10-branch]
From: |
Andreas Schwab |
Subject: |
[Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el [gnus-5_10-branch] |
Date: |
Thu, 22 Jul 2004 13:12:00 -0400 |
Index: emacs/lisp/gnus/nnweb.el
diff -c /dev/null emacs/lisp/gnus/nnweb.el:1.11.2.1
*** /dev/null Thu Jul 22 16:47:20 2004
--- emacs/lisp/gnus/nnweb.el Thu Jul 22 16:45:51 2004
***************
*** 0 ****
--- 1,591 ----
+ ;;; nnweb.el --- retrieving articles via web search engines
+ ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+ ;; Free Software Foundation, Inc.
+
+ ;; Author: Lars Magne Ingebrigtsen <address@hidden>
+ ;; Keywords: news
+
+ ;; This file is part of GNU Emacs.
+
+ ;; GNU Emacs is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+
+ ;; GNU Emacs is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ;; GNU General Public License for more details.
+
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU Emacs; see the file COPYING. If not, write to the
+ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+
+ ;;; Commentary:
+
+ ;; Note: You need to have `w3' installed for some functions to work.
+
+ ;;; Code:
+
+ (eval-when-compile (require 'cl))
+
+ (require 'nnoo)
+ (require 'message)
+ (require 'gnus-util)
+ (require 'gnus)
+ (require 'nnmail)
+ (require 'mm-util)
+ (require 'mm-url)
+ (eval-and-compile
+ (ignore-errors
+ (require 'url)))
+ (autoload 'w3-parse-buffer "w3-parse")
+
+ (nnoo-declare nnweb)
+
+ (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
+ "Where nnweb will save its files.")
+
+ (defvoo nnweb-type 'google
+ "What search engine type is being used.
+ Valid types include `google', `dejanews', and `gmane'.")
+
+ (defvar nnweb-type-definition
+ '((google
+ (article . ignore)
+ (id . "http://groups.google.com/groups?selm=%s&output=gplain")
+ (reference . identity)
+ (map . nnweb-google-create-mapping)
+ (search . nnweb-google-search)
+ (address . "http://groups.google.com/groups")
+ (identifier . nnweb-google-identity))
+ (dejanews ;; alias of google
+ (article . ignore)
+ (id . "http://groups.google.com/groups?selm=%s&output=gplain")
+ (reference . identity)
+ (map . nnweb-google-create-mapping)
+ (search . nnweb-google-search)
+ (address . "http://groups.google.com/groups")
+ (identifier . nnweb-google-identity))
+ (gmane
+ (article . nnweb-gmane-wash-article)
+ (id . "http://gmane.org/view.php?group=%s")
+ (reference . identity)
+ (map . nnweb-gmane-create-mapping)
+ (search . nnweb-gmane-search)
+ (address . "http://gmane.org/")
+ (identifier . nnweb-gmane-identity)))
+ "Type-definition alist.")
+
+ (defvoo nnweb-search nil
+ "Search string to feed to Google.")
+
+ (defvoo nnweb-max-hits 999
+ "Maximum number of hits to display.")
+
+ (defvoo nnweb-ephemeral-p nil
+ "Whether this nnweb server is ephemeral.")
+
+ ;;; Internal variables
+
+ (defvoo nnweb-articles nil)
+ (defvoo nnweb-buffer nil)
+ (defvoo nnweb-group-alist nil)
+ (defvoo nnweb-group nil)
+ (defvoo nnweb-hashtb nil)
+
+ ;;; Interface functions
+
+ (nnoo-define-basics nnweb)
+
+ (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old)
+ (nnweb-possibly-change-server group server)
+ (save-excursion
+ (set-buffer nntp-server-buffer)
+ (erase-buffer)
+ (let (article header)
+ (mm-with-unibyte-current-buffer
+ (while (setq article (pop articles))
+ (when (setq header (cadr (assq article nnweb-articles)))
+ (nnheader-insert-nov header))))
+ 'nov)))
+
+ (deffoo nnweb-request-scan (&optional group server)
+ (nnweb-possibly-change-server group server)
+ (if nnweb-ephemeral-p
+ (setq nnweb-hashtb (gnus-make-hashtable 4095)))
+ (funcall (nnweb-definition 'map))
+ (unless nnweb-ephemeral-p
+ (nnweb-write-active)
+ (nnweb-write-overview group)))
+
+ (deffoo nnweb-request-group (group &optional server dont-check)
+ (nnweb-possibly-change-server nil server)
+ (when (and group
+ (not (equal group nnweb-group))
+ (not nnweb-ephemeral-p))
+ (setq nnweb-group group
+ nnweb-articles nil)
+ (let ((info (assoc group nnweb-group-alist)))
+ (when info
+ (setq nnweb-type (nth 2 info))
+ (setq nnweb-search (nth 3 info))
+ (unless dont-check
+ (nnweb-read-overview group)))))
+ (cond
+ ((not nnweb-articles)
+ (nnheader-report 'nnweb "No matching articles"))
+ (t
+ (let ((active (if nnweb-ephemeral-p
+ (cons (caar nnweb-articles)
+ (caar (last nnweb-articles)))
+ (cadr (assoc group nnweb-group-alist)))))
+ (nnheader-report 'nnweb "Opened group %s" group)
+ (nnheader-insert
+ "211 %d %d %d %s\n" (length nnweb-articles)
+ (car active) (cdr active) group)))))
+
+ (deffoo nnweb-close-group (group &optional server)
+ (nnweb-possibly-change-server group server)
+ (when (gnus-buffer-live-p nnweb-buffer)
+ (save-excursion
+ (set-buffer nnweb-buffer)
+ (set-buffer-modified-p nil)
+ (kill-buffer nnweb-buffer)))
+ t)
+
+ (deffoo nnweb-request-article (article &optional group server buffer)
+ (nnweb-possibly-change-server group server)
+ (save-excursion
+ (set-buffer (or buffer nntp-server-buffer))
+ (let* ((header (cadr (assq article nnweb-articles)))
+ (url (and header (mail-header-xref header))))
+ (when (or (and url
+ (mm-with-unibyte-current-buffer
+ (mm-url-insert url)))
+ (and (stringp article)
+ (nnweb-definition 'id t)
+ (let ((fetch (nnweb-definition 'id))
+ art active)
+ (when (string-match "^<\\(.*\\)>$" article)
+ (setq art (match-string 1 article)))
+ (when (and fetch art)
+ (setq url (format fetch art))
+ (mm-with-unibyte-current-buffer
+ (mm-url-insert url))
+ (if (nnweb-definition 'reference t)
+ (setq article
+ (funcall (nnweb-definition
+ 'reference) article)))))))
+ (unless nnheader-callback-function
+ (funcall (nnweb-definition 'article)))
+ (nnheader-report 'nnweb "Fetched article %s" article)
+ (cons group (and (numberp article) article))))))
+
+ (deffoo nnweb-close-server (&optional server)
+ (when (and (nnweb-server-opened server)
+ (gnus-buffer-live-p nnweb-buffer))
+ (save-excursion
+ (set-buffer nnweb-buffer)
+ (set-buffer-modified-p nil)
+ (kill-buffer nnweb-buffer)))
+ (nnoo-close-server 'nnweb server))
+
+ (deffoo nnweb-request-list (&optional server)
+ (nnweb-possibly-change-server nil server)
+ (save-excursion
+ (set-buffer nntp-server-buffer)
+ (nnmail-generate-active nnweb-group-alist)
+ t))
+
+ (deffoo nnweb-request-update-info (group info &optional server)
+ (nnweb-possibly-change-server group server))
+
+ (deffoo nnweb-asynchronous-p ()
+ nil)
+
+ (deffoo nnweb-request-create-group (group &optional server args)
+ (nnweb-possibly-change-server nil server)
+ (nnweb-request-delete-group group)
+ (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist)
+ (nnweb-write-active)
+ t)
+
+ (deffoo nnweb-request-delete-group (group &optional force server)
+ (nnweb-possibly-change-server group server)
+ (gnus-pull group nnweb-group-alist t)
+ (nnweb-write-active)
+ (gnus-delete-file (nnweb-overview-file group))
+ t)
+
+ (nnoo-define-skeleton nnweb)
+
+ ;;; Internal functions
+
+ (defun nnweb-read-overview (group)
+ "Read the overview of GROUP and build the map."
+ (when (file-exists-p (nnweb-overview-file group))
+ (mm-with-unibyte-buffer
+ (nnheader-insert-file-contents (nnweb-overview-file group))
+ (goto-char (point-min))
+ (let (header)
+ (while (not (eobp))
+ (setq header (nnheader-parse-nov))
+ (forward-line 1)
+ (push (list (mail-header-number header)
+ header (mail-header-xref header))
+ nnweb-articles)
+ (nnweb-set-hashtb header (car nnweb-articles)))))))
+
+ (defun nnweb-write-overview (group)
+ "Write the overview file for GROUP."
+ (with-temp-file (nnweb-overview-file group)
+ (let ((articles nnweb-articles))
+ (while articles
+ (nnheader-insert-nov (cadr (pop articles)))))))
+
+ (defun nnweb-set-hashtb (header data)
+ (gnus-sethash (nnweb-identifier (mail-header-xref header))
+ data nnweb-hashtb))
+
+ (defun nnweb-get-hashtb (url)
+ (gnus-gethash (nnweb-identifier url) nnweb-hashtb))
+
+ (defun nnweb-identifier (ident)
+ (funcall (nnweb-definition 'identifier) ident))
+
+ (defun nnweb-overview-file (group)
+ "Return the name of the overview file of GROUP."
+ (nnheader-concat nnweb-directory group ".overview"))
+
+ (defun nnweb-write-active ()
+ "Save the active file."
+ (gnus-make-directory nnweb-directory)
+ (with-temp-file (nnheader-concat nnweb-directory "active")
+ (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer))))
+
+ (defun nnweb-read-active ()
+ "Read the active file."
+ (load (nnheader-concat nnweb-directory "active") t t t))
+
+ (defun nnweb-definition (type &optional noerror)
+ "Return the definition of TYPE."
+ (let ((def (cdr (assq type (assq nnweb-type nnweb-type-definition)))))
+ (when (and (not def)
+ (not noerror))
+ (error "Undefined definition %s" type))
+ def))
+
+ (defun nnweb-possibly-change-server (&optional group server)
+ (nnweb-init server)
+ (when server
+ (unless (nnweb-server-opened server)
+ (nnweb-open-server server)))
+ (unless nnweb-group-alist
+ (nnweb-read-active))
+ (unless nnweb-hashtb
+ (setq nnweb-hashtb (gnus-make-hashtable 4095)))
+ (when group
+ (when (and (not nnweb-ephemeral-p)
+ (equal group nnweb-group))
+ (nnweb-request-group group nil t))))
+
+ (defun nnweb-init (server)
+ "Initialize buffers and such."
+ (unless (gnus-buffer-live-p nnweb-buffer)
+ (setq nnweb-buffer
+ (save-excursion
+ (mm-with-unibyte
+ (nnheader-set-temp-buffer
+ (format " *nnweb %s %s %s*"
+ nnweb-type nnweb-search server))
+ (current-buffer))))))
+
+ ;;;
+ ;;; Deja bought by google.com
+ ;;;
+
+ (defun nnweb-google-wash-article ()
+ (let ((case-fold-search t) url)
+ (goto-char (point-min))
+ (re-search-forward "^<pre>" nil t)
+ (narrow-to-region (point-min) (point))
+ (search-backward "<table " nil t 2)
+ (delete-region (point-min) (point))
+ (if (re-search-forward "Search Result [0-9]+" nil t)
+ (replace-match ""))
+ (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
+ (replace-match ""))
+ (goto-char (point-min))
+ (while (search-forward "<br>" nil t)
+ (replace-match "\n"))
+ (mm-url-remove-markup)
+ (goto-char (point-min))
+ (while (re-search-forward "^[ \t]*\n" nil t)
+ (replace-match ""))
+ (goto-char (point-max))
+ (insert "\n")
+ (widen)
+ (narrow-to-region (point) (point-max))
+ (search-forward "</pre>" nil t)
+ (delete-region (point) (point-max))
+ (mm-url-remove-markup)
+ (widen)))
+
+ (defun nnweb-google-parse-1 (&optional Message-ID)
+ (let ((i 0)
+ (case-fold-search t)
+ (active (cadr (assoc nnweb-group nnweb-group-alist)))
+ Subject Score Date Newsgroups From
+ map url mid)
+ (unless active
+ (push (list nnweb-group (setq active (cons 1 0))
+ nnweb-type nnweb-search)
+ nnweb-group-alist))
+ ;; Go through all the article hits on this page.
+ (goto-char (point-min))
+ (while (re-search-forward
+ "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+ (setq mid (match-string 2)
+ url (format
+ "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+ (narrow-to-region (search-forward ">" nil t)
+ (search-forward "</a>" nil t))
+ (mm-url-remove-markup)
+ (mm-url-decode-entities)
+ (setq Subject (buffer-string))
+ (goto-char (point-max))
+ (widen)
+ (forward-line 2)
+ (when (looking-at "<br><font[^>]+>")
+ (goto-char (match-end 0)))
+ (if (not (looking-at "<a[^>]+>"))
+ (skip-chars-forward " \t")
+ (narrow-to-region (point)
+ (search-forward "</a>" nil t))
+ (mm-url-remove-markup)
+ (mm-url-decode-entities)
+ (setq Newsgroups (buffer-string))
+ (goto-char (point-max))
+ (widen)
+ (skip-chars-forward "- \t"))
+ (when (looking-at
+ "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[
\t]*\\([^<]*\\) - <a")
+ (setq From (match-string 4)
+ Date (format "%s %s 00:00:00 %s"
+ (match-string 2) (match-string 1)
+ (match-string 3))))
+ (forward-line 1)
+ (incf i)
+ (unless (nnweb-get-hashtb url)
+ (push
+ (list
+ (incf (cdr active))
+ (make-full-mail-header
+ (cdr active) (if Newsgroups
+ (concat "(" Newsgroups ") " Subject)
+ Subject)
+ From Date (or Message-ID mid)
+ nil 0 0 url))
+ map)
+ (nnweb-set-hashtb (cadar map) (car map))))
+ map))
+
+ (defun nnweb-google-reference (id)
+ (let ((map (nnweb-google-parse-1 id)) header)
+ (setq nnweb-articles
+ (nconc nnweb-articles map))
+ (when (setq header (cadar map))
+ (mm-with-unibyte-current-buffer
+ (mm-url-insert (mail-header-xref header)))
+ (caar map))))
+
+ (defun nnweb-google-create-mapping ()
+ "Perform the search and create a number-to-url alist."
+ (save-excursion
+ (set-buffer nnweb-buffer)
+ (erase-buffer)
+ (when (funcall (nnweb-definition 'search) nnweb-search)
+ (let ((more t)
+ (i 0))
+ (while more
+ (setq nnweb-articles
+ (nconc nnweb-articles (nnweb-google-parse-1)))
+ ;; Check if there are more articles to fetch
+ (goto-char (point-min))
+ (incf i 100)
+ (if (or (not (re-search-forward
+ "<td nowrap><a href=\\([^>]+\\).*<span
class=b>Next</span>" nil t))
+ (>= i nnweb-max-hits))
+ (setq more nil)
+ ;; Yup, there are more articles
+ (setq more (concat "http://groups.google.com" (match-string 1)))
+ (when more
+ (erase-buffer)
+ (mm-url-insert more))))
+ ;; Return the articles in the right order.
+ (setq nnweb-articles
+ (sort nnweb-articles 'car-less-than-car))))))
+
+ (defun nnweb-google-search (search)
+ (mm-url-insert
+ (concat
+ (nnweb-definition 'address)
+ "?"
+ (mm-url-encode-www-form-urlencoded
+ `(("q" . ,search)
+ ("num". "100")
+ ("hq" . "")
+ ("hl" . "")
+ ("lr" . "")
+ ("safe" . "off")
+ ("sites" . "groups")))))
+ t)
+
+ (defun nnweb-google-identity (url)
+ "Return an unique identifier based on URL."
+ (if (string-match "selm=\\([^ &>]+\\)" url)
+ (match-string 1 url)
+ url))
+
+ ;;;
+ ;;; gmane.org
+ ;;;
+ (defun nnweb-gmane-create-mapping ()
+ "Perform the search and create a number-to-url alist."
+ (save-excursion
+ (set-buffer nnweb-buffer)
+ (erase-buffer)
+ (when (funcall (nnweb-definition 'search) nnweb-search)
+ (let ((more t)
+ (case-fold-search t)
+ (active (or (cadr (assoc nnweb-group nnweb-group-alist))
+ (cons 1 0)))
+ subject group url
+ map)
+ ;; Remove stuff from the beginning of results
+ (goto-char (point-min))
+ (search-forward "Search Results</h1><ul>" nil t)
+ (delete-region (point-min) (point))
+ (goto-char (point-min))
+ ;; Iterate over the actual hits
+ (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t)
+ (setq url (concat "http://gmane.org/" (match-string 1)))
+ (setq subject (match-string 2))
+ (unless (nnweb-get-hashtb url)
+ (push
+ (list
+ (incf (cdr active))
+ (make-full-mail-header
+ (cdr active) (concat "(" group ") " subject) nil nil
+ nil nil 0 0 url))
+ map)
+ (nnweb-set-hashtb (cadar map) (car map))))
+ ;; Return the articles in the right order.
+ (setq nnweb-articles
+ (sort (nconc nnweb-articles map) 'car-less-than-car))))))
+
+ (defun nnweb-gmane-wash-article ()
+ (let ((case-fold-search t))
+ (goto-char (point-min))
+ (re-search-forward "<!--X-Head-of-Message-->" nil t)
+ (delete-region (point-min) (point))
+ (goto-char (point-min))
+ (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
+ (replace-match "\\1\\2" t)
+ (forward-line 1))
+ (mm-url-remove-markup)))
+
+ (defun nnweb-gmane-search (search)
+ (mm-url-insert
+ (concat
+ (nnweb-definition 'address)
+ "?"
+ (mm-url-encode-www-form-urlencoded
+ `(("query" . ,search)))))
+ (setq buffer-file-name nil)
+ t)
+
+
+ (defun nnweb-gmane-identity (url)
+ "Return a unique identifier based on URL."
+ (if (string-match "group=\\(.+\\)" url)
+ (match-string 1 url)
+ url))
+
+ ;;;
+ ;;; General web/w3 interface utility functions
+ ;;;
+
+ (defun nnweb-insert-html (parse)
+ "Insert HTML based on a w3 parse tree."
+ (if (stringp parse)
+ (insert (nnheader-string-as-multibyte parse))
+ (insert "<" (symbol-name (car parse)) " ")
+ (insert (mapconcat
+ (lambda (param)
+ (concat (symbol-name (car param)) "="
+ (prin1-to-string
+ (if (consp (cdr param))
+ (cadr param)
+ (cdr param)))))
+ (nth 1 parse)
+ " "))
+ (insert ">\n")
+ (mapcar 'nnweb-insert-html (nth 2 parse))
+ (insert "</" (symbol-name (car parse)) ">\n")))
+
+ (defun nnweb-parse-find (type parse &optional maxdepth)
+ "Find the element of TYPE in PARSE."
+ (catch 'found
+ (nnweb-parse-find-1 type parse maxdepth)))
+
+ (defun nnweb-parse-find-1 (type contents maxdepth)
+ (when (or (null maxdepth)
+ (not (zerop maxdepth)))
+ (when (consp contents)
+ (when (eq (car contents) type)
+ (throw 'found contents))
+ (when (listp (cdr contents))
+ (dolist (element contents)
+ (when (consp element)
+ (nnweb-parse-find-1 type element
+ (and maxdepth (1- maxdepth)))))))))
+
+ (defun nnweb-parse-find-all (type parse)
+ "Find all elements of TYPE in PARSE."
+ (catch 'found
+ (nnweb-parse-find-all-1 type parse)))
+
+ (defun nnweb-parse-find-all-1 (type contents)
+ (let (result)
+ (when (consp contents)
+ (if (eq (car contents) type)
+ (push contents result)
+ (when (listp (cdr contents))
+ (dolist (element contents)
+ (when (consp element)
+ (setq result
+ (nconc result (nnweb-parse-find-all-1 type element))))))))
+ result))
+
+ (defvar nnweb-text)
+ (defun nnweb-text (parse)
+ "Return a list of text contents in PARSE."
+ (let ((nnweb-text nil))
+ (nnweb-text-1 parse)
+ (nreverse nnweb-text)))
+
+ (defun nnweb-text-1 (contents)
+ (dolist (element contents)
+ (if (stringp element)
+ (push element nnweb-text)
+ (when (and (consp element)
+ (listp (cdr element)))
+ (nnweb-text-1 element)))))
+
+ (provide 'nnweb)
+
+ ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697
+ ;;; nnweb.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el [gnus-5_10-branch],
Andreas Schwab <=