www/server/source/linc linc.py

www-commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

www/server/source/linc linc.py

From:	Pavel Kharitonov
Subject:	www/server/source/linc linc.py
Date:	Mon, 03 Nov 2014 13:11:33 +0000

CVSROOT:        /web/www
Module name:    www
Changes by:     Pavel Kharitonov <ineiev>       14/11/03 13:11:33

Modified files:
        server/source/linc: linc.py 

Log message:
        Support HTTPS links.

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/linc/linc.py?cvsroot=www&r1=1.22&r2=1.23

Patches:
Index: linc.py
===================================================================
RCS file: /web/www/www/server/source/linc/linc.py,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -b -r1.22 -r1.23
--- linc.py     1 Nov 2014 07:54:01 -0000       1.22
+++ linc.py     3 Nov 2014 13:11:32 -0000       1.23
@@ -18,7 +18,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-LINC_VERSION = 'LINC 0.18'
+LINC_VERSION = 'LINC 0.19'
 USAGE = \
 '''Usage: %prog [options] [BASE_DIRECTORY]
 Check links in HTML files from BASE_DIRECTORY.'''
@@ -98,7 +98,7 @@
 HTTP_FORWARD_HEADER = \
   '(^|\r\n)HTTP/1\.1 (301 Moved Permanently|302 Found)(\r\n|$)'
 HTTP_LINK_REGEXP = \
-  'http://(?P<hostname>[^/:]+)(:(?P<port>[0-9]+))?(?P<resource>/[^#]*)?'
+  'http(s?)://(?P<hostname>[^/:]+)(:(?P<port>[0-9]+))?(?P<resource>/[^#]*)?'
 HTTP_NEW_LOCATION_HEADER = '(^|\r\n)Location: (?P<new_location>.+)(\r\n|$)'
 LINK_BEGIN = '(?i)(<a\s[^<]*)'
 # We want to parse links like href="URL" as well as href='URL';
@@ -117,6 +117,12 @@
 import os
 import re
 import socket
+NO_SSL = False
+try:
+    import ssl
+except:
+    print("Note: No SSL library found.")
+    NO_SSL = True
 import sys
 import time
 from optparse import OptionParser
@@ -159,7 +165,7 @@
        return None
 
 # forwarded_from is either None or a list
-def get_http_link_error(link, forwarded_from = None):
+def get_http_link_error(link, link_type, forwarded_from = None):
        if forwarded_from == None:
                forwarded_from = []
 
@@ -170,15 +176,27 @@
        port = connection_data.group( 'port' )
        resource = connection_data.group( 'resource' )
 
-       if port == None:
-               port = 80
-
        socketfd = socket_create()
        # if a socket couldn't be created,
        # just ignore this link this time.
        if socketfd == None:
                return None
 
+       if port == None:
+           if link_type == 'http':
+               port = 80
+           elif link_type == 'https':
+               port = 443
+           else:
+               report(1, 'Unexpected link type `' + link_type + "' found.")
+               if WICKED > 0:
+                   print 'Aborting due to an unexpected link type.'
+                   exit(1)
+               return None
+
+       if link_type == 'https':
+           socketfd = ssl.wrap_socket (socketfd)
+
        if socket_connect( socketfd, hostname, port ) == False:
                socketfd.close()
                return 'couldn\'t connect to host'
@@ -227,7 +245,7 @@
        new_location = match.group('new_location')
        if new_location in forwarded_from:
                return 'forward loop!'
-       return get_http_link_error(new_location, forwarded_from)
+       return get_http_link_error(new_location, link_type, forwarded_from)
 
 def is_inside_comment(head):
        start = head.rfind('<!--')
@@ -275,7 +293,7 @@
        # from which it is linked rather than the actual location
        # of the file.
        dir_name = symlink if (symlink != None) else filename
-       if re.search('^(mailto:|irc://|https://|rsync://)', link):
+       if re.search('^(mailto:|irc://|rsync://)', link):
                link_type = 'unsupported'
        elif link.find('http://') == 0:
                link_type = 'http'
@@ -284,6 +302,11 @@
                link = 'http:' + link
        elif link.find('ftp://') == 0:
                link_type = 'ftp'
+       elif link.find('https://') == 0:
+               if NO_SSL:
+                   link_type = 'unsupported'
+               else:
+                   link_type = 'https'
        elif link[0] == '/':
                link = remote_site_root + link[1:]
        else:
@@ -674,8 +697,8 @@
                link_error = None
        elif link_type == 'ftp':
                link_error = get_ftp_link_error(url)
-       elif link_type == 'http':
-               link_error = get_http_link_error(url)
+       elif link_type == 'http' or link_type == 'https':
+               link_error = get_http_link_error(url, link_type)
        else:
                report(1, 'Unexpected link type `' + link_type + "' found.")
                if WICKED > 0:

[Prev in Thread]

Current Thread

[Next in Thread]

www/server/source/linc linc.py, Pavel Kharitonov, 2014/11/01
- www/server/source/linc linc.py, Pavel Kharitonov <=

Prev by Date: www planetfeeds.ru.html gnu/about-gnu.ar.html g...
Next by Date: www/philosophy technological-neutrality.html
Previous by thread: www/server/source/linc linc.py
Next by thread: www server/sitemap.ru.html server/po/sitemap.ru...
Index(es):
- Date
- Thread