www-commits



From: Pavel Kharitonov
Subject: www/server/source/sitemap-generator sitemap-gen...
Date: Tue, 11 Aug 2015 16:00:41 +0000

CVSROOT:        /web/www
Module name:    www
Changes by:     Pavel Kharitonov <ineiev>       15/08/11 16:00:41

Modified files:
        server/source/sitemap-generator: sitemap-generator.py 

Log message:
        Make it work with Python3; make PO output more deterministic.

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/sitemap-generator/sitemap-generator.py?cvsroot=www&r1=1.14&r2=1.15

Patches:
Index: sitemap-generator.py
===================================================================
RCS file: /web/www/www/server/source/sitemap-generator/sitemap-generator.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -b -r1.14 -r1.15
--- sitemap-generator.py        7 May 2015 15:51:38 -0000       1.14
+++ sitemap-generator.py        11 Aug 2015 16:00:41 -0000      1.15
@@ -2,7 +2,7 @@
 #
 # Sitemap generator
 # Copyright © 2011-2012 Wacław Jacek
-# Copyright © 2014 Free Software Foundation, Inc.
+# Copyright © 2014, 2015 Free Software Foundation, Inc.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -122,7 +122,7 @@
                        encoding = 'iso-8859-8'
                if encoding in VALID_ENCODINGS:
                        return encoding
-       print path + ': no encoding specified.'
+       print(path + ': no encoding specified.')
        # A non-ASCII file who declares no encoding has no right to exist.
        return 'utf-8'
 
@@ -135,7 +135,7 @@
        line = 'Found sitemap translations:'
        for l in sorted(linguas):
                line = line + ' ' + l
-       print line
+       print(line)
        return linguas
                        
 
@@ -184,12 +184,12 @@
                # Only complain about duplicate index files
                # if it isn't a deliberate instance of such.
                if not match_against_list(directory, no_index_checks):
-                       print 'Error: Directory ' + directory \
+                       print('Error: Directory ' + directory \
                                + ' has both an index file called "' \
                                + index_file \
                                + '" and a directoryname file called "' \
                                + dir_file \
-                               + '". Neither will be used as main page.'
+                               + '". Neither will be used as main page.')
        elif index_file:
                return index_file
        elif dir_file:
@@ -258,8 +258,9 @@
        match = match_against_list(path, replacement_titles)
        if match:
                return replacement_titles[match.re.pattern]
-       text = read_file(os.path.join(TOP_DIRECTORY, path))
-       encoding = determine_file_encoding(text, path)
+       data = read_file(os.path.join(TOP_DIRECTORY, path), 'b')
+       encoding = determine_file_encoding(data.decode('iso-8859-1'), path)
+       text = data.decode(encoding, 'replace')
        idx = text.find('<!--')
        while idx >= 0:
                head = text[:idx]
@@ -277,7 +278,7 @@
                title = re.sub('</center>', '', title)
                title = re.sub('<CENTER>', '', title)
                title = re.sub('</CENTER>', '', title)
-               return title.decode(encoding, 'replace')
+               return title
        # No <h?> tags found: use <title>, which needs trimming.
        title = extract_tags(text, ['title'])
        if not title:
@@ -287,7 +288,7 @@
                if match:
                        title = title[ : match.start() ] \
                                        + title[ match.end() : ]
-       return title.decode(encoding, 'replace')
+       return title
 
 def get_titles_for_files( directory, files ):
        titles = {}
@@ -301,7 +302,8 @@
        
 
 def is_file_a_redirect(path):
-       text = read_file(os.path.join(TOP_DIRECTORY, path))
+       data = read_file(os.path.join(TOP_DIRECTORY, path), 'b')
+       text = data.decode('iso-8859-1')
 
        if re.search(FORWARD_REGEXP, text, re.IGNORECASE):
                return True
@@ -350,8 +352,8 @@
 
 def output_translations(prefix):
        for lang in sitemap_linguas:
-               fd = open(prefix + '.' + lang + '.po', 'a')
-               for msgid in translations:
+               fd = open(prefix + '.' + lang + '.po', 'ab')
+               for msgid in sorted(translations):
                        string = ''
                        trans = translations[msgid]
                        if trans == None:
@@ -481,16 +483,16 @@
          or sitemap_urls + local_urls >= SITEMAP_MAX_URLS:
                sitemap_text = SITEMAP_ORG_HEADER + sitemap_text \
                                + SITEMAP_ORG_FOOTER
-               print 'writing next sitemap (' + str(sitemap_no) + '): ' \
+               print('writing next sitemap (' + str(sitemap_no) + '): ' \
                      + str(len(sitemap_text)) + ' bytes, ' \
-                     + str(sitemap_urls) + ' urls'
+                     + str(sitemap_urls) + ' urls')
                if len0 + len (entry) \
                     + SITEMAP_ORG_BOILERPLATE_LEN >= SITEMAP_MAX_LEN:
-                       print '  Maximum length (' \
-                               + str(SITEMAP_MAX_LEN) + ') reached'
+                       print('  Maximum length (' \
+                               + str(SITEMAP_MAX_LEN) + ') reached')
                if sitemap_urls >= SITEMAP_MAX_URLS:
-                       print '  Maximum URL number (' \
-                               + str(SITEMAP_MAX_URLS) + ') reached'
+                       print('  Maximum URL number (' \
+                               + str(SITEMAP_MAX_URLS) + ') reached')
                out_file = open(SITEMAP_BASE + str(sitemap_no) \
                                   + SITEMAP_EXT, 'w')
                out_file.write(sitemap_text.encode('utf-8'))
@@ -644,8 +646,8 @@
                if directory != '':
                        write( '</div>\n' )
                
-def read_file(filename):
-       fd = open(filename, 'r')
+def read_file(filename, mode = ''):
+       fd = open(filename, 'r' + mode)
        file_contents = fd.read()
        fd.close()
        
@@ -732,18 +734,18 @@
 print_map('', 0)
 write(read_file('output.tail'))
 
-output_file = open(OUTPUT_FILE_NAME, 'w')
+output_file = open(OUTPUT_FILE_NAME, 'wb')
 output_file.write(output_text.encode('utf-8'))
 output_file.close()
 
 output_translations(OUTPUT_FILE_NAME)
 
 if len(sitemap_text):
-       print 'writing last sitemap (' + str(sitemap_no) + '): ' \
+       print('writing last sitemap (' + str(sitemap_no) + '): ' \
              + str(len(sitemap_text)) + ' bytes, ' \
-             + str(sitemap_urls) + ' urls'
+             + str(sitemap_urls) + ' urls')
        output_file = open(SITEMAP_BASE + str(sitemap_no) \
-                          + SITEMAP_EXT, 'w')
+                          + SITEMAP_EXT, 'wb')
        sitemap_text = SITEMAP_ORG_HEADER + sitemap_text \
                        + SITEMAP_ORG_FOOTER
        output_file.write(sitemap_text.encode('utf-8'))
@@ -751,7 +753,7 @@
        sitemap_no += 1
 
 if sitemap_no > 0:
-       print 'writing sitemap index'
+       print('writing sitemap index')
        sitemap_text = SITEMAP_IDX_HEADER
        for i in range (sitemap_no):
                sitemap_text += '<sitemap>\n'
@@ -761,7 +763,7 @@
                sitemap_text += '</sitemap>\n'
        sitemap_text += SITEMAP_IDX_FOOTER
        output_file = open(SITEMAP_BASE + SITEMAP_IDX \
-                          + SITEMAP_EXT, 'w')
+                          + SITEMAP_EXT, 'wb')
        output_file.write(sitemap_text.encode('utf-8'))
        output_file.close()
 
@@ -773,6 +775,6 @@
                + linguas[1:] + '),/" -->\n<dl>' \
                + translist + '</dl><!--#endif -->\n'
 
-output_file = open(OUTPUT_FILE_NAME + '.translist', 'w')
+output_file = open(OUTPUT_FILE_NAME + '.translist', 'wb')
 output_file.write(translist.encode('utf-8'))
 output_file.close()
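Beyond converting print statements to print() calls, the patch ports the script to Python 3 by reading page files in binary mode and decoding them explicitly once their encoding is known, and it makes the generated .po files reproducible by writing msgids in sorted order. The sketch below condenses those two patterns; the helper names are hypothetical and not taken from sitemap-generator.py.

    # A minimal sketch of the two patterns applied in revision 1.15
    # (hypothetical names, not code from the repository); it behaves the
    # same way under Python 2 and Python 3.
    import re

    def read_bytes(filename):
        # Binary mode returns bytes in Python 3 (str in Python 2).
        with open(filename, 'rb') as fd:
            return fd.read()

    def detect_charset(data):
        # Decode as ISO-8859-1 first: that never raises, and it preserves
        # the ASCII "charset=..." declaration we are searching for.
        text = data.decode('iso-8859-1')
        match = re.search(r'charset=([-\w]+)', text, re.IGNORECASE)
        return match.group(1) if match else 'utf-8'

    def load_page(filename):
        # Read bytes, then decode explicitly with the detected encoding,
        # replacing undecodable sequences instead of failing.
        data = read_bytes(filename)
        return data.decode(detect_charset(data), 'replace')

    def write_po(filename, translations):
        # sorted() fixes the iteration order of the msgids, so repeated
        # runs produce byte-identical .po files regardless of dict order.
        with open(filename, 'wb') as fd:
            for msgid in sorted(translations):
                msgstr = translations[msgid] or ''
                entry = 'msgid "%s"\nmsgstr "%s"\n\n' % (msgid, msgstr)
                fd.write(entry.encode('utf-8'))

Decoding to text once, up front, avoids the str/bytes mixing that otherwise surfaces later in the pipeline under Python 3, which is why the patch also drops the per-title decode() calls further down.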


