[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
www/server/source/sitemap-generator sitemap-gen...
From: Pavel Kharitonov
Subject: www/server/source/sitemap-generator sitemap-gen...
Date: Tue, 11 Aug 2015 16:00:41 +0000
CVSROOT: /web/www
Module name: www
Changes by: Pavel Kharitonov <ineiev> 15/08/11 16:00:41
Modified files:
server/source/sitemap-generator: sitemap-generator.py
Log message:
Make it work with Python3; make PO output more deterministic.
CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/sitemap-generator/sitemap-generator.py?cvsroot=www&r1=1.14&r2=1.15
Patches:
Index: sitemap-generator.py
===================================================================
RCS file: /web/www/www/server/source/sitemap-generator/sitemap-generator.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -b -r1.14 -r1.15
--- sitemap-generator.py 7 May 2015 15:51:38 -0000 1.14
+++ sitemap-generator.py 11 Aug 2015 16:00:41 -0000 1.15
@@ -2,7 +2,7 @@
#
# Sitemap generator
# Copyright © 2011-2012 Wacław Jacek
-# Copyright © 2014 Free Software Foundation, Inc.
+# Copyright © 2014, 2015 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -122,7 +122,7 @@
encoding = 'iso-8859-8'
if encoding in VALID_ENCODINGS:
return encoding
- print path + ': no encoding specified.'
+ print(path + ': no encoding specified.')
# A non-ASCII file who declares no encoding has no right to exist.
return 'utf-8'
@@ -135,7 +135,7 @@
line = 'Found sitemap translations:'
for l in sorted(linguas):
line = line + ' ' + l
- print line
+ print(line)
return linguas
@@ -184,12 +184,12 @@
# Only complain about duplicate index files
# if it isn't a deliberate instance of such.
if not match_against_list(directory, no_index_checks):
- print 'Error: Directory ' + directory \
+ print('Error: Directory ' + directory \
+ ' has both an index file called "' \
+ index_file \
+ '" and a directoryname file called "' \
+ dir_file \
- + '". Neither will be used as main page.'
+ + '". Neither will be used as main page.')
elif index_file:
return index_file
elif dir_file:
@@ -258,8 +258,9 @@
match = match_against_list(path, replacement_titles)
if match:
return replacement_titles[match.re.pattern]
- text = read_file(os.path.join(TOP_DIRECTORY, path))
- encoding = determine_file_encoding(text, path)
+ data = read_file(os.path.join(TOP_DIRECTORY, path), 'b')
+ encoding = determine_file_encoding(data.decode('iso-8859-1'), path)
+ text = data.decode(encoding, 'replace')
idx = text.find('<!--')
while idx >= 0:
head = text[:idx]
@@ -277,7 +278,7 @@
title = re.sub('</center>', '', title)
title = re.sub('<CENTER>', '', title)
title = re.sub('</CENTER>', '', title)
- return title.decode(encoding, 'replace')
+ return title
# No <h?> tags found: use <title>, which needs trimming.
title = extract_tags(text, ['title'])
if not title:
@@ -287,7 +288,7 @@
if match:
title = title[ : match.start() ] \
+ title[ match.end() : ]
- return title.decode(encoding, 'replace')
+ return title
def get_titles_for_files( directory, files ):
titles = {}
@@ -301,7 +302,8 @@
def is_file_a_redirect(path):
- text = read_file(os.path.join(TOP_DIRECTORY, path))
+ data = read_file(os.path.join(TOP_DIRECTORY, path), 'b')
+ text = data.decode('iso-8859-1')
if re.search(FORWARD_REGEXP, text, re.IGNORECASE):
return True
@@ -350,8 +352,8 @@
def output_translations(prefix):
for lang in sitemap_linguas:
- fd = open(prefix + '.' + lang + '.po', 'a')
- for msgid in translations:
+ fd = open(prefix + '.' + lang + '.po', 'ab')
+ for msgid in sorted(translations):
string = ''
trans = translations[msgid]
if trans == None:
@@ -481,16 +483,16 @@
or sitemap_urls + local_urls >= SITEMAP_MAX_URLS:
sitemap_text = SITEMAP_ORG_HEADER + sitemap_text \
+ SITEMAP_ORG_FOOTER
- print 'writing next sitemap (' + str(sitemap_no) + '): ' \
+ print('writing next sitemap (' + str(sitemap_no) + '): ' \
+ str(len(sitemap_text)) + ' bytes, ' \
- + str(sitemap_urls) + ' urls'
+ + str(sitemap_urls) + ' urls')
if len0 + len (entry) \
+ SITEMAP_ORG_BOILERPLATE_LEN >= SITEMAP_MAX_LEN:
- print ' Maximum length (' \
- + str(SITEMAP_MAX_LEN) + ') reached'
+ print(' Maximum length (' \
+ + str(SITEMAP_MAX_LEN) + ') reached')
if sitemap_urls >= SITEMAP_MAX_URLS:
- print ' Maximum URL number (' \
- + str(SITEMAP_MAX_URLS) + ') reached'
+ print(' Maximum URL number (' \
+ + str(SITEMAP_MAX_URLS) + ') reached')
out_file = open(SITEMAP_BASE + str(sitemap_no) \
+ SITEMAP_EXT, 'w')
out_file.write(sitemap_text.encode('utf-8'))
@@ -644,8 +646,8 @@
if directory != '':
write( '</div>\n' )
-def read_file(filename):
- fd = open(filename, 'r')
+def read_file(filename, mode = ''):
+ fd = open(filename, 'r' + mode)
file_contents = fd.read()
fd.close()
@@ -732,18 +734,18 @@
print_map('', 0)
write(read_file('output.tail'))
-output_file = open(OUTPUT_FILE_NAME, 'w')
+output_file = open(OUTPUT_FILE_NAME, 'wb')
output_file.write(output_text.encode('utf-8'))
output_file.close()
output_translations(OUTPUT_FILE_NAME)
if len(sitemap_text):
- print 'writing last sitemap (' + str(sitemap_no) + '): ' \
+ print('writing last sitemap (' + str(sitemap_no) + '): ' \
+ str(len(sitemap_text)) + ' bytes, ' \
- + str(sitemap_urls) + ' urls'
+ + str(sitemap_urls) + ' urls')
output_file = open(SITEMAP_BASE + str(sitemap_no) \
- + SITEMAP_EXT, 'w')
+ + SITEMAP_EXT, 'wb')
sitemap_text = SITEMAP_ORG_HEADER + sitemap_text \
+ SITEMAP_ORG_FOOTER
output_file.write(sitemap_text.encode('utf-8'))
@@ -751,7 +753,7 @@
sitemap_no += 1
if sitemap_no > 0:
- print 'writing sitemap index'
+ print('writing sitemap index')
sitemap_text = SITEMAP_IDX_HEADER
for i in range (sitemap_no):
sitemap_text += '<sitemap>\n'
@@ -761,7 +763,7 @@
sitemap_text += '</sitemap>\n'
sitemap_text += SITEMAP_IDX_FOOTER
output_file = open(SITEMAP_BASE + SITEMAP_IDX \
- + SITEMAP_EXT, 'w')
+ + SITEMAP_EXT, 'wb')
output_file.write(sitemap_text.encode('utf-8'))
output_file.close()
@@ -773,6 +775,6 @@
+ linguas[1:] + '),/" -->\n<dl>' \
+ translist + '</dl><!--#endif -->\n'
-output_file = open(OUTPUT_FILE_NAME + '.translist', 'w')
+output_file = open(OUTPUT_FILE_NAME + '.translist', 'wb')
output_file.write(translist.encode('utf-8'))
output_file.close()
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- www/server/source/sitemap-generator sitemap-gen...,
Pavel Kharitonov <=