#! /usr/bin/env python
ID = '$Id: cl2xhtml,v 1.10 2013/01/04 04:26:20 eagle Exp $'
#
# cl2xhtml -- Convert a GNU-style ChangeLog to XHTML Strict.
#
# Copyright 2002, 2003, 2006, 2008, 2013 Russ Allbery
#
# See the documentation at the end of this file for the license.
#
# All output goes through single-argument print(...) calls, which parse
# the same way as a print statement and as a print function call.

import getopt
import re
import sys
import time

# This page header is common to all generated pages.  Variables are in all
# caps surrounded by %%, and are substituted when the header is printed.  It
# opens the html, body, and dl elements that print_footer() closes.
page_header = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
    PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
  <title>%NAME% Change History</title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <link rel="stylesheet" href="%STYLE%" type="text/css" />
</head>

<!-- Generated by %VERSION% on %DATE% -->

<body>
<h1>%NAME% Change History</h1>

<dl>
'''

# Opening delimiters that protect commas inside a file list, mapped to the
# text that closes them.  The entity forms are present because lines are
# HTML-escaped before they reach process_files().
_NESTING = {'(': ')', '[': ']', '<': '>', '&lt;': '&gt;'}


def _escape(text):
    """Return text with &, <, and > replaced by XHTML entities.

    The ampersand is replaced first so that the entities produced for the
    angle brackets are not escaped a second time.
    """
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    return text.replace(">", "&gt;")


def version():
    """Return the version number of this program.

    Returns the version number of this program as a string containing the
    program name, the CVS revision number, and the last modification date in
    parentheses.  This is taken from the global ID variable.  Returns the
    empty string if ID has not been expanded by CVS or has too few fields.
    """
    # Make sure CVS doesn't see a variable here to substitute.
    if ID == '$' + 'Id$':
        return ''
    fields = ID.split()
    if len(fields) < 4:
        return ''
    program = fields[1][:-2]            # drop the trailing ",v"
    revision = fields[2]
    date = fields[3].replace("/", "-")
    return program + " " + revision + " (" + date + ")"


def usage():
    """Print usage information for this program to standard output."""
    print("Usage: cl2xhtml [-hv] [--viewcvs <url>] -n <name> -s <style>"
          " [<file>]")


def strip_email(author):
    """Strip the email address from an author.

    Everything from the first " <" onward is removed, along with any
    whitespace left at the end of the name.  An author without " <" is
    returned unchanged.
    """
    mailstart = author.find(" <")
    if mailstart == -1:
        return author
    return author[:mailstart].rstrip()


def _join_authors(authors):
    """Join a non-empty list of author names into an English list."""
    if len(authors) == 1:
        return authors[0]
    if len(authors) == 2:
        return authors[0] + ' and ' + authors[1]
    return ', '.join(authors[:-1]) + ', and ' + authors[-1]


def process_heading(lines):
    """Convert heading lines into XHTML and output it.

    Given a list of heading lines, parse it and output the wanted XHTML.  The
    first line must be in the form date, author, e-mail, optionally with a
    time (recognized by a colon in its third character) between the date and
    the author.  Any subsequent lines will contain only the author and
    e-mail.  Strip off the e-mail address from every author, including the
    first, since we're going to be putting this information on the web, and
    escape what remains.  The output opens the <dd> and first <p> of the
    entry; the caller is responsible for closing them.
    """
    fields = lines[0].split(None, 1)
    date = ''
    author = ''
    if fields:
        date = fields[0]
    if len(fields) > 1:
        author = fields[1]
    if len(author) > 3 and author[2] == ':':
        pieces = author.split(None, 1)
        date += ' ' + pieces[0]
        author = ''
        if len(pieces) > 1:
            author = pieces[1]

    # Drop e-mail addresses and any author entries that end up empty.
    candidates = [author] + list(lines[1:])
    authors = [_escape(strip_email(name)) for name in candidates]
    authors = [name for name in authors if name]

    print('<dt>')
    print(' <span class="date">' + _escape(date) + '</span> ')
    if authors:
        print(' <span class="author">' + _join_authors(authors) + '</span>')
    print('</dt>')
    print('<dd>')
    print('<p>')


def _split_file_list(prefix):
    """Split the file portion of an entry on top-level commas.

    Commas inside (), [], or <> (literal or as &lt; &gt; entities) do not
    split.  Nesting is tracked with a single expected closer rather than a
    stack.  Leading whitespace is removed from each entry and empty entries
    are dropped.
    """
    entries = []
    start = 0
    closer = ''
    for i, char in enumerate(prefix):
        if char == ',' and not closer:
            if i != 0:
                entries.append(prefix[start:i])
            start = i + 1
        elif char == ' ' and start == i and not closer:
            start = i + 1
        else:
            opened = [o for o in _NESTING if prefix.startswith(o, i)]
            if opened:
                closer = _NESTING[opened[0]]
            elif closer and prefix.startswith(closer, i):
                closer = ''
    if start < len(prefix):
        entries.append(prefix[start:])
    return [entry.lstrip() for entry in entries if entry.strip()]


def _version_link(match, viewcvs, filename):
    """Return a parenthesized version linked to its ViewCVS diff.

    match is a match of whitespace followed by a version in parentheses.
    The previous version is guessed by decrementing the last component; a
    first revision on a branch (last component 1, more than two components)
    is diffed against the revision the branch was made from.  If no previous
    version can be computed, the text is returned without a link.
    """
    lead = match.group(1)
    revision = match.group(2)
    plain = lead + '(' + revision + ')'
    parts = revision.split('.')
    try:
        prev = int(parts[-1]) - 1
        if prev == 0 and len(parts) > 2:
            parts = parts[:-2]
            prev = int(parts[-1])
    except ValueError:
        # Something like "(.)" or "(1.)" matched; it is not a version.
        return plain
    if prev < 0:
        return plain
    parts[-1] = str(prev)
    # The ampersand must be an entity inside an XHTML attribute value.
    url = '<a href="' + viewcvs + filename + '?r1=' + '.'.join(parts)
    url += '&amp;r2=' + revision + '">' + revision + '</a>'
    return lead + '(' + url + ')'


def _markup_entry(entry, viewcvs):
    """Add markup to one file entry from a file list.

    The file name is everything up to the first whitespace.  Without
    ViewCVS, the first parenthesized group after the name is marked as
    functions.  With ViewCVS, the name is linked and the first parenthesized
    version number is linked to its diff.
    """
    gap = re.search(r'\s', entry)
    if gap:
        filename = entry[:gap.start()]
        rest = entry[gap.start():]
    else:
        filename = entry
        rest = ''

    if viewcvs:
        marked = '<span class="file"><a href="' + viewcvs + filename + '">'
        marked += filename + '</a></span>'
        rest = re.sub(r'(\s)\(([\d.]+)\)',
                      lambda match: _version_link(match, viewcvs, filename),
                      rest, 1)
    else:
        marked = '<span class="file">' + filename + '</span>'
        # Matches from the first "(" followed by a non-space through the
        # last ")", without the nested quantifiers that backtrack
        # exponentially when the parenthesis is never closed.
        rest = re.sub(r'(\s)\((\S[\s\S]*)\)',
                      r'\1(<span class="function">\2</span>)', rest, 1)
    return marked + rest


def process_files(line, viewcvs):
    """Add markup to file and function portions of a ChangeLog entry.

    Given a line that contains file and function entries, add the
    appropriate markup.  This is fairly simple if we're not using ViewCVS.
    If we are, add links to the right ViewCVS URLs for each revision.

    The file list is everything before the first colon, minus any leading
    asterisk (the caller adds the bullet).  The line is expected to be
    HTML-escaped already.

    Returns a tuple of the modified line and a flag saying if there are more
    files coming (1) or not (0).  If there are more files coming, meaning no
    colon was found, don't modify the line, since otherwise we may not be
    able to pair up versions and files.
    """
    colon = line.find(':')
    if colon == -1:
        return (line, 1)
    prefix = re.sub(r'^\s*\*?\s*', '', line[:colon], 1)
    suffix = line[colon:]
    marked = [_markup_entry(entry, viewcvs)
              for entry in _split_file_list(prefix)]
    return (', '.join(marked) + suffix, 0)


def _print_break(begin, paragraph):
    """Print the separator that precedes a new file or function item."""
    if not begin:
        print('<br />')
    elif paragraph:
        print('</p>\n\n<p>')


def process_changelog(file, viewcvs = None):
    """Convert a file from ChangeLog format to XHTML and output it.

    Given a file containing ChangeLog entries, parse it and convert it to
    XHTML.  Output a line break before any line beginning with an asterisk
    and convert the asterisk to a bullet and mark files and functions.  When
    blank lines are encountered, close the paragraph and open a new one.

    Takes the file object to read (only readline() is used) and the base
    ViewCVS URL (if any).  The <p> and <dd> of the last entry are left open
    for print_footer() to close.
    """
    line = file.readline()
    begin = True        # at the start of an entry or paragraph
    first = True        # no heading has been printed yet
    paragraph = False   # a blank line was seen since the last output
    while line:
        if line[0] in ' \t' or not line.strip():
            text = _escape(line.strip())
            if not text:
                begin = True
                paragraph = True
            elif text[0] == '*':
                _print_break(begin, paragraph)

                # If we haven't seen the end of the file list, we have to
                # grab another line until we do.  Otherwise, we may not be
                # able to associate the version number with the file.  Stop
                # at end of file, a blank line, a new bullet, or a heading,
                # since then this item has no file list at all, and hand
                # that line back to the main loop.
                (text, more_files) = process_files(text, viewcvs)
                pending = None
                while more_files:
                    more = file.readline()
                    stripped = more.strip()
                    if (not stripped or stripped[0] == '*'
                            or more[0] not in ' \t'):
                        text = re.sub(r'^\*\s*', '', text)
                        pending = more
                        break
                    joined = text + ' ' + _escape(stripped)
                    (text, more_files) = process_files(joined, viewcvs)
                print('&bull; ' + text)
                begin = False
                paragraph = False
                if pending is not None:
                    line = pending
                    continue
            elif re.match(r'\(\S+\): ', text):
                _print_break(begin, paragraph)
                print(re.sub(r'\((\S+)\)',
                             r'(<span class="function">\1</span>)', text, 1))
                begin = False
                paragraph = False
            else:
                if paragraph:
                    print('</p>\n\n<p>')
                print(text)
                begin = False
                paragraph = False
            line = file.readline()
        else:
            if not first:
                print('</p>\n</dd>\n')

            # The heading runs until the next blank line or end of file.
            lines = [line.strip()]
            line = file.readline()
            while line.strip():
                lines.append(line.strip())
                line = file.readline()
            process_heading(lines)
            while line and not line.strip():
                line = file.readline()
            begin = True
            first = False
            paragraph = False


def print_header(name, style):
    """Print the HTML header for the CVS log page.

    Given the name of the package for which we're generating log data and
    the style sheet to refer to, generate the XHTML header for the output.
    Both are substituted into the header as given, without escaping.
    """
    header = page_header.replace("%STYLE%", style).replace("%NAME%", name)
    # Spelled out rather than %T, which not every platform supports.
    date = time.strftime("%Y-%m-%d %H:%M:%S -0000", time.gmtime())
    print(header.replace("%VERSION%", version()).replace("%DATE%", date))


def print_footer():
    """Print the HTML footer for the CVS log page.

    Closes the paragraph and entry left open by process_changelog() and the
    elements opened by the page header.
    """
    print('</p>\n</dd>\n\n</dl>\n\n</body>\n</html>')


def main():
    """Parse the command line and convert the ChangeLog to XHTML.

    Reads from the file named by the first argument, or from standard input
    if there is no argument or it is "-".  Exits with status 2 after
    printing a diagnostic and the usage message if the options are invalid.
    """
    longopts = ['help', 'name=', 'style=', 'version', 'viewcvs=']
    try:
        options, arguments = getopt.getopt(sys.argv[1:], "hn:s:v", longopts)
    except getopt.GetoptError as error:
        sys.stderr.write("cl2xhtml: %s\n" % error)
        usage()
        sys.exit(2)
    name = style = viewcvs = ''
    for o, a in options:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-n", "--name"):
            name = a
        elif o in ("-s", "--style"):
            style = a
        elif o in ("-v", "--version"):
            print(version())
            sys.exit()
        elif o == "--viewcvs":
            viewcvs = a
    if arguments and arguments[0] != "-":
        source = open(arguments[0], "r")
    else:
        source = sys.stdin
    try:
        print_header(name, style)
        process_changelog(source, viewcvs)
        print_footer()
    finally:
        if source is not sys.stdin:
            source.close()


if __name__ == "__main__":
    main()
    sys.exit()

documentation = """

=head1 NAME

cl2xhtml - Convert a GNU-style ChangeLog to XHTML Strict

=head1 SYNOPSIS

B<cl2xhtml> [B<-hv>] B<-n> I<name> B<-s> I<style> [I<input>]

=head1 DESCRIPTION

B<cl2xhtml> converts a GNU-style ChangeLog to XHTML Strict, marking the
various syntactic elements of the ChangeLog entries so that a style sheet
can apply syntax highlighting or other desired formatting.  Line breaks
before entries for a new file or a new function are preserved, as are
paragraph breaks in the ChangeLog.  The e-mail address is stripped (the
new-style date entry format is expected).

The B<-n> option should be used to tell B<cl2xhtml> which package's
ChangeLog it is formatting.  Currently, the B<-s> option must also be used
to specify the style sheet for the generated XHTML.

ChangeLog data is read from I<input>, or from standard input if no I<input>
argument is given.  XHTML is written to standard output.  The output is
formatted as a description list, since that maintains the indentation of
entries on most browsers.

B<cl2xhtml> assumes that the ChangeLog is encoded in UTF-8 and specifies a
character set of UTF-8 in its XHTML output.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print brief usage information and exit.

=item B<-n> I<name>, B<--name>=I<name>

Specifies the name of the program that the log data is for, used by
B<cl2xhtml> to create the page title and top heading.

=item B<-s> I<style>, B<--style>=I<style>

Specifies the style sheet to which the generated XHTML page should refer.
This should be a URL (possibly relative to the location where the XHTML
page will be placed), not just a file name.

=item B<--viewcvs>=I<url>

Link file names to the ViewCVS page for that file and versions to the
ViewCVS diff page for that version.  Requires that the revision numbers
appear in the ChangeLog in parentheses after the file names to do the
version links.  (This is the output format of B<cvs2cl> with the
B<--revisions> option.)

=item B<-v>, B<--version>

Print the version of B<cl2xhtml> and exit.

=back

=head1 EXAMPLES

Generate the XHTML-formatted ChangeLog for a package named podlators and
save it into a file called changes.html:

    cl2xhtml -n podlators -s '/styles/change.css' ChangeLog > changes.html

The generated XHTML will refer to C</styles/change.css> as the URL to the
style sheet.

=head1 BUGS

This program is very picky about format and may not fully support all
ChangeLog conventions.  Please let me know if there's any specific feature
that you've missed.

Currently, this program just throws uncaught exceptions if anything goes
wrong, which is less than ideal.  At least the common errors should
probably be caught and result in good diagnostic output.

There's no way not to generate a reference to a style sheet.

The generated XHTML is extremely ugly.

The parsing code is very difficult to maintain, and needs to be rewritten
into a full ChangeLog parser and an HTML output layer that prints out an
HTML representation of a parsed entry.

=head1 SEE ALSO

cvs2cl(1)

The GNU Coding Standards, L<http://www.gnu.org/prep/standards/>.  This may
be available on your system as the info page F<standards>.  This document
specifies the format of the ChangeLog file.

The XHTML 1.0 standard at L<http://www.w3.org/TR/xhtml1/>.

Current versions of this program are available from my web tools page at
L<http://www.eyrie.org/~eagle/software/web/>.

=head1 AUTHOR

Russ Allbery <rra@stanford.edu>

=head1 COPYRIGHT AND LICENSE

Copyright 2002, 2003, 2006, 2008, 2013 Russ Allbery <rra@stanford.edu>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

=cut
"""