#!/usr/bin/env python

# remove_purple.py
# Jonathan Cheyer
# 2004 November 14
# Remove all purple numbers from a PurpleWiki database
#
# License: GPL - http://www.fsf.org/licenses/gpl.html
#
# NOTE: requires Python 2.3 or newer
#
# Tested with PurpleWiki-0.94, but should work with any
# database as long as the nid format does not change.
#
# example usage:
# remove_purple.py /data/cop/colab/www/wikidb/page /data/cop/colab/www/wikidb-nopurple/page

import os
import re
import sys

# A purple number is a single space followed by "{nid <word characters>}",
# ex: " {nid 3G}".  Compiled once here instead of being looked up per line.
_NID_PATTERN = re.compile(r" \{nid \w*\}")


def remove(strings):
    """Return a new list of the given strings with purple numbers removed.

    strings -- any iterable of strings (typically the lines of one file).

    Every occurrence of " {nid XXX}" (including its leading space) is
    deleted from each string; everything else, line endings included, is
    kept as is.  The input sequence itself is not modified.
    """
    return [_NID_PATTERN.sub("", line) for line in strings]


def _read_lines(path):
    """Return the lines of the file at path, closing it even on error."""
    infile = open(path, "r")
    try:
        return infile.readlines()
    finally:
        infile.close()


def _write_lines(path, lines):
    """Write lines to the file at path, closing it even on error."""
    outfile = open(path, "w")
    try:
        outfile.writelines(lines)
    finally:
        outfile.close()


def remove_files(source, dest):
    """Copy every file under source to dest with purple numbers removed.

    source -- directory tree to read; its files are never modified.
    dest   -- directory under which files with the same relative names
              are written.  Missing directories are created as needed.

    Directories that contain no files are not recreated under dest.
    Errors from the underlying file operations (IOError / OSError)
    propagate to the caller.
    """
    for root, dirs, files in os.walk(source):
        if not files:
            continue

        # os.walk() builds every root by joining names onto source, so root
        # always starts with source exactly as given.  Slicing that prefix
        # off (rather than str.replace, which would also rewrite any later
        # occurrence of the source text in the path) gives the path of this
        # directory relative to source.
        relative = root[len(source):].lstrip(os.sep)
        if relative:
            outdir = os.path.join(dest, relative)
        else:
            outdir = dest

        if not os.path.exists(outdir):
            os.makedirs(outdir)

        for name in files:
            cleaned = remove(_read_lines(os.path.join(root, name)))
            _write_lines(os.path.join(outdir, name), cleaned)


def main():
    """Command-line entry point: remove_purple.py source_dir dest_dir."""
    if len(sys.argv) < 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(sys.argv[0] + " source_dir dest_dir")
        sys.exit(1)
    remove_files(sys.argv[1], sys.argv[2])


# Only run when executed as a script, so the functions above can be imported.
if __name__ == "__main__":
    main()