#!/usr/bin/python
import os
import sys
import string
import random
import re
import csv
import wx
import getopt

havemac = True
try:
    import macostools
except ImportError:
    havemac = False

############################################################
# This program redacts (edits) files to remove sensitive
# data without changing anything else in the file, including
# whitespace.  All the digits in each of the supplied patterns
# will be replaced with X's.
#
# The program should be run from the command line:
# python redactatron.py
# Will start the program with a dialog to select a CSV file in the format
# produced by Identity Finder. It will then go through each file listed
# in the CSV, allow deselecting any numbers that are false positives,
# and redact the file.
#
# You can also specify the CSV file on the command line:
# python redactatron.py filename.csv
#
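# The -s option causes the redactor to redact only one file, taking
# the information from standard input.  The first line is the path
# to the file, and the remaining lines are numbers to be redacted.
#
# The -t and -T options apply to CSV entries whose location starts
# with "Thunderbird: ".  With -t DIR, that prefix (followed by the
# text given with -T, if any) is replaced by DIR, and the trailing
# path components are then stripped by fix_tbird_path before the
# resulting file is redacted.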
#
# Note: to run this on 64-bit Snow Leopard, you may need to set
# this environment variable first:
# export VERSIONER_PYTHON_PREFER_32_BIT=yes
#
# If the redaction fails to produce a file that is the same size as the
# original, an error message will come out, and the file will not be changed.
# The redacted file will be left in the same folder, with a name that is
# the name of the original plus a random number.
#
############################################################
# Copyright (c) 2011 Steve Gaarder and Cornell University
############################################################



def getaline(fd):
    """Read one line from fd a character at a time, keeping its terminator.

    Reading stops right after the first carriage return or newline, which
    is included in the result, or at end of file.  Because either character
    ends a line, a CR LF pair comes back as two pieces: the text ending in
    CR, then a lone LF on the next call.  Nothing is translated or dropped,
    so writing every returned piece back out reproduces the input exactly.

    fd -- an object with a read(n) method.

    Returns the characters read, or an empty string at end of file.
    """
    chars = []
    while True:
        c = fd.read(1)
        if not c:
            # read() hands back an empty value once the input is exhausted.
            break
        chars.append(c)
        if c == '\r' or c == '\n':
            break
    return "".join(chars)

def do_file(fpath,mats):
    """Ask which matches to redact in one file, then redact it in place.

    fpath -- path of the file to redact.
    mats  -- list of matched strings reported for that file.  Trailing
             whitespace is stripped from each before it is searched for.

    A multi-choice dialog, with every entry pre-selected, lets the user
    deselect false positives.  On Cancel nothing is touched.  On OK each
    selected string is replaced, wherever it occurs within a line, by a
    copy of itself with every digit turned into 'X'.  The result is written
    to a new file next to the original (the original name plus a random
    number) and replaces the original only if the size check passes;
    otherwise an error dialog is shown and both files are left in place.
    """
    choicewin = wx.MultiChoiceDialog(None,fpath,"Choose numbers, then click OK to redact, Cancel to leave alone",mats)
    choicewin.SetSelections(list(range(len(mats))))
    answer = choicewin.ShowModal()
    selected = choicewin.GetSelections()
    # Dialogs are disposed of with Destroy(), as for the error dialog below.
    choicewin.Destroy()
    if answer != wx.ID_OK:
        return

    print(fpath)
    pats = [mats[m].rstrip() for m in selected]
    subs = [re.sub(r'\d','X',pat) for pat in pats]
    for pat, sub in zip(pats, subs):
        print("%s %s" % (pat, sub))

    # A path that is missing or not a regular file cannot be opened below;
    # report it instead of letting the exception abort the whole run.
    if not os.path.isfile(fpath):
        print("Error - file not found.")
        dlg = wx.MessageDialog(None, "Error redacting " + fpath + "\nFile not found.","Error", wx.OK|wx.ICON_WARNING)
        dlg.ShowModal()
        dlg.Destroy()
        return

    # generate a name for the output file and make sure it doesn't exist
    outname = fpath
    while os.path.exists(outname):
        outname = fpath + str(random.randrange(1,1000,1))

    fin = open(fpath,"rb")
    try:
        # On the Mac, make the new file a copy of the old one, so we get
        # its resource fork.  Opening it with "wb" then empties the data.
        if havemac:
            macostools.copy(fpath,outname)
        fout = open(outname,"wb")
        try:
            line = getaline(fin)
            while line:
                for pat, sub in zip(pats, subs):
                    try:
                        line = line.replace(pat, sub)
                    except UnicodeDecodeError:
                        # Best effort: this pattern is skipped for this line.
                        # Presumably raised when a unicode pattern meets
                        # non-ASCII bytes under Python 2 -- TODO confirm.
                        pass
                fout.write(line)
                line = getaline(fin)
        finally:
            fout.close()
    finally:
        fin.close()

    # now see if the size is right
    # NOTE(review): each digit becomes exactly one 'X', so the sizes would
    # normally be equal; the reason for the 2-byte slack is not documented.
    insize = os.path.getsize(fpath)
    outsize = os.path.getsize(outname)

    if insize <= outsize <= insize+2:
        os.remove(fpath)
        os.rename(outname, fpath)
        print("File successfully redacted.")
    else:
        print("Error - file not replaced.")
        dlg = wx.MessageDialog(None, "Error redacting " + fpath + "\nFile not replaced.","Error", wx.OK|wx.ICON_WARNING)
        dlg.ShowModal()
        dlg.Destroy()

    
def fix_tbird_path(filename):
    """Strip trailing components from a Thunderbird-style location.

    On Windows every '/' in the name is first turned into '_'.  Then, for
    as long as the name either ends with an angle-bracketed tag or contains
    such a tag followed by whitespace and a colon, its last path component
    is dropped.  Finally one more component is dropped and what is left is
    returned.

    Presumably the result is the mailbox file that holds the message the
    location referred to -- verify against the Identity Finder output.
    """
    if os.name == "nt":
        filename = filename.replace('/', '_')

    tag_at_end = re.compile(r'<[\w\s\)\(:\-]+>$')
    tag_then_colon = re.compile(r'<[\w\s\)\(:\-]+>\s:')

    # Peel components off the end until neither tag form is left.
    while tag_at_end.search(filename) or tag_then_colon.search(filename):
        filename = os.path.split(filename)[0]

    # One further component is always removed.
    return os.path.split(filename)[0]


def _iter_idf_entries(rows):
    """Yield (filename, matches) for each file record in the CSV rows.

    rows -- an iterable of rows, each a list of strings (e.g. a csv.reader).

    Rows with fewer than five fields are ignored, as is everything up to and
    including the header row whose first field is "Type".  After that, a row
    with a non-empty first field and a second field longer than five
    characters starts a new record whose file name is the second field.  A
    record marked "Multiple Matches" takes its matches from field 5 of the
    following rows whose first two fields are empty.  Each record is yielded
    when the next one starts, and the last one at the end of the input.
    """
    filename = None
    matches = []
    multimatch = False
    seenheader = False
    for row in rows:
        if len(row) <= 4:
            continue
        if not seenheader:
            if row[0] == "Type":
                seenheader = True
            continue
        if row[0] != "" and len(row[1]) > 5:
            if filename is not None:
                yield filename, matches
            filename = row[1]
            matches = []
            multimatch = False
            # On Windows, a single match is in field 5.  On Macs, it's field 3.
            # Look at field 5 first
            if len(row) > 5 and row[5]:
                mf = 5
            else:
                mf = 3
            if row[mf] == "Multiple Matches":
                multimatch = True
            elif row[mf] is not None:
                matches.append(row[mf])
        elif multimatch and len(row) > 5 and row[0] == "" and row[1] == "" and row[5] != "":
            # The length test keeps a five-field row from raising IndexError.
            matches.append(row[5])
    if filename is not None:
        yield filename, matches


# NOTE(review): no window exists when MainLoop() is called, so it presumably
# returns at once and only serves to initialise wx -- confirm.
app = wx.PySimpleApp()
app.MainLoop()

opts, args = getopt.getopt(sys.argv[1:],'st:T:')

single = False       # -s: redact one file described on standard input
tbirdpath = None     # -t: replacement for the "Thunderbird: " prefix
tbirdprefix = ""     # -T: extra text after "Thunderbird: " to replace too
for o, a in opts:
    if o == "-s":
        single = True
    if o == "-t":
        tbirdpath = a
    if o == "-T":
        tbirdprefix = a

if single:
    # First line of stdin is the file path, the rest are the matches.
    filepath = sys.stdin.readline().rstrip()
    matches = [mat.rstrip() for mat in sys.stdin.readlines()]
    do_file(filepath,matches)

else:
    if len(args) >= 1 and os.path.exists(args[0]):
        csvname = args[0]
    else:
        fd = wx.FileDialog(None,"Select IDF csv file","","","*.csv")
        answer = fd.ShowModal()
        csvname = fd.GetPath()
        # Dispose of the dialog before a possible exit so it is never leaked.
        fd.Destroy()
        if answer != wx.ID_OK:
            sys.exit()

    csvf = open(csvname,"rU")
    try:
        lastentry = None
        for filename, matches in _iter_idf_entries(csv.reader(csvf)):
            if tbirdpath is not None and filename.startswith("Thunderbird: "):
                filename = filename.replace("Thunderbird: " + tbirdprefix,tbirdpath,1)
                filename = fix_tbird_path(filename)
            # Skip a record identical to the one just handled; this now
            # covers the final record of the CSV as well.
            if (filename, matches) != lastentry:
                do_file(filename, matches)
                lastentry = (filename, matches)
    finally:
        csvf.close()

    dlg = wx.MessageDialog(None, "All Done.","Finished", wx.OK|wx.ICON_INFORMATION)
    dlg.ShowModal()
    dlg.Destroy()
    
