# Copyright (c) 2004 - 2008 16 Systems, LLC
# Copyright (c) 2009 Virginia Polytechnic Institute and State University
# All rights reserved.
#
# License & Redistribution Information
# http://www.security.vt.edu/Find_SSNs/find_ssns_license.html


from pipes import quote
import re
import os
import os.path
import time
import version
import zipfile
#import subprocess


import detailsfile
import gv

# xlrd is an optional dependency used to read XLS workbooks.
# havexlrd records whether the import succeeded; go() only takes its
# dedicated XLS branch when this flag is True, otherwise .xls files fall
# through to the generic text/binary handling.
havexlrd = True
try:
    from xlrd import open_workbook
except ImportError:
    # Warn once at import time and keep running without XLS support.
    print "XLRD is not installed; XLS files will be treated as generic"
    havexlrd = False


# Module-level keyword counters shared by go() and its nested helpers.
# go() resets them for every file it scans and its checkflags() helper
# raises them when SSN, card-number or bank keywords are seen.
ssnflags = ccnflags = banflags = 0


def xstr(s):
    """Return str(s), or s itself when it cannot be converted.

    The conversion can fail (for example on text that the default codec
    cannot encode); in that case the original object is handed back
    unchanged so the caller can still concatenate or search it.
    """
    try:
        return str(s)
    except Exception:
        # Only swallow ordinary conversion errors; KeyboardInterrupt and
        # SystemExit must still propagate so a scan can be interrupted.
        return s

# This module should be broken into 2 or 3 smaller modules
# (hunt, validate, report) when time permits.
# It has grown too large and is becoming difficult to change.

def go(cs, ag, ms, search_path, report_type, skipped_numbers, target_file_list, path_to_results, reg_exp, search_for_value, mode):

    global ssnflags
    global ccnflags
    global banflags

    if mode == 'gui':
        import wx

    # Delimiters to be removed.
    bad = re.compile(r'[-\s\.a-zA-Z_|]')
    justnumbers = re.compile(r'\b((?:\d[ -]*){5,16})')
    # Mbox from line
    fromline = re.compile(r'From\s+[\?\w\-\.\+=@]+\s+\w\w\w\s+\w\w\w\s')
    eudorafromline = re.compile(r'From\s+\?\?\?@\?\?\?\s+\w\w\w\s+\w\w\w\s')
    subjline = re.compile(r'Subject:\s')
    digitline = re.compile(r'\d+')
    pointzero = re.compile(r'^\d+\.0$')
    boundary = re.compile(r'BOUNDARY=(\S+)',re.IGNORECASE)
    tt_time = time.strftime("%a_%b_%d_%Y_%H_%M_%S_%p", time.localtime())
    ssnflagwords = re.compile(r'\bssn\b|social\s*security|\bsoc\s*sec|\bss\s*\#|\bss\s+n',re.IGNORECASE)
    ccnflagwords = re.compile(r'card\s*num|card\s+no|card\s*\#|\bcc\s*\#',re.IGNORECASE)
    banflagwords = re.compile(r'bank\b|\bcredit\s+union\b',re.IGNORECASE)
    ssnpattern = re.compile(r'\d\d\d\D\d\d\D\d\d\d\d')
    ccnpattern = re.compile(r'\d\d\d\d\D\d\d\d\d\D\d\d\d\d\D\d\d\d\d|\d\d\d\d\D\d\d\d\d\d\d\D\d\d\d\d\d')

    # The list of suspect files:
    suspect = []

    # Increase threshold to reduce false positives.
    # Set dthreshold (default threshold) to equal threshold
    threshold = 1
    dthreshold = threshold


    if os.path.isfile('threshold.txt'):
        fp = open('threshold.txt')
        # Change threshold to user specified number.
        try:
            threshold = int(fp.read().strip())
        # 'threshold.txt' contains a non-numeric, fallback to dthreshold.
        except ValueError:
            threshold = dthreshold
        fp.close()

    #print threshold

    #details = open(os.path.join(path_to_results, 'Find_SSNs.xml'), 'a')
    detailsfile.dopen(path_to_results)
    detailsfile.dput("<document>")
    results = open(os.path.join(path_to_results, 'Find_SSNs.%s' %report_type), 'a')

    # Validate contains the two functions valid_ccn and valid_ssn
    # Could be re-written in C++ and used with boost.python
    # to speed-up performance. 
    def validate(numbers, context):

        global ssnflags
        global ccnflags
        global banflags

        #details = open(os.path.join(path_to_results, 'Find_SSNs.xml'), 'a')

        #print numbers, context

        def valid_ccn(ccn):


            global ssnflags
            global ccnflags
            global banflags

            #print ccn

            ############################## Start

            # Author David Shaw (Luhn Algorithm)
            # Source http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/172845
            # Modified slightly by 16 Systems, LLC

            sumit = 0
            num_digits = len(ccn)
            oddeven = num_digits & 1

            for count in range(0, num_digits):
                digit = int(ccn[count])

                if not (( count & 1 ) ^ oddeven ):
                    digit = digit * 2
                if digit > 9:
                    digit = digit - 9

                sumit = sumit + digit

            ############################### End

            if (sumit % 10) == 0:
                # Any card with 1 digit prefix
                if ccn[0] in cs.keys():
                    for k,v in cs.iteritems():
                        if ccn[0] == k and len(ccn) in v[0]:
                            return (True, True)
                        else:
                            return (False, False)

                # Any card with 2 digit prefix
                elif ccn[:2] in cs.keys():
                    for k,v in cs.iteritems():
                        if ccn[:2] == k and len(ccn) == v[0]:
                            return (True, True)
                        else:
                            return (False, False)

                # Any card with 3 digit prefix
                elif ccn[:3] in cs.keys():
                    for k,v in cs.iteritems():
                        if ccn[:3] == k and len(ccn) == v[0]:
                            return (True, True)
                        else:
                            return (False, False)

                # Any card with 4 digit prefix
                elif ccn[:4] in cs.keys():
                    for k,v in cs.iteritems():
                        if ccn[:4] == k and len(ccn) == v[0]:
                            return (True, True)
                        else:
                            return (False, False)

                # Obviously not a CC we're looking for, but note that there are other cards out there.
                else:
                    #print ccn, "Not a credit card we're searching for."
                    return (False, False)
            else:
                return (False, False)

        # End of valid_ccn


        def valid_ssn(area_number, group_number, serial_number):

            global ssnflags
            global ccnflags
            global banflags

            if group_number == '00':
                return (False, False)

            if serial_number == '0000':
                return (False, False)

            odd_one = [1,3,5,7,9]
            even_two = range(10, 100, 2) # (10-98 even only)
            even_three = [2,4,6,8]
            odd_four = range(11, 100, 2)# (11-99 odd only)

            if area_number in ag.keys():
                for area, group in ag.iteritems():
                    # Look up area number to find max group number
                    if area_number == area:

                        # For little odds (odds between 1 and 9)
                        if int(group) in odd_one:
                            if int(group_number) in odd_one and int(group_number) <= int(group):
                                #print '%s-%s-%s valid SSN' %(area_number, group_number, serial_number)
                                return (True, True)
                            else:
                                return (False, False)

                        # For big evens (evens between 10 and 98)
                        elif int(group) in even_two:
                            ok_areas = odd_one + [a for a in even_two if a <= int(group)]
                            #print ok_areas
                            if int(group_number) in ok_areas:
                                #print '%s-%s-%s valid SSN' %(area_number, group_number, serial_number)
                                return (True, True)
                            else:
                                return (False, False)

                        # For little evens (evens between 2 and 8)
                        elif int(group) in even_three:
                            ok_areas = odd_one + even_two + [a for a in even_three if a <= int(group)]
                            #print ok_areas
                            if int(group_number) in ok_areas:
                                #print '%s-%s-%s valid SSN' %(area_number, group_number, serial_number)
                                return (True, True)
                            else:
                                return (False, False)

                        # For big odds (odds between 11 and 99)
                        elif int(group) in odd_four:
                            ok_areas = odd_one + even_two + even_three + [a for a in odd_four if a <= int(group)]
                            #print ok_areas
                            if int(group_number) in ok_areas:
                                #print '%s-%s-%s valid SSN' %(area_number, group_number, serial_number)
                                return (True, True)
                            else:
                                return (False, False)

                        else:
                            pass

            else:
                # Bad area number.
                return (False, False)

            # End of valid_ssn() function.


        for number in numbers:

            #print number

            # Remove delimiters from numbers
            clean_number = re.sub(bad, "", number)

            if clean_number not in skipped_numbers:

                #print number, clean_number

                if banflags > 0 and ((number.lower()).startswith("acct") or number.lower().startswith("account")):
                    clean_number = re.search(justnumbers,number).group(1)
                    #print number, clean_number
                    is_it = (True,True)
                    if f not in suspect:
                        if len(suspect) > 0:
                                detailsfile.dput("</file>")
                        suspect.append(f)
                        detailsfile.dput("<file><name>"+ html_escape(f) + "</name>\n<type>" + filetype + "</type>")
                        detailsfile.dput("<match>")
                        if (len(context) > 0):
                            detailsfile.dput(context)
                        detailsfile.dput("<data>"+ clean_number +"</data>")
                        detailsfile.dput("</match>")
                    else:
                        suspect.append(f)
                        detailsfile.dput("<match>")
                        if (len(context) > 0):
                                detailsfile.dput(context)
                        detailsfile.dput("<data>"+clean_number+"</data>")
                        detailsfile.dput("</match>")
                
                # Validate CCNs
                elif len(clean_number) > 9:
                    is_it = [False,False]
                    if ccnflags > 0 or ccnpattern.search(number) is not None:
                        is_it = valid_ccn(clean_number)
                    if is_it[0] == True:
                        #print clean_number
                        if f not in suspect:
                            if len(suspect) > 0:
                                detailsfile.dput("</file>")
                            suspect.append(f)
                            detailsfile.dput("<file><name>"+ html_escape(f) + "</name>\n<type>" + filetype + "</type>")
                            detailsfile.dput("<match>")
                            if (len(context) > 0):
                                detailsfile.dput(context)
                            detailsfile.dput("<data>"+ number +"</data>")
                            detailsfile.dput("</match>")
                        else:
                            suspect.append(f)
                            detailsfile.dput("<match>")
                            if (len(context) > 0):
                                detailsfile.dput(context)
                            detailsfile.dput("<data>"+number+"</data>")
                            detailsfile.dput("</match>")
                    else:
                        pass

                # Validate SSNs
                else:
                    digits = [clean_number[0:3], clean_number[3:5], clean_number[5:9]]
                    is_it = [False,False]
                    if ssnflags > 0 or ssnpattern.search(number) is not None:
                        is_it = valid_ssn(digits[0], digits[1], digits[2])
                    if is_it[0] == True:

                        if f not in suspect:
                            if len(suspect) > 0:
                                detailsfile.dput("</file>")
                            suspect.append(f)
                            detailsfile.dput("<file><name>"+ html_escape(f) + "</name>\n<type>" + filetype + "</type>")
                            detailsfile.dput("<match>")
                            if (len(context) > 0):
                                detailsfile.dput(context)
                            detailsfile.dput("<data>"+number+"</data></match>")
                        else:
                            suspect.append(f)
                            detailsfile.dput("<match>")
                            if (len(context) > 0):
                                detailsfile.dput(context)
                            detailsfile.dput("<data>"+number+"</data></match>")
                    else:
                        pass

            else:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_skip.txt'), 'a')
                print >> fp, f, clean_number, "\tSkipped Number"
                fp.close()


        #details.close()

        # End of validate() function.

    def checkflags(cdata):
        global ssnflags
        global ccnflags
        global banflags
        if ssnflagwords.search(cdata) is not None:
            ssnflags = ssnflags + 25
        if ccnflagwords.search(cdata) is not None:
            ccnflags = ccnflags + 25
        if banflagwords.search(cdata) is not None:
            banflags = banflags + 1
        # end of checkflags() function




    current_count = 0
    ssnflags = 0
    ccnflags = 0
    banflags = 0
    if mode == 'gui':

        app = wx.PySimpleApp()
        app.MainLoop()

        # A progress dialog box (for Jeremy Sippel).
        dialog = wx.ProgressDialog ('Estimate is a guess.','',
                                    maximum = len(target_file_list), parent = None,
                                    style = wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME)


    for f in target_file_list:

        try:
            filetype = "other"
            ssnflags = 0
            ccnflags = 0
            banflags = 0
            # Determine the file type
            is_pdf = False
            is_ps = False
            is_text = False
            is_dvi = False
            is_mbx = False
            is_emlx = False
            is_doc = False
            is_xls = False
            fd = open(f, 'rb')
            # This reads '%PDF-1.' if it is present.
            header = fd.read(512)
            fd.close()
            #print header
            if header[0:7] == '%PDF-1.':
                is_pdf = True
                filetype = "pdf"
                if gv.noise:
                    print f, "is a pdf file."
            elif header[0:10] == "%!PS-Adobe":
                is_ps = True
                filetype = "ps"
                if gv.noise:
                    print f, "is a postscript file."
            elif f.endswith(".doc") or f.endswith(".DOC"):
                is_doc = True
                filetype = "doc"
                if gv.noise:
                    print f, "is a doc file."
            elif header[0:2] == "\367\002":
                is_dvi = True
                filetype = "dvi"
                if gv.noise:
                    print f, "is a DVI file"
            elif fromline.match(header[0:128]):
                is_mbx = True
                filetype = "mbox"
                if gv.noise:
                    print f, "is an mbox file."
            elif digitline.match(header[0:32]) and f.endswith(".emlx"):
                is_emlx = True
                filetype = "emlx"
                if gv.noise:
                    print f, "is an emlx file."
            elif havexlrd and (f.endswith(".xls") or f.endswith(".XLS")):
                is_xls = True
                filetype = "xls"
                if gv.noise:
                    print f, "is an xls file."
            else:
                # Is it a text file?
                numnl = header.count("\n")
                numcr = header.count("\r")
                if numcr > numnl:
                    numnl = numcr
                if numnl > len(header)/128:
                    is_text = True
                    filetype = "text"
                    if gv.noise:
                        print f, "is a text file."
                # ad hoc header skipping

            # Skip TTF's
            if ord(header[0]) == 0 and ord(header[1]) == 1 and ord(header[2]) == 0 and ord(header[3]) == 0 and ord(header[4]) == 0:
                continue

            # Skip TTC's
            if ord(header[0]) == 116 and ord(header[1]) == 116 and ord(header[2]) == 99 and ord(header[3]) == 102:
                continue
            
            # EXE's and DLL's
            if ord(header[0]) == 77 and ord(header[1]) == 90:
                continue
            
            

        except Exception, e:
            fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
            print >> fp, __name__, e, f, "is_pdf"
            fp.close()

        #print f

        current_count = current_count + 1

        if mode == 'gui':

            # A progress dialog box (for Jeremy Sippel).
            dialog.Update(current_count, 'Searching file %s (%s of %s)' %(f, current_count, len(target_file_list)))

        else:

            print "Searching file %s of %s" %(current_count, len(target_file_list))


        # Handle zip archives (zip, OOo XML and MS Office XML)
        if zipfile.is_zipfile(f):
            filetype = "zip"
            try:
                x = zipfile.ZipFile(f)
                zfiles = x.namelist()
                for zf in zfiles:
                    ssnflags = 0
                    ccnflags = 0
                    data = x.read(zf)
                    if data:
                        checkflags(data)
                        search = reg_exp.findall(data)
                        if search:
                            #print search
                            validate(search, "<subfile>" + html_escape(zf) + "</subfile>")
                x.close()

            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "is_zipfile"
                fp.close()


        # Option to handle PDF files. Off by default.
        #
        # You must have pdftotext installed and working on your system before enabling this feature.
        # pdftotext is a part of the xpdf project. It is free software. It may be downloaded here:
        # http://www.foolabs.com/xpdf/download.html
        #
        # To enable pdf searching, uncomment the following section:

        elif is_pdf or is_ps or is_doc:
            try:
                #print f, "is a pdf file."

                q = quote(f)
                if is_pdf:
                    sfd = os.popen("pdftotext  -layout %s -" %q)
                if is_doc:
                    sfd = os.popen("antiword %s" %q)
                else:
                    sfd = os.popen("ps2ascii %s" %q)
                while 1:
                    data = sfd.readline(256)
                    if data:
                        checkflags(data)
                        search = reg_exp.findall(data)
                        if search:
                            #print search
                            ctxt = "<context>" + html_escape(data.rstrip("\n\r")) + "</context>"
                            validate(search, ctxt)
                    else:
                        break

            except Exception, e:
                print __name__, e, f
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f
                fp.close()


        elif is_mbx:
        # Handle mbox files.

            in_body = True
            try:
                target_file = open(f, 'rU')
                boundaryStrings = []
                last_data = "\n"
                while 1:
                    data = target_file.readline(256)
                    if data:
                        bmatch = boundary.search(data)
                        if bmatch:
                            if gv.noise:
                                print "Found boundary", bmatch.group(1)
                            boundaryStrings.append(bmatch.group(1).replace('\"',''))

                        if (fromline.match(data) and last_data == "\n") or eudorafromline.match(data):
                            in_body = False
                            subject = ""
                            boundaryStrings = []
                            ssnflags = 0;
                            ccnflags = 0;
                            banflags = 0;
                            if gv.noise:
                                print "In header", data
                        if in_body:
                          notBoundary = True
                          for boundaryStr in boundaryStrings:
                              if data.find(boundaryStr) >= 0:
                                  notBoundary = False
                          if notBoundary:
                            checkflags(data)
                            search = reg_exp.findall(data)
                            if search:
                                ctxt = "<context>" + html_escape(data.rstrip("\n\r")) + "</context>" + "\n" + "<subject>" + html_escape(subject.rstrip("\n\r")) + "</subject>"
                                if gv.noise:
                                    print "MATCH", data
                                validate(search, ctxt)
                        else:
                            if subjline.match(data):
                                subject = data
                            if data == "\n":
                                in_body = True
                                if gv.noise:
                                    print "In body"

                    else:
                        break
                    last_data = data
                target_file.close()

            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "mbox_files"
                fp.close()

        elif is_emlx:
        # Handle emlx files.

            in_body = False
            subject = ""
            boundaryString= ""
            datalen = 0
            try:
                target_file = open(f, 'rU')
                sizeline = target_file.readline(64)
                size = int(sizeline)
                while 1:
                    last_data = "\n"
                    data = target_file.readline(256)
                    if data:
                        if in_body:
                          if boundaryString == "" or data.find(boundaryString) == -1:
                            checkflags(data)
                            search = reg_exp.findall(data)
                            if search:
                                ctxt = "<context>" + html_escape(data.rstrip("\n\r")) + "</context>" + "\n" + "<subject>" + html_escape(subject.rstrip("\n\r")) + "</subject>"
                                validate(search, ctxt)
                        else:
                            if subjline.match(data):
                                subject = data
                            bmatch = boundary.search(data)
                            if bmatch:
                                if gv.noise:
                                    print "H Found boundary", bmatch.group(1)
                                boundaryString = bmatch.group(1).replace('\"','')
                            if data == "\n":
                                in_body = True
                        datalen += len(data)
                        if datalen >= size:
                            break
                    else:
                        break
                    last_data = data
                target_file.close()

            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "emlx_files"
                fp.close()

        elif is_xls:
        # Handle XLS files.
            try:
                xlsbook = open_workbook(f)
                for sheet in xlsbook.sheets():
                    for row in range(sheet.nrows):
                        rowstr = ""
                        for col in range(sheet.ncols):
                            colvalue = sheet.cell(row,col).value
                            if isinstance(colvalue, float):
                                if int(colvalue) == colvalue:
                                    colvalue = int(colvalue)
                            rowstr = rowstr + " " + xstr(colvalue)
                            checkflags(rowstr)
                            search = reg_exp.findall(xstr(colvalue))
                            if search:
                                ctxt = "<context>" + html_escape(rowstr) + "</context>" 
                                validate(search, ctxt)
            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "xls_files"
                fp.close()


        elif is_text:
        # Handle text files.
            try:
                target_file = open(f, 'rU')
                while 1:
                    data = target_file.readline(256)
                    if data:
                        checkflags(data)
                        search = reg_exp.findall(data)
                        if search:
                            #print search
                            ctxt = "<context>" + html_escape(data.rstrip("\n\r")) + "</context>" 
                            validate(search, ctxt)
                    else:
                        break
                target_file.close()

            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "text_files"
                fp.close()


        # Handle most other files.
        else:

            try:
                target_file = open(f, 'rb')
                while 1:
                    data = target_file.read(256)
                    if data:
                        checkflags(data)
                        search = reg_exp.findall(data)
                        if search:
                            #print search
                            validate(search, "")
                    else:
                        break
                target_file.close()

            except Exception, e:
                fp = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
                print >> fp, __name__, e, f, "most_other_files"
                fp.close()

    if mode == 'gui':
        dialog.Destroy()

    uniqueSet = list(set(suspect))
    suspect_numbers = 0
    for item in uniqueSet:
        if suspect.count(item) >= threshold:
            suspect_numbers += suspect.count(item)

    #details = open(os.path.join(path_to_results, 'Find_SSNs.xml'), 'a')
    if suspect_numbers > 0:
        detailsfile.dput("</file>")
    detailsfile.dput("</document>")
    detailsfile.dclose()
    # Write Report

    if report_type == 'html':

        if suspect_numbers == 0:
            print>> results, '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">'
            print>> results, "<html>"
            print>> results, "<head>"
            print>> results, "<title>'Find_SSNs %s'</title>" %(version.numb)
            print>> results, "</head>"
            print>> results, "<body>"
            print>> results, "<h1>Find_SSNs %s</h1>" %(version.numb)
            print>> results, "Host ", version.host, " | "
            print>> results, version.ip, " | "
            print>> results, tt_time, " | "
            print>> results, "Total Suspect Numbers ", suspect_numbers, " | "
            print>> results, "Threshold ", threshold, " | "
            print>> results, "Max File Size ", ms, " | "
            print>> results, "Search For ", search_for_value, "<br><br>"
            print>> results, "Search Path ", search_path, "<br><br>"
            print>> results, "<p style='color:blue'>No Sensitive Numbers Were Found During This Scan</p>"
            print>> results, "<br>End of report"
            print>> results, "</body>"
            print>> results, "</html>"
        else:
            print>> results, '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">'
            print>> results, "<html>"
            print>> results, "<head>"
            print>> results, "<title>'Find_SSNs %s'</title>" %(version.numb)
            print>> results, "<style type='text/css'>"
            print>> results, "h3:target {color: white; background: #FF0000;}"
            print>> results, "</style>"
            print>> results, '<script src="http://www.security.vt.edu/Find_SSNs/sorttable.js"></script>'
            print>> results, "</head>"
            print>> results, "<body>"
            print>> results, "<h1>Find_SSNs %s</h1>" %(version.numb)
            print>> results, "Host ", version.host, " | "
            print>> results, version.ip, " | "
            print>> results, tt_time, " | "
            print>> results, "Total Suspect Numbers ", suspect_numbers, " | "
            print>> results, "Threshold ", threshold, " | "
            print>> results, "Max File Size ", ms, " | "
            print>> results, "Search For ", search_for_value, "<br><br>"
            print>> results, "Search Path ", search_path, "<br><br>"
            print>> results, "<table border='1' summary='results' class='sortable'>"
            print>> results, "<tr>"
            print>> results, "<td align='center' bgcolor='cccccc' style='cursor:crosshair'>Suspect Number Count</td>"
            print>> results, "<td align='center' bgcolor='cccccc' style='cursor:crosshair'>File Extension</td>"
            print>> results, "<td align='center' bgcolor='cccccc' style='cursor:crosshair'>File Path</td>"
            print>> results, "</tr>"
            for item in uniqueSet:
                if suspect.count(item) >= threshold:
                    #print item
                    print>> results, "<tr><td align='center'>%s</td>" %(suspect.count(item))
                    print>> results, "<td align='center'>%s</td>" %os.path.splitext(item)[1]
                    print>> results, '<td><a href="file:///%s" title="Open the file">%s</a></td></tr>' %(item, item)
            print>> results, "</table>"
            print>> results, "<br>End of report"
            print>> results, "</body>"
            print>> results, "</html>"

    if report_type == 'csv':
        if suspect_numbers == 0:
            results.write("#,Ext,Path,Host,IP,Date,Total,Threshold,Maxsize,Search_For,Search_Path\n")
            results.write("NO,DATA,FOUND,%s,%s,%s,%s,%s,%s,%s,%s\n" %(version.host,
                                                                      version.ip, 
                                                                      tt_time,
                                                                      suspect_numbers,
                                                                      threshold,
                                                                      ms,
                                                                      search_for_value,
                                                                      search_path))
        else:
            results.write("#,Ext,Path,Host,IP,Date,Total,Threshold,Maxsize,Search_For,Search_Path\n")
           
            for item in uniqueSet:
                if suspect.count(item) >= threshold:
                    #print item
                    results.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" %(suspect.count(item),
                                                                      os.path.splitext(item)[1],
                                                                      item,
                                                                      version.host,
                                                                      version.ip,   
                                                                      tt_time,
                                                                      suspect_numbers,
                                                                      threshold,
                                                                      ms,
                                                                      search_for_value,
                                                                      search_path))



                    
    results.close()
    
# Characters that have a special meaning in HTML/XML and the entity
# that replaces each of them.
html_escape_table = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&apos;",
    '"': "&quot;",
    }

def html_escape(text):
    """Return text made safe for embedding in the HTML/XML output.

    Markup characters are replaced by their entities, and every character
    whose code is below 32 or above 127 is replaced by a dot.
    """
    def encode(char):
        code = ord(char)
        if 32 <= code <= 127:
            return html_escape_table.get(char, char)
        return "."

    return "".join([encode(char) for char in text])

                        
