# Copyright (c) 2004 - 2008 16 Systems, LLC
# Copyright (c) 2009 Virginia Polytechnic Institute and State University
# All rights reserved.
#
# License & Redistribution Information
# http://www.security.vt.edu/Find_SSNs/find_ssns_license.html


import os
import os.path


def _read_user_maxsize(default_maxsize, override_file='maxsize.txt'):
    """Return the effective maximum file size, in bytes.

    If *override_file* exists (looked up relative to the current working
    directory) and holds an integer larger than *default_maxsize*, that
    integer is returned.  A smaller value, or non-numeric content, leaves
    *default_maxsize* in effect: users may raise the limit but not lower it.
    """
    if not os.path.isfile(override_file):
        return default_maxsize

    fp = open(override_file)
    try:
        try:
            usersize = int(fp.read().strip())
        except ValueError:
            # Non-numeric text in the override file: fall back to the default.
            return default_maxsize
    finally:
        # Close the handle on every path, including unexpected errors.
        fp.close()

    return max(usersize, default_maxsize)


def _log_exception(path_to_results, context):
    """Append the exception currently being handled to the exceptions log.

    Must be called from inside an ``except`` block.  Each entry has the form
    ``<module name> <exception> <context>``.
    """
    import sys

    # sys.exc_info() avoids the version-specific "except X, e" / "except X as e"
    # spellings, so this module parses on both Python 2 and Python 3.
    err = sys.exc_info()[1]
    er = open(os.path.join(path_to_results, '.Find_SSNs_exceptions.txt'), 'a')
    try:
        er.write("%s %s %s\n" % (__name__, err, context))
    finally:
        er.close()


def _file_skip_reason(full_path, name, maxsize, ext_skip_list, file_skip_list, path_skip_list):
    """Return the reason *full_path* must not be scanned, or None to scan it.

    Raises whatever ``os.stat`` raises if the file cannot be examined.
    """
    # Skip symbolic links (only detected on POSIX systems, harmless on
    # Windows).  Tested before os.stat(), which follows links and would raise
    # on a dangling one.
    if os.path.islink(full_path):
        return "Symbolic Link"

    file_size = os.stat(full_path).st_size

    if file_size > maxsize:
        return "Too Large"
    if file_size < 48:
        return "Too Small"
    if name.lower() in file_skip_list:
        return "Particular File"
    if os.path.splitext(name)[1].lower() in ext_skip_list:
        return "Extension Skipped"
    if full_path.lower() in path_skip_list:
        return "Particular Path"
    return None


def files(search_path, ext_skip_list, file_skip_list, folder_skip_list, path_skip_list, path_to_results, mode, maxsize=1000000000):
    """Walk *search_path* and build the list of files to be scanned.

    Every skipped file or folder is appended, with the reason, to
    ``.Find_SSNs_skip.txt`` in *path_to_results*.  Errors raised while
    examining an individual entry are appended to
    ``.Find_SSNs_exceptions.txt`` in the same folder and the walk continues.

    Names and paths are lowercased before being compared with the skip
    lists, so skip-list entries need to be lowercase to match.

    Parameters:
        search_path      -- root folder of the walk.
        ext_skip_list    -- extensions to skip, as returned by
                            os.path.splitext (leading dot included).
        file_skip_list   -- bare file names to skip.
        folder_skip_list -- bare folder names to prune from the walk.
        path_skip_list   -- full paths (files or folders) to skip.
        path_to_results  -- folder that receives the skip/exception logs.
        mode             -- 'gui' suppresses the console progress message.
        maxsize          -- default size limit in bytes; a larger integer
                            in ``maxsize.txt`` overrides it.

    Returns:
        (target_file_list, maxsize): the full paths selected for scanning
        and the size limit that was actually applied.
    """
    if mode == 'gui':
        # Not otherwise used in this function; kept so that a missing
        # wxPython still surfaces here in GUI mode, as it always has.
        import wx

    # Allow users to raise the maximum size of files to be searched.
    maxsize = _read_user_maxsize(maxsize)

    target_file_list = []

    if mode != 'gui':
        print("Building file list... please wait.")

    sk = open(os.path.join(path_to_results, '.Find_SSNs_skip.txt'), 'a')
    try:
        for root, dirs, filenames in os.walk(search_path):

            # Prune folders from the search.  Iterate over a copy because
            # removing from 'dirs' in place is what stops os.walk descending.
            for d in dirs[:]:
                try:
                    full_path = os.path.join(root, d)
                    if d.lower() in folder_skip_list:
                        reason = "Particular Folder"
                    elif full_path.lower() in path_skip_list:
                        reason = "Particular Folder in path list"
                    else:
                        continue
                    # Remove exactly once: a folder matching both lists used
                    # to be removed twice, raising ValueError on the second.
                    dirs.remove(d)
                    sk.write("%s \t%s\n" % (full_path, reason))
                except Exception:
                    _log_exception(path_to_results, "while attempting to remove folders from the search")

            # Filter files from the search.
            for f in filenames:
                try:
                    full_path = os.path.join(root, f)
                    reason = _file_skip_reason(full_path, f, maxsize, ext_skip_list, file_skip_list, path_skip_list)
                    if reason is None:
                        # Scan all other files.
                        target_file_list.append(full_path)
                    else:
                        sk.write("%s \t%s\n" % (full_path, reason))
                except Exception:
                    _log_exception(path_to_results, "while attempting to remove files from the search")
    finally:
        # Flush and release the skip log even if the walk itself fails.
        sk.close()

    return target_file_list, maxsize
