Directory scanning program

ryuurei 3 Tallied Votes 449 Views Share

I had been working on this program for a while. It was inspired by the slowness of Windows' search feature. I made it so I could simply look for files to see if/where they existed quickly.

I eventually added in the feature to delete files. It was no major extension off of the previous code, but I figured it had some place in there, perhaps. It makes work quick, in any case.

I'm new here, so this is just one of my little contributions to set me on my way. Hope some of you guys like it.

'''A program created by Travis Mullaly (A.K.A. _ryuurei) to search through the
files/directories in and following a specified directory path for a list of
user-defined keywords. Works in file names as well as file content.

!!! WARNING: Searching through file-content can be dangerous as it requires a
great amount of power. It is recommended that this feature only be used in
directories that contain very few files and sub-directories.

Completed on: 29, June 2010.
Creator: Travis Mullaly
Contact: travis_m.cpp@live.com
'''

import os
import sys
from time import sleep

def perror(location):
    '''Show a banner saying that `location` could not be accessed, then pause.

    The banner is 83 columns wide: a dashed rule, the message centred in a
    field of '#' characters, and a second dashed rule. The function then
    sleeps for three seconds so the message can be read before output
    scrolls on.
    '''
    width = 83
    rule = '-' * width
    notice = ' Access to {0} not allowed. '.format(location)

    # Single-argument print(...) behaves the same as the print statement.
    print(rule)
    print(notice.center(width, '#'))
    print(rule)
    sleep(3)

def scan_dirs(location, queries, action='N', limit=False):
    '''Search (or delete) files in the current working directory by keyword.

    The directory that is actually listed is the process's current working
    directory; the caller is expected to have changed into it already.

    location -- path of that directory; used in the printed reports and as
                the prefix of the paths reported for sub-directories.
    queries  -- list of lower-case keywords.
    action   -- 'N' search file names, 'C' search file content, 'B' search
                both, 'D' delete files whose name contains a keyword.
                Upper and lower case letters are both accepted.
    limit    -- when true, sub-directories are not entered.

    The program exits (sys.exit) if the current directory cannot be listed.
    Sub-directories that cannot be entered or listed are reported with
    perror() and skipped. The working directory is the same on return as it
    was on entry.
    '''
    try:
        files = os.listdir(os.curdir)
    except OSError:
        perror(location)
        print('\nProgram could not continue at this point, and will now close.\n')
        sys.exit()

    mode = action.upper()

    # If a search in filenames or both was instructed, compare the search
    # queries with the names of the files (backup files ending in '~' and
    # directories are ignored).
    if mode in ('N', 'B'):
        for item in files:
            if item.endswith('~') or os.path.isdir(item):
                continue
            lowered = item.lower()
            for search in queries:
                if search in lowered:
                    print('\nQuery found:\t{0}'.format(search))
                    print('In filename:\t{0}'.format(item))
                    print('In directory . . .\n{0}\n'.format(location))

    # If a search in file content or both was instructed, compare the
    # queries with the content of files.
    if mode in ('C', 'B'):
        for item in files:
            if os.path.isdir(item):
                continue
            # Read each file once (not once per query), always close it, and
            # skip files that cannot be read instead of crashing.
            try:
                with open(item, 'r') as handle:
                    # Lower-cased so the match is case-insensitive, like the
                    # file-name search; the queries are already lower-case.
                    content = handle.read().lower()
            except (IOError, OSError):
                perror(item)
                continue
            for search in queries:
                if search in content:
                    print('\nQuery found:\t{0}'.format(search))
                    print('In content of:\t{0}'.format(item))
                    print('In directory . . .\n{0}\n'.format(location))

    # If directed to delete files, search through files (avoiding folders)
    # and delete them if any of the queries specified were found in the name.
    if mode == 'D':
        for item in files:
            if os.path.isdir(item):
                continue
            lowered = item.lower()
            # Remove the file at most once, however many queries match it;
            # a second os.remove() would fail and report a bogus error.
            if not any(search in lowered for search in queries):
                continue
            try:
                os.remove(item)
            except OSError:
                perror(item)
            else:
                print('\nDeleting:\t{0}'.format(item))
                print('From . . .\n{0}\n'.format(location))

    # If a limit was put on the search, stop here; otherwise cycle through
    # the items again and enter each folder. This occurs recursively.
    if limit:
        return

    here = os.getcwd()
    for item in files:
        # File-system calls use the real working directory so that recursion
        # also works when `location` is a relative path; `newLocation` is the
        # path shown to the user.
        target = os.path.join(here, item)
        if not os.path.isdir(target):
            continue
        newLocation = os.path.join(location, item)

        try:
            os.chdir(target)
        except OSError:
            perror(newLocation)
            continue

        try:
            # Probe the listing first: a failure inside the recursive call
            # would terminate the whole program instead of skipping.
            os.listdir(os.curdir)
        except OSError:
            perror(newLocation)
        else:
            scan_dirs(newLocation, queries, action)
        finally:
            # Return to the exact directory we came from. '../' is wrong
            # after entering a symlinked directory.
            os.chdir(here)

###############################################################################
############################# MAIN FUNCTION ###################################
###############################################################################

if __name__ == '__main__':
    def ask(prompt):
        '''Read one line from the user, exiting if the input is aborted.'''
        # raw_input raises EOFError when stdin is closed and
        # KeyboardInterrupt on Ctrl-C. Retrying after either one would loop
        # forever (EOF) or make the program impossible to interrupt.
        try:
            return raw_input(prompt)
        except (EOFError, KeyboardInterrupt):
            print('\nInput aborted. Exiting.')
            sys.exit(1)

    queries = []

    # Have the user enter the directory to begin the search. The prompt is
    # repeated until the program manages to change into the given directory.
    while True:
        print('\nFile paths must be entered in full.')
        location = ask('Enter the directory path to begin search.\n--> ')
        if location == '.': continue

        try: os.chdir(location)
        except OSError: continue
        else: break

    # scan_dirs builds the paths of sub-directories from `location`, so hand
    # it the absolute path even if the user typed a relative one.
    location = os.getcwd()

    print('\nEnter keywords to search for. Enter nothing to stop query input.')
    print('You may choose to enter file extensions if you wish.')

    # Have the user enter a list of keywords to search for.
    # This is not heavily monitored, as it is up to the user to be aware of
    # the potential drawbacks of certain search queries.
    while True:
        lookFor = ask(' - ').lower()

        if lookFor:
            queries.append(lookFor)
        else:
            print('- Done taking keyword input -')
            break

    # Have the user decide whether they would like the search to occur
    # in just files names, the file's content, or both (or to delete files).
    # Any refused or malformed confirmation restarts this whole question.
    while True:
        print('Please choose which action you would like to take.')
        print('Note that selecting "B" or "b" will not invoke the delete feature.')

        decision = ask('\nFile Names(N) File Content(C) Names and Content(B) Delete(D): ')

        if decision not in ['N', 'n', 'C', 'c', 'B', 'b', 'D', 'd']:
            print('Input not correct.\nTry again.\n\n')
            continue

        if decision in ['C', 'c', 'B', 'b']:
            print('\nSearching file content is a very tedious and resource consuming task.')
            print('It could even cause the program to crash if run through a large directory.')

            yn = ask('Are you sure you want to continue? (Y/n): ')
            if yn in ['N', 'n']: continue
            elif yn not in ['Y', 'y']:
                print('Improper input.\nPlease start again.')
                continue

        if decision in ['D', 'd']:
            print('The program will now proceed to delete files with the specified names.')

            yn = ask('Do you really want to continue? (Y/n): ')
            if yn in ['N', 'n']: continue
            elif yn not in ['Y', 'y']:
                print('Improper input.\nPlease start again.')
                continue

        # Keep asking until a clear yes or no is given.
        while True:
            lim = ask('Would you like to limit the search to only the directory specified? (Y/n): ')
            if lim in ['Y', 'y']:
                limit = True
                break
            if lim in ['N', 'n']:
                limit = False
                break

        print('\nBeginnning search.\n' + '-'*83)
        break

    scan_dirs(location, queries, decision, limit)

    print('\n' + '-'*83 + '\nSearch Complete.\n\n')
Gribouillis 1,391 Programming Explorer Team Colleague

Nice work. You may be interested in the python program grin http://pypi.python.org/pypi/grin, which is multiplatform, but produces a result similar to unix' grep command. A nice feature of grin is that it uses python regular expressions in the query. Also its source code may contain ideas to improve your program.

Gribouillis 1,391 Programming Explorer Team Colleague

Your program gave me the idea of a small app to search for patterns in a hierarchy of files. This little script prompts you for a regex, a directory, and sends grin's results to the web browser ! :)

#!/usr/bin/env python
"""
 script gribougrin.py (python 2)
 
    sketchy grep-like script to find a pattern in a collection of
    files, with output to the web browser.
    
    Usage:
        When started, the script opens a dialog to read a python regular
        expression from the user and a dialog to choose a directory. It
        then searches occurrences of the pattern in the file hierarchy
        based at this directory. The output is sent to the web browser
        with links to the files where the pattern was found.
        
    Install:
        you need to install the modules
            easygui : http://easygui.sourceforge.net/
            grin : http://pypi.python.org/pypi/grin
        copy gribougrin.py to a place where you store executable files.
    
    Licence:
        public domain
        
    Written by Gribouillis for the python forum at www.daniweb.com
    August, 3, 2010
    
"""

from easygui import*
import re, sys, os
import grin
from grin import get_grin_arg_parser, COLOR_TERMS, get_regex, GrepText, gzip, get_filenames

# Characters that must be replaced by entities before text is placed in an
# HTML page. The numeric form is used for the apostrophe because "&apos;"
# is not defined in HTML 4.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&#39;",
    ">": "&gt;",
    "<": "&lt;",
    }

def html_escape(text):
    """Return `text` with the characters & " ' > < replaced by HTML entities.

    Every other character is copied unchanged. The replacement is done
    character by character, so an inserted '&' is never escaped twice.
    """
    return "".join(html_escape_table.get(c, c) for c in text)

# File the HTML report is written to, in the user's home directory.
# expanduser("~") is used instead of os.environ['HOME'] because HOME is not
# guaranteed to be set (typically on Windows), which raised a KeyError.
TEMPFILE = os.path.join(os.path.expanduser("~"), "gribougrin.html")

# Ask for the regular expression to search for. easygui returns None when
# the dialog is cancelled; stop then, since the rest of the script needs a
# string here.
pattern = enterbox(msg='Enter a python regular expression.', title='gribougrin', default='', strip=True, image=None, root=None)
if pattern is None:
    sys.exit(0)

# Ask for the directory at the root of the search; None again means the
# dialog was cancelled.
rep = diropenbox(msg="search directory ?", title="gribougrin", default=None)
if rep is None:
    sys.exit(0)

def grin_reports(argv):
    """Yield grin's report for each file with a match, split into lines.

    `argv` is a grin-style argument list; its first element (the program
    name) is skipped and the rest is handed to grin's own argument parser.
    Colour output is switched off. Each yielded value is the list obtained
    by splitting one file's report text on newlines; files for which
    grep_a_file returns an empty report yield nothing.
    """
    options = get_grin_arg_parser().parse_args(argv[1:])

    # A single context value applies to both sides of a match.
    context = options.context
    if context is not None:
        options.before_context = context
        options.after_context = context
    options.use_color = False

    grepper = GrepText(get_regex(options), options)
    # get_filenames pairs each file name with a kind; NOTE(review): the
    # kinds are presumably only 'text' and 'gzip' - confirm against grin.
    opener_for = {'text': open, 'gzip': gzip.open}

    for path, kind in get_filenames(options):
        found = grepper.grep_a_file(path, opener=opener_for[kind])
        if found:
            yield found.split('\n')
            
def html_report(argv):
    """Return a complete HTML page listing the matches found by grin.

    `argv` is passed unchanged to grin_reports(). The heading shows the
    module-level `pattern` and `rep` values. Each report becomes a link to
    the file followed by the matching lines in a <pre> block.
    """
    L = ["<html><head><title>Gribougrin</title></head>",
         "<body><H3>gribougrin search \"%s\" in %s</H3>" % (
             html_escape(pattern), html_escape(rep))]
    for report in grin_reports(argv):
        # The first report line is the file name with its last character
        # removed (presumably the ':' grin prints after it - confirm).
        name = html_escape(report[0][:-1])
        lines = html_escape("\n".join(x for x in report[1:] if x))
        # The href is quoted and escaped so that paths containing spaces or
        # markup characters still produce a working link.
        L.append('<a href="%s">%s</a><pre>%s</pre>' % (name, name, lines))
    L.append("</body></html>")
    return "\n".join(L)

import webbrowser

# Build the report for the chosen pattern and directory, save it to
# TEMPFILE, then open the saved page in the web browser.
grin_argv = ["grin", pattern, rep]
res = html_report(grin_argv)
with open(TEMPFILE, "w") as out:
    out.write(res)
webbrowser.open(TEMPFILE)
ryuurei 0 Newbie Poster

I did a bit of a touchup to the program I posted above. This new version does basically the same thing. I did however take out the ability to search file content and delete files (they could easily be added back on).
This version is a bit cleaner, a little shorter, easier to read, and hopefully a little easier to expand upon. It is also all called right from the command line, with no interaction during execution.

'''A program that can be used to quickly resolve whether or not a file exists
on the machine in a specific directory and, optionally, its sub-directories.

Completed on: July 31, 2010.
Creator: _ryuurei (Travis Mullaly)
Contact: travis_m.cpp@live.com
'''

import os
import sys
from time import sleep

# Usage text printed whenever the command-line arguments are rejected.
# {0} is replaced by the name the script was started with.
USAGEMSG = '\n'.join([
    'To properly run this program, please execute it as such:',
    'python {0} [full directory path] [query1,query2,query3] [limit]',
    'An example is: python {0} /usr/bin python,src,lib,doc true',
    '',
    'query:',
    'A list of keywords to search for. Seperated only by one comma each.',
    '',
    'limit:',
    'Must be entered as the word "true" or the word "false". ',
    'Denotes whether the search should be contained to the current ',
    'directory (true) or not (false).',
]).format(sys.argv[0])

def perror(location):
    '''Print a 72-column "access not allowed" banner for `location`.

    The message is centred between '#' characters and framed by two dashed
    rules. The function then pauses for 1.5 seconds.
    '''
    width = 72
    rule = '-' * width
    notice = ' Access to {0} not allowed. '.format(location)

    for line in (rule, notice.center(width, '#'), rule):
        print(line)
    sleep(1.5)

def print_result(queryTerm, fileName, directory):
    '''Report one hit: the matched term, the file name and its directory.'''
    report = (
        ('\nQuery found:\t{0}', queryTerm),
        ('In filename of\t{0}', fileName),
        ('In Directory . . .\n{0}\n', directory),
    )
    for template, value in report:
        print(template.format(value))

def operation(fileList, queries, directory):
    '''Report every query term that occurs in the name of a regular file.

    `fileList` holds names that are checked with os.path.isfile as given, so
    relative names are resolved against the current working directory.
    Names ending in '~' are ignored. The comparison uses the lower-cased
    file name. `directory` is only passed on to print_result for display.
    '''
    for entry in fileList:
        # Only regular files that are not editor backups are of interest.
        if not os.path.isfile(entry) or entry.endswith('~'):
            continue

        lowered = entry.lower()
        for term in queries:
            if term in lowered:
                print_result(term, entry, directory)

def scan(location, queries, limit=False):
    '''Search the current working directory and, optionally, everything below.

    The directory that is listed is the process's current working directory;
    the caller is expected to have changed into it already.

    location -- path of that directory, used for the printed results and as
                the prefix of the paths reported for sub-directories.
    queries  -- list of lower-case keywords handed to operation().
    limit    -- when true, sub-directories are not entered.

    The program exits with status 1 if the current directory cannot be
    listed. Sub-directories that cannot be entered or listed are reported
    with perror() and skipped. The working directory is the same on return
    as it was on entry.
    '''

    # If the program cannot obtain a list of the current directory's items,
    # it cannot operate, and thus ceases functioning.
    try:
        files = os.listdir(os.curdir)
    except OSError:
        perror(location)
        print('\nProgram cannot continue due to initialization error.\n')
        sys.exit(1)

    operation(files, queries, location)

    if limit:
        return

    # When appropriate, move to the next sub-directory.
    here = os.getcwd()
    for item in files:
        # File-system calls use the real working directory so that recursion
        # also works when `location` is a relative path; `newLocation` is the
        # path shown to the user.
        target = os.path.join(here, item)
        if not os.path.isdir(target):
            continue
        newLocation = os.path.join(location, item)

        try:
            os.chdir(target)
        except OSError:
            perror(newLocation)
            continue

        try:
            # Probe the listing first: a failure inside the recursive call
            # would terminate the whole program instead of skipping.
            os.listdir(os.curdir)
        except OSError:
            perror(newLocation)
        else:
            scan(newLocation, queries)
        finally:
            # Always come back, so that the next sibling (and the caller)
            # see the directory they expect.
            os.chdir(here)

def main():
    '''Process the program arguments and begin searching.

    Expects exactly three arguments after the script name: the directory to
    search, a comma-separated list of keywords, and the word "true" or
    "false" telling whether the search is limited to that directory.
    On any invalid argument the usage text is printed and the program exits
    with status 1.
    '''
    if len(sys.argv) != 4:
        print('You did not enter the correct number of specifications.')
        print(USAGEMSG)
        sys.exit(1)

    limitWord = sys.argv[3].lower()
    if limitWord == 'true':
        limit = True
    elif limitWord == 'false':
        limit = False
    else:
        print('There was an error in your last specification.')
        print('The value you assign limit must be true or false.')
        print(USAGEMSG)
        sys.exit(1)

    # Empty terms (from a stray or doubled comma) are dropped: an empty
    # string is contained in every file name and would match everything.
    queries = [term.lower() for term in sys.argv[2].split(',') if term]
    if not queries:
        print('You did not enter any keywords to search for.')
        print(USAGEMSG)
        sys.exit(1)

    try:
        os.chdir(sys.argv[1])
    except OSError:
        print('There was an error in the way your directory path was specified.')
        print('Please remember to enter the path in full for most reliable performance.')
        print(USAGEMSG)
        sys.exit(1)

    print('\nBeginning Search.' + '\n' + '-'*72)

    # scan builds the paths of sub-directories from the location it is
    # given, so pass the absolute path even if a relative one was typed.
    scan(os.getcwd(), queries, limit)

    print('-'*72 + '\nSearch Complete.\n')

# Run the search only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
tls-005 0 Newbie Poster

Fast directory scanning is a really tough problem once you try to squeeze out the last few percent of performance.

I would suggest looking at Artima's post on threads and, further down the line, at the use of generators!
See Dave Beazley's mondo computer blog on those...

Cool piece of code though !

\T,

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.