The other day I needed a recursive glob to find all the *.py files in my home directory. Much to my amazement, Python doesn’t have one built into the glob module. So I built my own and decided to share it for anyone else who might need it. Fork it here, or install it from PyPI with easy_install.
"""
Recursive Glob Module

Methods:
    rglob(base, pattern)
    rglob_(pattern)
    lcount(base, pattern, func=lambda x : True)
"""
import glob
import os


def _getDirs(base):
    """
    Return the immediate subdirectories of base.

    The returned paths already include base as a prefix, because they come
    straight from globbing os.path.join(base, '*'). Names starting with a
    dot are not matched by '*', so hidden directories are skipped.
    """
    return [x for x in glob.iglob(os.path.join(base, '*')) if os.path.isdir(x)]


def _count(files, func):
    """
    Return the total number of lines, across all files, for which func(line)
    is true.

    Params:
        files - iterable of file paths to read
        func - boolean filter function applied to every line
    """
    lines = 0
    for f in files:
        # Use a context manager so each file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(f) as handle:
            lines += sum(1 for l in handle if func(l))
    return lines


def rglob(base, pattern):
    """
    Recursive glob starting in specified directory

    Params:
        base - root directory to start the search (absolute or relative)
        pattern - pattern for glob to match (i.e '*.py')

    Returns a list of matching paths; matches in base come first, followed
    by the matches found in each subdirectory in turn.
    """
    flist = glob.glob(os.path.join(base, pattern))
    for d in _getDirs(base):
        # d already starts with base, so recurse on it directly. Joining it
        # onto base again would turn a relative 'pkg/sub' into 'pkg/pkg/sub'
        # and silently drop everything below the first level.
        flist.extend(rglob(d, pattern))
    return flist


def rglob_(pattern):
    """
    Performs a recursive glob in the current working directory
    """
    return rglob(os.getcwd(), pattern)


def lcount(base, pattern, func = lambda x : True):
    """
    Counts the total number of lines across all files found matching pattern.

    Params:
        base - root directory to start the search
        pattern - pattern for glob to match (i.e '*.py')
        func - boolean filter function
            example: lambda x : True if len(x.strip()) else False #filter empty lines
            default: lambda x : True

    Returns the combined count of lines for which func returned true.
    """
    allFiles = rglob(base, pattern)
    return _count(allFiles, func)


if __name__ == "__main__":
    #filter out empty lines and comments
    filterFunc = lambda x : True if (len(x.strip()) and x.strip()[0] != '#') else False

    # Single-argument print(...) behaves the same on Python 2.7 and Python 3.
    pyFiles = rglob(os.path.dirname(__file__), "*.py")
    print(" {} total lines".format(_count(pyFiles, filterFunc)))

    pyFiles_ = rglob_("*.py")
    print(" {} total lines".format(_count(pyFiles_, filterFunc)))

    print(" {} total lines".format(lcount(os.path.dirname(__file__), "*.py", filterFunc)))
It works by starting off in a base directory, where it first globs for files matching the pattern and then uses a list comprehension over a glob iterator to pick out every subdirectory. It then goes through these subdirectories one at a time and performs the same glob pattern match on the contents of each directory. This is repeated recursively until all subdirectories under the base have been inspected.