Hi,
I created a Python script using pyPDF that automatically merges all the PDFs in a folder and puts them into an output folder and rename them automatically as per the folder's name.
What I want to do now is for the script to search for sub-directories, process all the PDF files in them and create an output in the sub-directory and give it the same name as the sub=dir.
I have been trying to use the os.path.walk() function but I can't get the hang of it or understand it.
This is my current code:
#----------------------------------------------------------------------------------------------
# Name: pdfMerger
# Purpose: Automatic merging of all PDF files in a directory and its sub-directories and
# rename them according to the folder itself. Requires the pyPDF Module
#
# Current: Processes all the PDF files in the current directory
# To-Do: Process the sub-directories.
#
# Version: 1.0
# Author: Brian Livori
#
# Created: 03/08/2011
# Copyright: (c) Brian Livori 2011
# Licence: Open-Source
#---------------------------------------------------------------------------------------------
#!/usr/bin/env python
import os
import glob
import sys
from pyPdf import PdfFileReader, PdfFileWriter
output = PdfFileWriter()
path = str(os.getcwd())
x = 0
for infile in glob.glob( os.path.join(path, '*.pdf') ):
for (path, dirs, files) in os.walk(path, topdown=True):
for files in dirs:
i = 0
print "Merging " + infile
pdf = PdfFileReader(file( infile, "rb"))
x = pdf.getNumPages()
while (i != x):
output.addPage(pdf.getPage(i))
print "Merging page: " + str(i+1) + "/" + str(x)
i += 1
output_dir = "\Output\\"
ext = ".pdf"
dir = os.path.basename(path)
outputpath = str(os.getcwd()) + output_dir
final_output = outputpath
if os.path.exists(final_output) != True:
os.mkdir(final_output)
outputStream = file(final_output + dir + ext, "wb")
output.write(outputStream)
outputStream.close()
else:
outputStream = file(final_output + dir + ext, "wb")
output.write(outputStream)
outputStream.close()