Below is a working program that goes through the XML document and writes all of the data to an output file. That part is fine. What I need help with is this: the commit message in the XML document (the `message` attribute, which the program writes out under "Comments:") will sometimes contain the word Patch* (for example, "Patch_2.2.1 checkin"). I need to search through the XML document and pull only the entries where the word Patch appears in that message. Any ideas?
#!/usr/bin/env python
from xml.dom import minidom
from xml.dom.minidom import Document
import re
import sys
import time
class logparser:
    """Parse a CVS commit-log XML file and write a per-user change summary.

    The constructor loads a ``userid:username`` map and a list of module
    names from text files.  ``parseLog`` then reads the commit-log XML and
    writes, for every user element found in it, the touched files grouped
    under the commit comment they share.

    The file locations are class attributes so they can be overridden on a
    subclass or an instance without editing the methods.
    """

    # Input: one "userid:username" pair per line.
    USER_MAP_PATH = 'C:/CVSAutoMailerPY/userMap.txt'
    # Input: one module name per line.
    MODULES_PATH = 'C:/CVSAutoMailerPY/modules.txt'
    # Output: errors are appended here.
    ERROR_LOG_PATH = 'C:/CVSAutoMailerPY/errorlog.txt'
    # Input: the commit-log XML document (root element "CommitLog").
    LOG_XML_PATH = "C:/CVSAutoMailerPY/logDataForum2.1.OLD"
    # Output: the generated report; overwritten on every run.
    OUTPUT_PATH = "C:/CVSAutoMailerPY/CVSTest/test.txt"

    def __init__(self):
        """Load the user map and the module list.

        Raises:
            SystemExit: if either input file cannot be opened.  A line is
                appended to the error log before exiting.
        """
        self.usermap = {}
        self.modules = []
        try:
            self._loadUserMap()
        except IOError:
            # Cannot proceed in this case. Write an error log and exit.
            self._logError("User Map file could not be found")
            sys.exit()
        try:
            self._loadModules()
        except IOError:
            # Cannot proceed in this case. Write an error log and exit.
            self._logError("Module information file could not be found")
            sys.exit()

    def _logError(self, message):
        """Append one timestamped line carrying *message* to the error log."""
        stamp = time.asctime(time.localtime(time.time()))
        with open(self.ERROR_LOG_PATH, "a") as ferrout:
            ferrout.write("PARSER ERRORLOG (" + stamp + "):Message - " + message + "\n")

    def _loadUserMap(self):
        """Fill ``self.usermap`` from the ``userid:username`` lines of the map file."""
        # Compiled once; the same pattern extracts both halves of a line.
        token = re.compile(r'[a-zA-Z.]*\S')
        with open(self.USER_MAP_PATH, "r") as fuserin:
            for line in fuserin:
                parts = line.split(":", 1)
                if len(parts) != 2:
                    # Blank line or no ':' separator: nothing to map.
                    continue
                userid = token.match(parts[0])
                username = token.match(parts[1])
                if userid is None or username is None:
                    # A half that starts with whitespace does not match.
                    continue
                self.usermap[userid.group()] = username.group()

    def _loadModules(self):
        """Fill ``self.modules`` with the raw lines of the module file."""
        with open(self.MODULES_PATH, "r") as fmodin:
            for module in fmodin:
                self.modules.append(module)

    def parseLog(self, keyword=None):
        """Write the per-user change report to ``OUTPUT_PATH``.

        Args:
            keyword: optional case-sensitive substring.  When given, only
                commit operations whose normalized message contains it are
                reported, and users with no matching operation are left out
                entirely.  ``None`` (the default) reports everything.

        I/O errors are recorded in the error log instead of being raised.
        XML parse errors are not caught here.
        """
        # we have to populate a module map but for now the input and output
        # paths do not depend on the module, so every pass over the module
        # list regenerates the same report.
        for _module in self.modules:
            try:
                self._writeReport(keyword)
            except IOError as err:
                # Stay best-effort, but leave a trace of what went wrong.
                try:
                    self._logError("Could not generate the change report: " + str(err))
                except IOError:
                    # The error log itself is unwritable; nothing more to do.
                    pass

    def _writeReport(self, keyword):
        """Parse the commit-log XML and write one report section per user."""
        doc = minidom.parse(self.LOG_XML_PATH)
        # Unwanted but mandatory overhead to detect malicious activities:
        # every user element present in the log but missing from the user
        # map is added to the map, flagged as unknown.
        loghead = doc.getElementsByTagName("CommitLog")
        if loghead:
            for child in loghead[0].childNodes:
                if child.nodeType != minidom.Node.ELEMENT_NODE:
                    continue
                if child.nodeName not in self.usermap:
                    self.usermap[child.nodeName] = child.nodeName + " (user unknown)"
        with open(self.OUTPUT_PATH, "w") as fmsg:
            for user in self.usermap:
                userEntry = doc.getElementsByTagName(user)
                if not userEntry:
                    continue
                header = "*** Changes by: " + self.usermap[user] + " ***\n\n"
                commitOps = userEntry[0].getElementsByTagName("CommitOperation")
                if not commitOps:
                    fmsg.write(header)
                    self._logError("No Commit operation found for this user entry. Atleast one operation expected.")
                    continue
                if keyword is not None:
                    commitOps = [
                        op for op in commitOps
                        if keyword in self.normalizeMessage(op.getAttribute("message"))
                    ]
                    if not commitOps:
                        continue
                fmsg.write(header)
                self._writeCommitGroups(fmsg, commitOps)

    def _writeCommitGroups(self, fmsg, commitOps):
        """Write file names grouped under the comment shared by consecutive operations.

        ``commitOps`` must be non-empty.  The comment line of a group is
        written after the group's file names.  A group whose message is
        empty gets no comment line of its own; its files run on into the
        next group.
        """
        oldmessage = ""
        for ops in commitOps:
            newmessage = self.normalizeMessage(ops.getAttribute("message"))
            if newmessage != oldmessage:
                if oldmessage != "":
                    # Close the previous group before starting a new one.
                    fmsg.write("\tComments:\t" + oldmessage)
                    fmsg.write("\n\n")
                oldmessage = newmessage
            # Populate file name(s) in msg
            path = ops.getAttribute("path")
            names = self.normalizeFileName(ops.getAttribute("file"))
            fmsg.write("".join("\tFileName:\t" + path + "/" + name + "\n" for name in names))
        # Close the last group.
        fmsg.write("\tComments:\t" + oldmessage)
        fmsg.write("\n\n")

    def getMessageHeader(self, modname):
        """Return the mail header text for a notification about *modname*."""
        return (
            "From: CVSADMIN\n"
            + "To: CVSNT\n"
            + "Subject: CVS " + modname + " Update Notification\n\n"
            + "DO NOT Reply to this mail. This is an automatic notification of the CVS Updates.\n\n"
        )

    def getMessageFooter(self, modname):
        """Return the mail footer text (``modname`` is accepted but not used)."""
        return (
            "Please update your local working copies.\n"
            + "Thanks.\n"
            + "CVS Admin\n"
            + "Fischer International"
        )

    def normalizeModuleName(self, module):
        """Return *module* with leading and trailing newlines removed."""
        return module.strip("\n")

    def normalizeMessage(self, message):
        """Return the commit message cleaned up for display.

        A message consisting of a single double quote yields a fixed
        "no comments" text.  Otherwise surrounding double quotes are
        stripped and every backslash is replaced by a double quote.
        """
        if message == "\"":
            return "*** No Comments Added by the user ***"
        return message.strip("\"").replace("\\", "\"")

    def normalizeFileName(self, filename):
        """Split a ``file`` attribute into the list of file names it holds.

        Names are separated by whitespace.  A space that belongs to a name
        is written as two backslashes followed by the space; it is turned
        back into a plain space in the returned name.
        """
        # Protect escaped spaces with a character that cannot occur in an
        # XML attribute value, so that literal characters such as '%' in a
        # file name are never mistaken for the placeholder.
        placeholder = "\0"
        protected = filename.replace("\\\\ ", placeholder)
        return [name.replace(placeholder, " ") for name in protected.split()]
# Script entry point: constructing the parser loads the user map and the
# module list (and exits if either file is missing); parseLog() then reads
# the commit-log XML and writes the report file.
lp = logparser()
lp.parseLog()
Here is a sample XML document:
<?xml version="1.0" ?><CommitLog><jnd><CommitOperation file="tick.gif" message="new image for indicating a member already exists in a policy"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/images"/><CommitOperation file="ProvDBObject.java" message="Fix for issue #5562- , #5560"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningCallbackService.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="ProvisioningWorkflowRunner.java" message="Use the stage-identifier to identify that there is no source workflow associated with this request"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/webservice/provisioning"/><CommitOperation file="PrioPolicyUtilServlet.java" message="issue #5560 fixed"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/dfweb"/><CommitOperation file="ProvCache.java ProvDBUtil.java" message="issue #5560 fixed"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/provisioning"/><CommitOperation file="provMemberView.html" message="issue #5560 fixed Changing dates during auto-permanent prevented.ref issue# 5569"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/admin"/><CommitOperation file="DateUtil.java" message="The parseDate method modified to fix issue #5566"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/><CommitOperation file="RecordStore.java" message="Patch_2.2.1 checkin :: Issue# 5567 Temporary workflow files"" module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/source/com/fisc/df/util"/></jnd><jbs><CommitOperation file="provisioningattrcfg.xml" message="Added Petro HPA Attributes to provisionign schema."" 
module="DataForum2.1" path="DataForum2.1/df/DataForumWebApp/WebFiles/config"/></jbs><jxb><CommitOperation file="HPA_SYSTEM_ACCOUNT_TYPE.sql" message="Changed PRE_AUTHORIZED_FREQUENCY to PRE_AUTHORIZED_DURATION_ID"" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/tables"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view."" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/MSSQL/hpa/views"/><CommitOperation file="HPA_ACCOUNT_V.sql" message="Added SYSTEM_ACCOUNT_TYPE information (Membership Type, Pre-authorized Start Date, Pre-authorized End Date, Pre-authorized Duration Id, Reset Password, Needs Approval and Account Lock Indicators) and SYSTEM_DESC field to view."" module="DataForum2.1" path="DataForum2.1/df/DatabaseScripts/Oracle/hpa/views"/></jxb></CommitLog>