Hi,

I am a tester and new to python.

The code in bold should execute only if the webpage has the text 'BUILD COMPLETE'. This text is between the <th> tag and this is found using DataParser class.

URL = "http://11.12.13.27:8080/cruisecontrol"

from urllib2 import urlopen
from HTMLParser import HTMLParser

import re

# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links

# Build url for Deploy page
def get_deploy_url():
    url = URL + "/buildresults/xxx_%s_nightly_build" % branch
    print url
    [B]parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    print data #Here it displays 0
    if data.find("/BUILD COMPLETE/") > 0:
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
    else:
        print "Build Failed"[/B]

# Build url for Destination page
def get_destination_url():
    url = get_deploy_url()
    print url
    destination_re = re.compile(r"%s" % destination)
    for link in get_links(url):
        if destination_re.search(link["href"]):
            return "http://11.12.13.27:8080/cruisecontrol/" + link["href"]

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass

# To find all text in the HTML table
[B]class DataParser(HTMLParser):
    def handle_data(self, data):
        data = data.strip()
        if data:
            print data[/B]  #This is displaying all the text including the text BUILD COMPLETE
        
if __name__ == "__main__":
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
    print lines
    branch = "%s" % lines[0][1].strip()
    print branch
    destination = "%s" % lines[1][1].strip()
    print destination
    
    final_url = get_destination_url()
    if final_url is None:
        print "Could not find a destination to deploy"
    else:
        print final_url

Below is the error i am getting

Traceback (most recent call last):
  File "C:\build_complete testing.py", line 76, in <module>
    final_url = get_destination_url()
  File "C:\build_complete testing.py", line 38, in get_destination_url
    url = get_deploy_url()
  File "C:\build_complete testing.py", line 29, in get_deploy_url
    if data.find("/BUILD COMPLETE/") > 0:
AttributeError: 'int' object has no attribute 'find'

Please help

You are not saving this data nowhere:

parser.feed(urlopen(url).read())

Please continue your threads if you have question from the same program!

Cheers,
Tony

Hi tony,
Sorry for creating new threads.

I thought the variable data is saving the required data. Can you please tell me what mistake i am doing?

I can not run the code but let's check little:

class DataParser(HTMLParser):
    def handle_data(self, data):
        self.data = data.strip() ## data is local variable which exist only until end of function
parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    parser.data #Here it displays ?

    if parser.data.find("/BUILD COMPLETE/") > 0:
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
    else:
        print "Build Failed"

Hi,

I implemented your changes.It prints BUILD FAILED. But this isnt the case. The webpage has the text BUILD COMPLETE.Also i am getting another error.

[['Branch', ' trunk\n'], ['Destination', ' Test 4\n']]
trunk
Test 4
http://10.47.42.27:8080/cruisecontrol/buildresults/Poker-TTM_trunk_nightly_build
Build Failed
None
Traceback (most recent call last):
  File "C:\build_complete.py", line 77, in <module>
    final_url = get_destination_url()
  File "C:\build_complete.py", line 42, in get_destination_url
    for link in get_links(url):
  File "C:\build_complete.py", line 11, in get_links
    parser.feed(urlopen(url).read())
  File "C:\Python26\lib\urllib2.py", line 126, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python26\lib\urllib2.py", line 382, in open
    req.timeout = timeout
AttributeError: 'NoneType' object has no attribute 'timeout'

Line 6 it says None, which lines print it is, add some info to each print to identify which line it comes from (name of function, name of variable, for example)?

Hi Tony,

Below is the new code

URL = "http://10.47.42.27:8080/cruisecontrol"

from urllib2 import urlopen
from HTMLParser import HTMLParser

import re

# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links

# Build url for Deploy page
def get_deploy_url():
    url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
    print url
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    parser.data
    [B]if parser.data.find("/BUILD COMPLETE/") > 0:[/B]
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
    else:
        print "Build Failed"

# Build url for Destination page
def get_destination_url():
    url = get_deploy_url()
    print "URL is %s" % url
    destination_re = re.compile(r"%s" % destination)
    for link in get_links(url):
        if destination_re.search(link["href"]):
            return "http://10.47.42.27:8080/cruisecontrol/" + link["href"]

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass

# To find all text in the HTML table
class DataParser(HTMLParser):
    def handle_data(self, data):
        self.data = data.strip()
        if data:
            print self.data
        
if __name__ == "__main__":
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
    print lines
    branch = "%s" % lines[0][1].strip()
    print branch
    destination = "%s" % lines[1][1].strip()
    print destination
    
    final_url = get_destination_url()
    if final_url is None:
        print "Could not find a destination to deploy"
    else:
        print final_url

Below is the output (I haven't copied the entire output as there is lot of stuff)

>>> 
[['Branch', ' trunk\n'], ['Destination', ' Test 4\n']]
trunk
Test 4
http://10.47.42.27:8080/cruisecontrol/buildresults/Poker-TTM_trunk_nightly_build
CruiseControl Build Results
Project
-- STATUS PAGE --
BScreened_trunk
Poker-TTM_7.1.0_nightly_build
Poker-TTM_7.2.0_nightly_build
Poker-TTM_7.2.1_nightly_build
Poker-TTM_7.3.0_nightly_build
Poker-TTM_7.3.1_nightly_build
Poker-TTM_7.3.2_nightly_build
Poker-TTM_Wrk7.5.0_nightly_build
.....
.....
.....
.....
waiting for next time to build since
05/13/2010 00:32:33
progress: 00:32:33 next build in 22 hours 28 minutes
Latest Build
2010/05/12 23:04:43 (27478)
2010/05/11 23:04:52 (27455)
........
........
More builds
2010/04/30 23:04:46 (27251)
2010/04/29 23:04:46 (27220)
........
........


Build Results
Unit Test Results
Auto Test Results
XML Log File
Metrics
Control Panel

BUILD COMPLETE
-
27478



Date of build:

05/12/2010 23:04:43

Time to build:

83 minutes 52 seconds



Last changed:

05/12/2010 18:53:10
.......
.......

/trunk/src/flash_client/src/Clients/lang/en/poker_pheaven_en.xml

modified

/trunk/src/flash_client/src/Clients/lang/cs/poker_cs.xml

modified





/trunk/src/libs/gms/gms_exports.cpp

modified




















Build Failed
URL is None
Traceback (most recent call last):
  File "C:\build_complete.py", line 80, in <module>
    final_url = get_destination_url()
  File "C:\build_complete.py", line 45, in get_destination_url
    for link in get_links(url):
  File "C:\build_complete.py", line 11, in get_links
    parser.feed(urlopen(url).read())
  File "C:\Python26\lib\urllib2.py", line 126, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python26\lib\urllib2.py", line 382, in open
    req.timeout = timeout
AttributeError: 'NoneType' object has no attribute 'timeout'
>>>

In the output there is text Build Complete. But here it isnt finding it and is going to the else part to print "Build Failed".

you are comparing >0 (begining of line) maybe you should do >-1 (not found) the lind before it seems doing nothing.

Did that too. Same again. Build Failed. Something wrong with either with the RE or with parser.data. Not sure if it has the data.

Prove regular Python:

if "BUILD COMPLETE" in parser.data:

No. Same again. BUILD FAILED. I am loosing patience now. :(

The success case has no print inside, I think it goes there ones, the process continues until one more step, and then comes BUILD FAILED.

I recommend including info of place where print comes (even that functions parameters) in the print statements. Maybe you can comment out most of them except from this function and DataParse to reduce noise.

Take out the failed part else, and prove this:

if parser.data and ("BUILD COMPLETE" in parser.data):
        print "BUILD COMPLETE"
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                print "DEPLOYING"
                return "%s/%s" % (URL, link["href"])

It isn't going inside the if.

URL = "http://10.47.42.27:8080/cruisecontrol"

from urllib2 import urlopen
from HTMLParser import HTMLParser

import re

# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links

#
def build_check(url):
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.data

# Build url for Deploy page
def get_deploy_url():
    url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
    print url
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    parser.data
    print "before if"
    if parser.data and ("BUILD COMPLETE" in parser.data):
        print "BUILD COMPLETE"
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
    #else:
    #    print "Build Failed"

# Build url for Destination page
def get_destination_url():
    url = get_deploy_url()
    #print "URL is %s" % url
    #destination_re = re.compile(r"%s" % destination)
    #for link in get_links(url):
    #    if destination_re.search(link["href"]):
    #        return "http://10.47.42.27:8080/cruisecontrol/" + link["href"]

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass

# To find all text in the HTML table
class DataParser(HTMLParser):
    def handle_data(self, data):
        self.data = data.strip()
        if data:
            print self.data
        
if __name__ == "__main__":
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
    print lines
    branch = "%s" % lines[0][1].strip()
    print branch
    destination = "%s" % lines[1][1].strip()
    print destination
    
    final_url = get_destination_url()
    #if final_url is None:
    #    print "Could not find a destination to deploy"
    #else:
    #    print final_url

Output: (Haven't pasted all of it)

[['Branch', ' trunk\n'], ['Destination', ' Test 4\n']]
trunk
Test 4
http://10.47.42.27:8080/cruisecontrol/buildresults/Poker-TTM_trunk_nightly_build



CruiseControl Build Results
Project
-- STATUS PAGE --

BScreened_trunk

Poker-TTM_7.1.0_nightly_build

Poker-TTM_7.2.0_nightly_build
......
......
More builds

2010/05/03 23:04:23 (27272)

2010/04/30 23:04:46 (27251)
........
........

Build Results
Unit Test Results
Auto Test Results
XML Log File
Metrics
Control Panel





BUILD COMPLETE
-
27482



Date of build:

05/13/2010 23:04:47



Time to build:

85 minutes 24 seconds



Last changed:

05/13/2010 18:16:26
.....
.....

Maybe one read() too much.

How about result of this

URL = "http://10.47.42.27:8080/cruisecontrol"

from urllib2 import urlopen
from HTMLParser import HTMLParser

import re

# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links

#
def build_check(url):
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.data

# Build url for Deploy page
def get_deploy_url():
    url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
    print url
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    print "before if"
    if parser.found:
        print "BUILD COMPLETE"
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
    #else:
    #    print "Build Failed"

### Build url for Destination page MEANINGLESS FUNCTION DOES NOT RETURN ANYTHING
##def get_destination_url():
##    url = get_deploy_url()
##    #print "URL is %s" % url
##    #destination_re = re.compile(r"%s" % destination)
##    #for link in get_links(url):
##    #    if destination_re.search(link["href"]):
##    #        return "http://10.47.42.27:8080/cruisecontrol/" + link["href"]

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass

# To find all text in the HTML table
class DataParser(HTMLParser):
    def handle_data(self, data):
        self.find = "COMPLETE"
        self.found = False
        self.data = data.strip()
        if self.data:   
            if self.find in parser.data:
                parser.found=True
                print "FOUND"
        
if __name__ == "__main__":
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
##    print lines
    branch = "%s" % lines[0][1].strip()
##    print branch
    destination = "%s" % lines[1][1].strip()
    print destination
    
    final_url = get_deploy_url()
    if final_url is None:
        print "Could not find a destination to deploy"
    else:
        print final_url

NameError: global name 'parser' is not defined

Sorry should be self there inside object:

# To find all text in the HTML table
class DataParser(HTMLParser):
    def handle_data(self, data):
        self.find = "COMPLETE"
        self.found = False
        self.data = data.strip()
        if self.data and (self.find in self.data):
            self.found=True
            print "FOUND"

I am reordering and taking out some grud from earlier emperimentation. How about this, this build_check function was not used so I changed it little and use it.

Also the destination global variable is not used.

URL = "http://10.47.42.27:8080/cruisecontrol"

from urllib2 import urlopen
from HTMLParser import HTMLParser

import re

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass


# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links

# To find all text in the HTML table
class DataParser(HTMLParser):
    def handle_data(self, data):
        self.find = "COMPLETE"
        self.found = False
        self.data = data.strip()
        if self.data:   
            if self.find in self.data:
                self.found=True
                print "FOUND"
        else: print "NO DATA"

def build_check(url):
    parser = DataParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.found

# Build url for Deploy page
def get_deploy_url():
    url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
    print url
    if build_check(url):
        print "BUILD COMPLETE"
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return "%s/%s" % (URL, link["href"])
        
if __name__ == "__main__":
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
##    print lines
    branch = "%s" % lines[0][1].strip()
##    print branch
    destination = "%s" % lines[1][1].strip()
    print destination
    
    final_url = get_deploy_url()
    if final_url is None:
        print "Could not find a destination to deploy"
    else:
        print final_url

Same words, and here in sanity check the basic idea works:

# To find all text in the HTML table
class DataParser():
    def __init__(self):
        self.find = "COMPLETE"
        self.found = False
        self.data = "BUILD COMPLETE"
        if self.data:   
            if self.find in self.data:
                self.found=True
                print "FOUND"
            else:
                print "NOT FOUND"
        else: print "NO DATA"

def build_check():
    parser = DataParser()
    return parser.found

# Build url for Deploy page
def get_deploy_url():
    if build_check():
        print "BUILD COMPLETE"
            
get_deploy_url()

it is giving syntax error at def __init__(self):

I edited it after first posting. Succeeds for me.

# To do sanity check
class DataParser():
    def __init__(self):
        self.find = "COMPLETE"
        self.found = False
        self.data = "BUILD COMPLETE"
        if self.data:   
            if self.find in self.data:
                self.found=True
                print "FOUND"
            else:
                print "NOT FOUND"
        else: print "NO DATA"

def build_check():
    parser = DataParser()
    return parser.found

# Build url for Deploy page
def get_deploy_url():
    if build_check():
        print "BUILD COMPLETE"
            
get_deploy_url()

Ok now it says BUILD COMPLETE

Working fine now. Added the remaining part too!
Output

>>> 
[['Branch', ' trunk\n'], ['Destination', ' Test 4\n']]
trunk
Test 4
http://10.47.42.27:8080/cruisecontrol/buildresults/Poker-TTM_trunk_nightly_build
FOUND
BUILD COMPLETE found
Deploy URL is: http://10.47.42.27:8080/cruisecontrol/DeploySelect.jsp?ArtifactsUrl=artifacts/Poker-TTM_trunk_nightly_build/20100513230447
Destination URL is: http://10.47.42.27:8080/cruisecontrol/DeployConfig.jsp?name=Test 4&scriptPath=f:/Projects/PokerDeployer&script=test4.cfg&artifacts=f:/cruisecontrol/s_drive/artifacts/Poker-TTM_trunk_nightly_build/20100513230447&configPath=f:/Projects/Poker-TTM_trunk_nightly_build/source/tools/PokerDeployer
>>> 

Thank you very much! Now only submitting the webform after checking a particular checkbox is pending.

Hey, i tested it for a branch whose latest build has failed. But it still says BUILD COMPLETE. ??? :(

because that was sanity check with fixed string instead of the data. That urlopen must go in place of the string BUILD COMPLETE

I put the urlopen there

class DataParser():
    def __init__(self):
        self.find = "COMPLETE"
        self.found = False
        self.data = urlopen(url).read()
        if self.data:   
            if self.find in self.data:
                self.found=True
                print "FOUND"
            else:
                print "NOT FOUND"
        else: print "NO DATA"

Got below error

Traceback (most recent call last):
  File "C:\build_complete_tony.py", line 77, in <module>
    final_url = get_destination_url()
  File "C:\build_complete_tony.py", line 61, in get_destination_url
    url = get_deploy_url()
  File "C:\build_complete_tony.py", line 53, in get_deploy_url
    if build_check():
  File "C:\build_complete_tony.py", line 46, in build_check
    parser = DataParser()
  File "C:\build_complete_tony.py", line 36, in __init__
    self.data = urlopen(url).read()
NameError: global name 'url' is not defined

url must be that Trunk url. Main url was in global var URL. You can add url as init parameter and put that format string expression in parameter for creating UrlParser.

Or go half way out of object oriented way and do like this (remove ## debug lines for outside testing and uncomment your environment lines)

from urllib2 import urlopen
from HTMLParser import HTMLParser

URL = "http://10.47.42.27:8080/cruisecontrol"

# Parsing HTML pages 
class MyHTMLParser(HTMLParser):
    def __init__(self, *args, **kwd):
        HTMLParser.__init__(self, *args, **kwd)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            attrs = dict(attrs)
            if "href" in attrs:
                self.links.append(dict(attrs))

    def handle_endtag(self, tag):
        pass


# Fetching links using HTMLParser
def get_links(url):
    parser = MyHTMLParser()
    parser.feed(urlopen(url).read())
    parser.close()
    return parser.links
        
def build_check(url):
    find = "COMPLETE"
    if find in urlopen(url).read().strip():
##        return "FOUND" ## debug, take out this
        for link in get_links(url):
            if link["href"].startswith("Deploy"):
                return  "%s/%s" % (URL, link["href"])
    
if __name__ == "__main__":
## use these lines, take out ## debug lines
    # Read the branch name and the test destination to deploy on
    lines = [x.split(':') for x in open("branch_dest.txt")]
    print lines
    branch = "%s" % lines[0][1].strip()
    print branch
    destination = "%s" % lines[1][1].strip()
    print destination

##    branch='trunk' ## I have no file for original lines ## debug
    url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
    print url
    
##    final_url='' ## debug
    
## use these lines, take out ## debug lines
   final_url= build_check(url):
   if final_url:
       print final_url
    
##    if build_check('http://www.daniweb.com/forums/thread283335-3.html'): ## debug
##        print 'OK' ## debug
##
    else:
        print "Could not find a destination to deploy"

Better match "BUILD COMPLETE" to reduce risk of false positive. It is checking everywhere in page regardless of place.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.