crankyslap 0 Newbie Poster

Hey guys, I got my application working fine but it's using a constant 50%-ish of my CPU power, which is way too much I believe. Can any of you figure out what's causing this, or tell me how I can find out myself? :)

What we're doing is reading a HTML link, putting that in a datagrid, then moving on to next link etc. until we have 50 pages' data read. Then we copy all data over to second datagrid, put that data in a dataset and export it to XML. Next we clear the datagrids and then we move on to the next 50 links.

CPU-usage is around 40-50% during all of these tasks so it's not just one element that's causing it. CPU is; AMD AThlon II x2 255 3.1GHz

Are the resources used normal or is there indeed something I'm doing "wrong" ?

Imports System.Text.RegularExpressions

Public Class Form1

    Public apppath As String = My.Application.Info.DirectoryPath

    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        'Start at this topic id
        StartAt.Text = "34315"
        'Don't show script errors
        WebBrowser1.ScriptErrorsSuppressed = True
    End Sub

    Private Sub StartManual_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles StartManual.Click
        getdata()
    End Sub

    Public Sub getdata()

        'For each link, get content. 50 (51) links at a time to prevent big data loss on crash or something.
        For intyX2 As Integer = StartAt.Text To StartAt.Text + 50
            If StartAt.Text < 50000 Then
                Dim contentholder As String = ""
                WebBrowser1.Navigate("http://www.xxwebsiteherexx.nl/showthread.php?t=" & StartAt.Text)

                Do Until WebBrowser1.ReadyState = WebBrowserReadyState.Complete
                    Application.DoEvents()
                Loop

                'Insert link in datagrid
                DataGridView1.Rows(DataGridView1.Rows.Add()).Cells(0).Value = WebBrowser1.Url.ToString

                'Get messages, put in "contentholder, and add to datagrid.
                Dim PageElement2 As HtmlElementCollection = WebBrowser1.Document.GetElementsByTagName("div")
                For Each CurElement2 As HtmlElement In PageElement2
                    'ListBox1.Items.Add(CurElement.GetAttribute("className").ToString())
                    If (CurElement2.GetAttribute("className") = "content") Then
                        contentholder = contentholder & vbCrLf & "<HR>" & vbCrLf & CurElement2.InnerHtml
                    End If
                Next
                'Add content to datagrid
                DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(1).Value = contentholder
                contentholder = ""

                'Strip topicID from URL and add to datagrid
                Dim thetopicid As String
                thetopicid = Regex.Replace(WebBrowser1.Url.ToString, "http://(.*)" & "-", "")
                thetopicid = thetopicid.Replace("/", "")
                DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(2).Value = thetopicid

                'Add site to datagrid
                DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(3).Value = "xxwebsiteherexx.nl"

                'Get main post title
                Dim titleholder As String = ""
                Dim PageElement4 As HtmlElementCollection = WebBrowser1.Document.GetElementsByTagName("span")
                For Each CurElement As HtmlElement In PageElement4
                    If (CurElement.GetAttribute("className") = "threadtitle") Then
                        If titleholder = "" Then
                            titleholder = CurElement.InnerText
                        Else
                            'If "titleholder" already has a value, keep that (in case of multiple spans with same classname)
                            titleholder = titleholder
                        End If
                    End If
                Next
                'Add first-post title to both datagrid fields
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(6).Value = titleholder
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(11).Value = titleholder
                titleholder = ""

                'Get date and time in correct format
                Dim thismonth As String = DateTime.Now.Month
                If thismonth < 10 Then
                    thismonth = "0" & thismonth.ToString
                Else
                    thismonth = thismonth.ToString
                End If
                Dim Nowtime As String
                Nowtime = DateTime.Now.Year.ToString & "-" & thismonth & "-" & DateTime.Now.Day & " " & DateTime.Now.Hour & ":" & DateTime.Now.Minute & ":" & DateTime.Now.Second
                'Add date and time to all 4 fields
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(4).Value = Nowtime
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(5).Value = Nowtime
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(12).Value = Nowtime
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(13).Value = Nowtime
                'Other (non-variable) values required
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(8).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(9).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(10).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(14).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(16).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(17).Value = "value"
                DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(18).Value = "value"

                'Done with current row, on to next one.
                StartAt.Text = StartAt.Text + 1
                StartAt.Refresh()

            Else
                'If topic-number exceeds pre-set maximum, close form
                Me.Close()
            End If

        Next

        'For-each row in datagridview1.
        For intyX2 As Integer = 0 To DataGridView1.Rows.Count - 1
            'Check if row (content) already exists in datagridview2 and make sure it's not a NewRow
            If Not DataGridView1.Rows(intyX2).Cells(1).Value = "" And Not DataGridView1.Rows(intyX2).IsNewRow Then
                Dim found As Boolean = False
                For Each row As DataGridViewRow In DataGridView2.Rows
                    For Each cell As DataGridViewCell In row.Cells
                        If cell.Value IsNot Nothing AndAlso cell.Value.ToString().Equals(DataGridView1.Rows(intyX2).Cells(2).Value) Then
                            found = True
                        End If
                    Next
                Next
                'If not found, copy row over to Datagridview2
                If Not found Then
                    DataGridView2.Rows(DataGridView2.Rows.Add()).Cells(0).Value = DataGridView1.Rows(intyX2).Cells(0).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(1).Value = DataGridView1.Rows(intyX2).Cells(1).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(2).Value = DataGridView1.Rows(intyX2).Cells(2).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(3).Value = DataGridView1.Rows(intyX2).Cells(3).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(4).Value = DataGridView1.Rows(intyX2).Cells(4).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(5).Value = DataGridView1.Rows(intyX2).Cells(5).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(6).Value = DataGridView1.Rows(intyX2).Cells(6).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(7).Value = DataGridView1.Rows(intyX2).Cells(7).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(8).Value = DataGridView1.Rows(intyX2).Cells(8).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(9).Value = DataGridView1.Rows(intyX2).Cells(9).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(10).Value = DataGridView1.Rows(intyX2).Cells(10).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(11).Value = DataGridView1.Rows(intyX2).Cells(11).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(12).Value = DataGridView1.Rows(intyX2).Cells(12).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(13).Value = DataGridView1.Rows(intyX2).Cells(13).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(14).Value = DataGridView1.Rows(intyX2).Cells(14).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(15).Value = DataGridView1.Rows(intyX2).Cells(15).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(16).Value = DataGridView1.Rows(intyX2).Cells(16).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(17).Value = DataGridView1.Rows(intyX2).Cells(17).Value
                    DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(18).Value = DataGridView1.Rows(intyX2).Cells(18).Value
                    Me.DataGridView2.FirstDisplayedScrollingRowIndex = DataGridView2.Rows.Count - 2
                    Me.DataGridView2.Rows(DataGridView2.Rows.Count - 2).Selected = True
                    DataGridView2.Refresh()
                End If
            End If
        Next

        'Clear datagrid1
        DataGridView1.Rows.Clear()

        'Datagrid2 -> to dataset -> to xml, for easy loading into seperate mysql-uploader application
        Dim ds As New System.Data.DataSet

        'Take the data and structure from the datagridview and return it as a dataset.  You can use 
        '"Imports System.Data" declaration at the top of your project/class and remove the system.data 
        'from the various parts of this function.

        Try
            'Add a new table to the dataset
            ds.Tables.Add("Main")

            'Add the columns
            Dim col As System.Data.DataColumn

            'For each colum in the datagridveiw add a new column to your table
            For Each dgvCol As DataGridViewColumn In DataGridView2.Columns
                col = New System.Data.DataColumn(dgvCol.Name)
                ds.Tables("Main").Columns.Add(col)
            Next

            'Add the rows from the datagridview
            Dim row As System.Data.DataRow
            Dim colcount As Integer = DataGridView2.Columns.Count - 1

            For i As Integer = 0 To DataGridView2.Rows.Count - 1
                row = ds.Tables("Main").Rows.Add

                For Each column As DataGridViewColumn In DataGridView2.Columns
                    row.Item(column.Index) = DataGridView2.Rows.Item(i).Cells(column.Index).Value
                Next

            Next

            'Count database files already in folder to determine new database file's name.
            Dim amountoffiles As Integer = System.IO.Directory.GetFiles(apppath & "\databases\xxwebsiteherexx.nl\").Length()
            ds.WriteXml(apppath & "\databases\xxwebsiteherexx.nl\database" & amountoffiles & ".xml")

            'Clear datagrid2
            DataGridView2.Rows.Clear()

            'Repeat process
            getdata()

        Catch ex As Exception
            'Catch any potential errors and display them to the user
            MessageBox.Show("Error Converting from DataGridView" & ex.InnerException.ToString, _
            "Error Converting from DataGridView", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End Try
    End Sub

    'Other functions below this line.
    Public Function ExtractLinks(ByVal innerhtml As String) As DataTable
        Dim dt As New DataTable
        dt.Columns.Add("LinkText")
        dt.Columns.Add("LinkUrl")

        Dim links As MatchCollection = Regex.Matches(innerhtml, "<a.*?href=""(.*?)"".*?>(.*?)</a>")

        For Each match As Match In links
            Dim dr As DataRow = dt.NewRow
            Dim matchUrl As String = match.Groups(1).Value
            'Ignore all anchor links
            If matchUrl.StartsWith("#") Then
                Continue For
            End If
            'Ignore all javascript calls
            If matchUrl.ToLower.StartsWith("javascript:") Then
                Continue For
            End If
            'Ignore all email links
            If matchUrl.ToLower.StartsWith("mailto:") Then
                Continue For
            End If
            'Add the link data to datatable
            dr("LinkUrl") = matchUrl
            dr("LinkText") = match.Groups(2).Value
            dt.Rows.Add(dr)
        Next

        Return dt
    End Function

    Public Function MapUrl(ByVal baseAddress As String, ByVal relativePath As String) As String

        Dim u As New System.Uri(baseAddress)

        If relativePath = "./" Then
            relativePath = "/"
        End If

        If relativePath.StartsWith("/") Then
            Return u.Scheme + Uri.SchemeDelimiter + u.Authority + relativePath
        Else
            Dim pathAndQuery As String = u.AbsolutePath
            ' If the baseAddress contains a file name, like ..../Something.aspx
            ' Trim off the file name
            pathAndQuery = pathAndQuery.Split("?")(0).TrimEnd("/")
            If pathAndQuery.Split("/")(pathAndQuery.Split("/").Count - 1).Contains(".") Then
                pathAndQuery = pathAndQuery.Substring(0, pathAndQuery.LastIndexOf("/"))
            End If
            baseAddress = u.Scheme + Uri.SchemeDelimiter + u.Authority + pathAndQuery

            'If the relativePath contains ../ then
            ' adjust the baseAddress accordingly

            While relativePath.StartsWith("../")
                relativePath = relativePath.Substring(3)
                If baseAddress.LastIndexOf("/") > baseAddress.IndexOf("//" + 2) Then
                    baseAddress = baseAddress.Substring(0, baseAddress.LastIndexOf("/")).TrimEnd("/")
                End If
            End While

            Return baseAddress + "/" + relativePath
        End If

    End Function

End Class

Thanks!