Hey guys, I got my application working fine but it's using a constant 50%-ish of my CPU power, which is way too much I believe. Can any of you figure out what's causing this, or tell me how I can find out myself? :)
What we're doing is reading a HTML link, putting that in a datagrid, then moving on to next link etc. until we have 50 pages' data read. Then we copy all data over to second datagrid, put that data in a dataset and export it to XML. Next we clear the datagrids and then we move on to the next 50 links.
CPU-usage is around 40-50% during all of these tasks so it's not just one element that's causing it. CPU is; AMD AThlon II x2 255 3.1GHz
Are the resources used normal or is there indeed something I'm doing "wrong" ?
Imports System.Text.RegularExpressions
Public Class Form1
Public apppath As String = My.Application.Info.DirectoryPath
Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
'Start at this topic id
StartAt.Text = "34315"
'Don't show script errors
WebBrowser1.ScriptErrorsSuppressed = True
End Sub
Private Sub StartManual_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles StartManual.Click
getdata()
End Sub
Public Sub getdata()
'For each link, get content. 50 (51) links at a time to prevent big data loss on crash or something.
For intyX2 As Integer = StartAt.Text To StartAt.Text + 50
If StartAt.Text < 50000 Then
Dim contentholder As String = ""
WebBrowser1.Navigate("http://www.xxwebsiteherexx.nl/showthread.php?t=" & StartAt.Text)
Do Until WebBrowser1.ReadyState = WebBrowserReadyState.Complete
Application.DoEvents()
Loop
'Insert link in datagrid
DataGridView1.Rows(DataGridView1.Rows.Add()).Cells(0).Value = WebBrowser1.Url.ToString
'Get messages, put in "contentholder, and add to datagrid.
Dim PageElement2 As HtmlElementCollection = WebBrowser1.Document.GetElementsByTagName("div")
For Each CurElement2 As HtmlElement In PageElement2
'ListBox1.Items.Add(CurElement.GetAttribute("className").ToString())
If (CurElement2.GetAttribute("className") = "content") Then
contentholder = contentholder & vbCrLf & "<HR>" & vbCrLf & CurElement2.InnerHtml
End If
Next
'Add content to datagrid
DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(1).Value = contentholder
contentholder = ""
'Strip topicID from URL and add to datagrid
Dim thetopicid As String
thetopicid = Regex.Replace(WebBrowser1.Url.ToString, "http://(.*)" & "-", "")
thetopicid = thetopicid.Replace("/", "")
DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(2).Value = thetopicid
'Add site to datagrid
DataGridView1.Rows(DataGridView1.Rows.Count - 2).Cells(3).Value = "xxwebsiteherexx.nl"
'Get main post title
Dim titleholder As String = ""
Dim PageElement4 As HtmlElementCollection = WebBrowser1.Document.GetElementsByTagName("span")
For Each CurElement As HtmlElement In PageElement4
If (CurElement.GetAttribute("className") = "threadtitle") Then
If titleholder = "" Then
titleholder = CurElement.InnerText
Else
'If "titleholder" already has a value, keep that (in case of multiple spans with same classname)
titleholder = titleholder
End If
End If
Next
'Add first-post title to both datagrid fields
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(6).Value = titleholder
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(11).Value = titleholder
titleholder = ""
'Get date and time in correct format
Dim thismonth As String = DateTime.Now.Month
If thismonth < 10 Then
thismonth = "0" & thismonth.ToString
Else
thismonth = thismonth.ToString
End If
Dim Nowtime As String
Nowtime = DateTime.Now.Year.ToString & "-" & thismonth & "-" & DateTime.Now.Day & " " & DateTime.Now.Hour & ":" & DateTime.Now.Minute & ":" & DateTime.Now.Second
'Add date and time to all 4 fields
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(4).Value = Nowtime
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(5).Value = Nowtime
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(12).Value = Nowtime
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(13).Value = Nowtime
'Other (non-variable) values required
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(8).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(9).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(10).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(14).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(16).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(17).Value = "value"
DataGridView1.Rows.Item(DataGridView1.Rows.Count - 2).Cells(18).Value = "value"
'Done with current row, on to next one.
StartAt.Text = StartAt.Text + 1
StartAt.Refresh()
Else
'If topic-number exceeds pre-set maximum, close form
Me.Close()
End If
Next
'For-each row in datagridview1.
For intyX2 As Integer = 0 To DataGridView1.Rows.Count - 1
'Check if row (content) already exists in datagridview2 and make sure it's not a NewRow
If Not DataGridView1.Rows(intyX2).Cells(1).Value = "" And Not DataGridView1.Rows(intyX2).IsNewRow Then
Dim found As Boolean = False
For Each row As DataGridViewRow In DataGridView2.Rows
For Each cell As DataGridViewCell In row.Cells
If cell.Value IsNot Nothing AndAlso cell.Value.ToString().Equals(DataGridView1.Rows(intyX2).Cells(2).Value) Then
found = True
End If
Next
Next
'If not found, copy row over to Datagridview2
If Not found Then
DataGridView2.Rows(DataGridView2.Rows.Add()).Cells(0).Value = DataGridView1.Rows(intyX2).Cells(0).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(1).Value = DataGridView1.Rows(intyX2).Cells(1).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(2).Value = DataGridView1.Rows(intyX2).Cells(2).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(3).Value = DataGridView1.Rows(intyX2).Cells(3).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(4).Value = DataGridView1.Rows(intyX2).Cells(4).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(5).Value = DataGridView1.Rows(intyX2).Cells(5).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(6).Value = DataGridView1.Rows(intyX2).Cells(6).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(7).Value = DataGridView1.Rows(intyX2).Cells(7).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(8).Value = DataGridView1.Rows(intyX2).Cells(8).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(9).Value = DataGridView1.Rows(intyX2).Cells(9).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(10).Value = DataGridView1.Rows(intyX2).Cells(10).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(11).Value = DataGridView1.Rows(intyX2).Cells(11).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(12).Value = DataGridView1.Rows(intyX2).Cells(12).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(13).Value = DataGridView1.Rows(intyX2).Cells(13).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(14).Value = DataGridView1.Rows(intyX2).Cells(14).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(15).Value = DataGridView1.Rows(intyX2).Cells(15).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(16).Value = DataGridView1.Rows(intyX2).Cells(16).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(17).Value = DataGridView1.Rows(intyX2).Cells(17).Value
DataGridView2.Rows(DataGridView2.Rows.Count - 2).Cells(18).Value = DataGridView1.Rows(intyX2).Cells(18).Value
Me.DataGridView2.FirstDisplayedScrollingRowIndex = DataGridView2.Rows.Count - 2
Me.DataGridView2.Rows(DataGridView2.Rows.Count - 2).Selected = True
DataGridView2.Refresh()
End If
End If
Next
'Clear datagrid1
DataGridView1.Rows.Clear()
'Datagrid2 -> to dataset -> to xml, for easy loading into seperate mysql-uploader application
Dim ds As New System.Data.DataSet
'Take the data and structure from the datagridview and return it as a dataset. You can use
'"Imports System.Data" declaration at the top of your project/class and remove the system.data
'from the various parts of this function.
Try
'Add a new table to the dataset
ds.Tables.Add("Main")
'Add the columns
Dim col As System.Data.DataColumn
'For each colum in the datagridveiw add a new column to your table
For Each dgvCol As DataGridViewColumn In DataGridView2.Columns
col = New System.Data.DataColumn(dgvCol.Name)
ds.Tables("Main").Columns.Add(col)
Next
'Add the rows from the datagridview
Dim row As System.Data.DataRow
Dim colcount As Integer = DataGridView2.Columns.Count - 1
For i As Integer = 0 To DataGridView2.Rows.Count - 1
row = ds.Tables("Main").Rows.Add
For Each column As DataGridViewColumn In DataGridView2.Columns
row.Item(column.Index) = DataGridView2.Rows.Item(i).Cells(column.Index).Value
Next
Next
'Count database files already in folder to determine new database file's name.
Dim amountoffiles As Integer = System.IO.Directory.GetFiles(apppath & "\databases\xxwebsiteherexx.nl\").Length()
ds.WriteXml(apppath & "\databases\xxwebsiteherexx.nl\database" & amountoffiles & ".xml")
'Clear datagrid2
DataGridView2.Rows.Clear()
'Repeat process
getdata()
Catch ex As Exception
'Catch any potential errors and display them to the user
MessageBox.Show("Error Converting from DataGridView" & ex.InnerException.ToString, _
"Error Converting from DataGridView", MessageBoxButtons.OK, MessageBoxIcon.Error)
End Try
End Sub
'Other functions below this line.
Public Function ExtractLinks(ByVal innerhtml As String) As DataTable
Dim dt As New DataTable
dt.Columns.Add("LinkText")
dt.Columns.Add("LinkUrl")
Dim links As MatchCollection = Regex.Matches(innerhtml, "<a.*?href=""(.*?)"".*?>(.*?)</a>")
For Each match As Match In links
Dim dr As DataRow = dt.NewRow
Dim matchUrl As String = match.Groups(1).Value
'Ignore all anchor links
If matchUrl.StartsWith("#") Then
Continue For
End If
'Ignore all javascript calls
If matchUrl.ToLower.StartsWith("javascript:") Then
Continue For
End If
'Ignore all email links
If matchUrl.ToLower.StartsWith("mailto:") Then
Continue For
End If
'Add the link data to datatable
dr("LinkUrl") = matchUrl
dr("LinkText") = match.Groups(2).Value
dt.Rows.Add(dr)
Next
Return dt
End Function
Public Function MapUrl(ByVal baseAddress As String, ByVal relativePath As String) As String
Dim u As New System.Uri(baseAddress)
If relativePath = "./" Then
relativePath = "/"
End If
If relativePath.StartsWith("/") Then
Return u.Scheme + Uri.SchemeDelimiter + u.Authority + relativePath
Else
Dim pathAndQuery As String = u.AbsolutePath
' If the baseAddress contains a file name, like ..../Something.aspx
' Trim off the file name
pathAndQuery = pathAndQuery.Split("?")(0).TrimEnd("/")
If pathAndQuery.Split("/")(pathAndQuery.Split("/").Count - 1).Contains(".") Then
pathAndQuery = pathAndQuery.Substring(0, pathAndQuery.LastIndexOf("/"))
End If
baseAddress = u.Scheme + Uri.SchemeDelimiter + u.Authority + pathAndQuery
'If the relativePath contains ../ then
' adjust the baseAddress accordingly
While relativePath.StartsWith("../")
relativePath = relativePath.Substring(3)
If baseAddress.LastIndexOf("/") > baseAddress.IndexOf("//" + 2) Then
baseAddress = baseAddress.Substring(0, baseAddress.LastIndexOf("/")).TrimEnd("/")
End If
End While
Return baseAddress + "/" + relativePath
End If
End Function
End Class
Thanks!