I have a website with the url to be entered in TextBox2 . I want to extract names, email ID and mobile numbers of people from the website. The url in TextBox2 contains the names and the email IDs, however the mobile numbers are present on another webpage (profile page) the url of which has to be extracted along with names and email IDs from the first page. I am extracting the text using tags and classes.
On Button1 click :
ListBox1 has names
ListBox2 has emails
ListBox4 has url for profile page
On Button2 click : Supposed to go to each url in ListBox4, get number and store them in ListBox3
Timer1 : To supress script errors
PROBLEM : When i navigate to any of the url in ListBox4 to get numbers, it works fine, but when I put a loop to get numbers of ALL people from the listbox, the ListBox3 remains empty.
The code i'm using is
Imports System.IO
Imports System.Net
Public Class Form1
Dim WithEvents myWebBrowser As New WebBrowser
Dim WithEvents myWebBrowser2 As New WebBrowser
Dim numURL As String
Dim num As String
Private Sub GetSource()
Dim objResponse As WebResponse
Dim objRequest As WebRequest = HttpWebRequest.Create(TextBox2.Text)
objResponse = objRequest.GetResponse()
Using sr As New StreamReader(objResponse.GetResponseStream())
myWebBrowser.DocumentText = sr.ReadToEnd()
sr.Close()
End Using
End Sub
Private Sub GetSourceForNum()
Dim objResponse As WebResponse
Dim objRequest As WebRequest = HttpWebRequest.Create(numURL)
objResponse = objRequest.GetResponse()
Using sr As New StreamReader(objResponse.GetResponseStream())
myWebBrowser2.DocumentText = sr.ReadToEnd()
sr.Close()
End Using
End Sub
Private Sub myWebBrowser_DocumentCompleted(ByVal sender As System.Object, ByVal e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles myWebBrowser.DocumentCompleted
If myWebBrowser.ReadyState = WebBrowserReadyState.Complete Then
Me.Cursor = Cursors.Default
GetFromWebsite()
End If
Timer1.Enabled = False
End Sub
Private Sub myWebBrowser2_DocumentCompleted(ByVal sender As System.Object, ByVal e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles myWebBrowser2.DocumentCompleted
If myWebBrowser.ReadyState = WebBrowserReadyState.Complete Then
Me.Cursor = Cursors.Default
GetNumFromWebsite()
End If
Timer1.Enabled = False
End Sub
Private Sub IterateAllDivTags()
For Each currentDiv_Element As HtmlElement In myWebBrowser.Document.GetElementsByTagName(TextBox3.Text)
If currentDiv_Element.GetAttribute("className") = TextBox4.Text Then
TextBox1.Text = TextBox1.Text & vbNewLine & currentDiv_Element.InnerText
End If
Next
End Sub
Private Sub IterateAnyTag(ByVal strTag As String, ByVal strClass As String)
For Each currentDiv_Element As HtmlElement In myWebBrowser.Document.GetElementsByTagName(strTag)
If currentDiv_Element.GetAttribute("className") = strClass Then
TextBox1.Text = TextBox1.Text & vbNewLine & currentDiv_Element.InnerText
End If
Next
End Sub
Private Sub GetFromWebsite()
GetLinkUrl()
GetNamesFromWebsite()
GetEmailFromWebsite()
' GetLinkUrl()
'----------------------
' Dim i As Int16
' For i = 0 To ListBox1.Items.Count - 1
' numURL = ListBox3.Items.Item(i).ToString
' num = ""
' GetSourceForNum()
' Next
'----------------------
' MergeInfo()
End Sub
Private Sub GetNamesFromWebsite()
For Each currentDiv_Element As HtmlElement In myWebBrowser.Document.GetElementsByTagName("a")
If currentDiv_Element.GetAttribute("className") = "names" Then
ListBox1.Items.Add(currentDiv_Element.InnerText)
End If
Next
End Sub
Private Sub GetEmailFromWebsite()
For Each currentDiv_Element As HtmlElement In myWebBrowser.Document.GetElementsByTagName("div")
If currentDiv_Element.GetAttribute("className") = "emailID" Then
ListBox2.Items.Add(currentDiv_Element.InnerText)
End If
Next
End Sub
Private Sub GetNumFromWebsite()
Try
num = ""
For Each currentDiv_Element As HtmlElement In mywebBrowser2.Document.GetElementsByTagName("span")
If currentDiv_Element.GetAttribute("className") = "tel" Then
num = num & " " & currentDiv_Element.InnerText
' ListBox4.Items.Add(currentDiv_Element.InnerText)
' MessageBox.Show(currentDiv_Element.InnerText)
End If
Next
ListBox4.Items.Add(num)
Catch
End Try
End Sub
Private Sub GetLinkUrl()
For Each currentDiv_Element As HtmlElement In myWebBrowser.Document.GetElementsByTagName("a")
If currentDiv_Element.GetAttribute("className") = "result-title" Then
ListBox3.Items.Add(currentDiv_Element.GetAttribute("href"))
End If
Next
End Sub
Private Sub MergeInfo()
Dim i As Int16
For i = 0 To ListBox1.Items.Count - 1
TextBox1.Text = TextBox1.Text & vbNewLine & vbNewLine & "Name : " & ListBox1.Items.Item(i) & vbNewLine & "Email0 : " & ListBox2.Items.Item(i) & vbNewLine & "Telephone : " & ListBox4.Items.Item(i)
Next
End Sub
Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
End Sub
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
' Dim i As Int16
' For i = 0 To 3
ListBox1.Items.Clear()
ListBox2.Items.Clear()
ListBox3.Items.Clear()
ListBox4.Items.Clear()
Timer1.Enabled = True
GetSource()
' Next
End Sub
Private Sub Timer1_Tick(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Timer1.Tick
Try
myWebBrowser.ScriptErrorsSuppressed = True
myWebBrowser2.ScriptErrorsSuppressed = True
Catch
End Try
End Sub
Private Sub Button2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button2.Click
Timer1.Enabled = True
Dim i As Int16
' For i = 0 To ListBox4.Items.Count
numURL = ListBox3.Items.Item(0)
GetSourceForNum()
' System.Threading.Thread.Sleep(1000)
' Next
End Sub
End Class