Hello Team
I am a newbie in C#. I was asked to come up with a program that extracts information from websites and stores it in a .txt file or any other readable format. I have managed to write that program.
My problem now is that I am extracting information from a Chinese website that sells books. I want to extract the data from the results of a query I ran, in a uniform way, as follows:
find the string <div class="listitem pic" from page, and
//extract maintitle
//extract publisher info
//using name="Publishing" to extract publishing company
//using class="describ" to extract description
//using class="panel price" to extract the prices price_d and price_m
// extract discount
and so on. How do I use vectors (lists) in C# to get such information? My code so far is below.
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
//using system.linq;
namespace project_sougu
{
/// <summary>
/// Console program that downloads a search-result page and saves its HTML text to disk.
/// Also contains a stub for turning the page into per-book attribute records.
/// </summary>
class Program
{
    /// <summary>
    /// Downloads the hard-coded search URL, decoding it as gb2312, and writes the
    /// page text to a file named "dangdang" in the current working directory.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        String path = "dangdang";
        string temp = readPage("http://search.dangdang.com/search.php?key=%C7%C7%B2%BC%CB%B9&SearchFromTop=1&catalog=", "gb2312");

        // The writer buffers its output; disposing it flushes the buffer and releases
        // the file handle. Without this the saved file can be empty or truncated.
        using (StreamWriter sw = File.CreateText(path))
        {
            sw.Write(temp);
        }
    }

    /// <summary>
    /// Performs an HTTP request for <paramref name="url"/> and returns the whole
    /// response body decoded with the named text encoding.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="type">Encoding name passed to <c>Encoding.GetEncoding</c>, e.g. "gb2312".</param>
    /// <returns>The complete response body as a string.</returns>
    static string readPage(string url, string type)
    {
        // NOTE(review): on .NET Core / .NET 5+ code-page encodings such as gb2312 are
        // only available after registering CodePagesEncodingProvider - confirm the
        // target runtime.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // Dispose the response, its stream and the reader in all cases so the
        // underlying connection is released even if reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, Encoding.GetEncoding(type)))
        {
            return sr.ReadToEnd();
        }
    }

    /// <summary>
    /// Builds the list of extracted records, one inner list per search result.
    /// </summary>
    /// <remarks>
    /// This is still a stub: <paramref name="page"/> is not parsed yet and a single
    /// hard-coded placeholder record is returned. According to the original author's
    /// note, each record is meant to hold seven fields: maintitle, author, publishing,
    /// description, price_d, price_m, discount. The placeholder values below do not
    /// line up with that order exactly and are kept as they were.
    /// </remarks>
    /// <param name="page">HTML text of the result page (currently unused).</param>
    /// <returns>A list containing one seven-element placeholder record.</returns>
    static List<List<string>> extractAttribute(string page)
    {
        List<List<string>> allresult = new List<List<string>>();

        // The constructor argument is only the initial capacity; the list still has
        // Count == 0, so elements must be appended with Add. Assigning through the
        // indexer (result[0] = ...) throws ArgumentOutOfRangeException.
        List<string> result = new List<string>(7);
        result.Add("jobs");
        result.Add("jobs,stevie");
        result.Add("title");
        result.Add("publishing house");
        result.Add("24.20");
        result.Add("39.80");
        result.Add("year of publish");

        allresult.Add(result);
        return allresult;
    }
}
}