Hi. I would like to build a web crawler, but I am only familiar with C and C++. Can anyone give me tips on how to develop one in those two languages? I will be using a Borland 4.5 compiler.
Protuberance 67 Junior Poster in Training
You can use MFC together with the WinINet API.
Example (header):
#include <WinInet.h>
#pragma comment(lib, "wininet")
// Small wrapper around the WinINet handles needed to issue a single HTTP
// request (session -> connection -> request) and save the response to a file.
class CHTTP
{
public:
// Opens a session, connects to lpszServerName, sends a POST when Method == 1
// (a GET for any other value) for lpszTarget and writes the response to
// lpszFileName.
CHTTP(LPCTSTR lpszServerName, LPCTSTR lpszTarget, int Method, LPCTSTR lpszFileName);
// Creates an idle object: no error recorded, all handles NULL.
CHTTP();
// Closes the request, connection and session handles, in that order.
~CHTTP();
public:
// Opens the WinINet session with the given user-agent string unless one is
// already open. Returns true when a session handle is available.
bool OpenInet(LPCTSTR lpszAgent);
// Creates a connection to lpszServerName (a host name, e.g. "google.com")
// on the default HTTP port. Returns true on success.
bool Connection(LPCTSTR lpszServerName);
// Closes the session handle, if any, and resets it to NULL.
void CloseOpenedInet();
// Closes the connection handle, if any, and resets it to NULL.
void CloseConection();
// Returns bTest unchanged; when bTest is false, first records an error code
// in m_dwLastError.
bool CheckError(bool bTest);
// Error code recorded for the most recent failed operation.
DWORD m_dwLastError;
// Session handle (set by OpenInet).
HINTERNET m_hInternet;
// Connection handle (set by Connection).
HINTERNET m_hConnection;
// Request handle (set by SendRequest).
HINTERNET m_hRequest;
// Closes the request handle, if any, and resets it to NULL.
void CloseRequest();
// Opens a request with the given verb (e.g. "GET") for lpszTarget on the
// current connection and sends it without extra headers or body.
bool SendRequest(LPCTSTR lpszVerb, LPCTSTR lpszTarget);
// Sends a GET request for lpszTarget.
bool Get(LPCTSTR lpszTarget);
// Sends a POST request for lpszTarget.
bool Post(LPCTSTR lpszTarget);
// Writes the response of the current request into the file FileName.
bool WriteRespToFile(LPCTSTR FileName);
// File used by WriteRespToFile while it writes the response; not meaningful
// outside that call.
CFile *m_lpFile;
};
And the implementation:
#include "StdAfx.h"
#include "HTTPWork.h"
// Convenience constructor: performs a complete request in one go.
//
//   lpszServerName  host to connect to
//   lpszTarget      resource to request on that host
//   Method          1 sends a POST; any other value sends a GET
//   lpszFileName    file that receives the response
//
// A constructor cannot return a status, so callers should inspect
// m_dwLastError (and m_hRequest) afterwards to see whether the request worked.
CHTTP::CHTTP(LPCTSTR lpszServerName, LPCTSTR lpszTarget, int Method, LPCTSTR lpszFileName)
    : m_dwLastError(0)
    , m_hInternet(NULL)
    , m_hConnection(NULL)
    , m_hRequest(NULL)
    , m_lpFile(NULL)
{
    // Stop at the first failing step. Running the remaining steps could not
    // succeed anyway and would overwrite m_dwLastError with an unrelated
    // code, hiding the real cause of the failure.
    if (!OpenInet(TEXT("")) || !Connection(lpszServerName))
    {
        return;
    }

    const bool bSent = (Method == 1) ? Post(lpszTarget) : Get(lpszTarget);
    if (bSent)
    {
        WriteRespToFile(lpszFileName);
    }
}
// Default constructor: the object starts idle, with no error recorded, no
// WinINet handles open and no output file.
CHTTP::CHTTP()
    : m_dwLastError(0),
      m_hInternet(NULL),
      m_hConnection(NULL),
      m_hRequest(NULL),
      m_lpFile(NULL)
{
}
// Releases every WinINet handle the object still holds. The handles are
// closed from the most derived to the root: request, then connection, then
// session. Each Close* call does nothing when its handle is already NULL.
CHTTP::~CHTTP()
{
CloseRequest();
CloseConection();
CloseOpenedInet();
}
// Passes bTest through unchanged. When bTest is false, the calling thread's
// last-error code is stored in m_dwLastError first.
bool CHTTP::CheckError(bool bTest)
{
    if (!bTest)
        m_dwLastError = ::GetLastError();

    return bTest;
}
// Opens the WinINet session with lpszAgent as the user-agent string, using
// the preconfigured access type. An already open session is reused.
// Returns true when a session handle is available; on failure the error code
// is recorded through CheckError.
bool CHTTP::OpenInet(LPCTSTR lpszAgent)
{
    if (m_hInternet != NULL)
    {
        // Session already open: nothing to do.
        return CheckError(true);
    }

    m_hInternet = ::InternetOpen(lpszAgent, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
    const bool bOpened = (m_hInternet != NULL);
    return CheckError(bOpened);
}
// Connects the open session to lpszServerName on the default HTTP port,
// replacing any previous connection.
// Returns true on success; on failure the error code is recorded through
// CheckError and m_hConnection is NULL.
bool CHTTP::Connection(LPCTSTR lpszServerName)
{
    // A request handle is derived from the connection it was opened on, and
    // derived handles have to be closed before their parent. Closing the
    // request here also keeps m_hRequest from referring to a request that
    // belonged to the connection being replaced.
    CloseRequest();
    CloseConection();

    // CloseConection() always leaves m_hConnection NULL, so no check is
    // needed before opening the new connection.
    m_hConnection = ::InternetConnect(m_hInternet, lpszServerName, INTERNET_DEFAULT_HTTP_PORT,
                                      NULL, NULL, INTERNET_SERVICE_HTTP, 0, 1);
    return CheckError(m_hConnection != NULL);
}
// Closes the session handle when one is open and marks it as closed.
void CHTTP::CloseOpenedInet()
{
    if (m_hInternet != NULL)
    {
        ::InternetCloseHandle(m_hInternet);
        m_hInternet = NULL;
    }
}
// Closes the connection handle when one is open and marks it as closed.
void CHTTP::CloseConection()
{
    if (m_hConnection == NULL)
    {
        return; // nothing to close
    }

    ::InternetCloseHandle(m_hConnection);
    m_hConnection = NULL;
}
// Closes the request handle when one is open and marks it as closed.
void CHTTP::CloseRequest()
{
    if (!m_hRequest)
        return; // no request outstanding

    ::InternetCloseHandle(m_hRequest);
    m_hRequest = NULL;
}
// Opens a request with verb lpszVerb (e.g. "GET") for lpszTarget on the
// current connection and sends it with no extra headers and no body.
//
// Returns true when the request was sent; m_hRequest then refers to it and
// the response can be read. Returns false otherwise, with the error code in
// m_dwLastError. When sending fails, the request, connection and session
// handles are all closed.
bool CHTTP::SendRequest(LPCTSTR lpszVerb, LPCTSTR lpszTarget)
{
    // Always discard the previous request, even without a connection, so a
    // stale request can never be re-sent in place of the one asked for.
    CloseRequest();

    if (m_hConnection == NULL)
    {
        // No WinINet call is made here, so GetLastError() would be unrelated;
        // report the missing connection explicitly.
        m_dwLastError = ERROR_INVALID_HANDLE;
        return false;
    }

    m_hRequest = ::HttpOpenRequest(m_hConnection, lpszVerb, lpszTarget, NULL, NULL, NULL,
                                   INTERNET_FLAG_KEEP_CONNECTION, 1);
    if (m_hRequest == NULL)
    {
        return CheckError(false);
    }

    if (!::HttpSendRequest(m_hRequest, NULL, 0, NULL, 0))
    {
        // Record the error before cleaning up: the close calls below may
        // change the thread's last-error value.
        CheckError(false);
        CloseRequest();
        CloseConection();
        CloseOpenedInet();
        return false;
    }

    return true;
}
// Sends a GET request for lpszTarget; returns SendRequest's result.
bool CHTTP::Get(LPCTSTR lpszTarget)
{
    const bool bSent = SendRequest(TEXT("GET"), lpszTarget);
    return bSent;
}
// Sends a POST request for lpszTarget; returns SendRequest's result.
bool CHTTP::Post(LPCTSTR lpszTarget)
{
    const bool bSent = SendRequest(TEXT("POST"), lpszTarget);
    return bSent;
}
// Reads the response of the current request and writes it to FileName,
// creating the file or truncating an existing one.
//
// Returns true when the whole response was written. Returns false when there
// is no request, when reading the response fails, or when the file cannot be
// opened or written; the error code is then stored in m_dwLastError.
// As before, the file is only created once the first read has succeeded.
bool CHTTP::WriteRespToFile(LPCTSTR FileName)
{
    if (m_hRequest == NULL)
    {
        return CheckError(false);
    }

    char Data[2048];
    DWORD dwBytesRead = 0;

    if (!::InternetReadFile(m_hRequest, Data, sizeof(Data), &dwBytesRead))
    {
        return CheckError(false);
    }

    // The file lives on the stack so it is released on every path, including
    // when Write() throws. m_lpFile is cleared rather than left pointing at a
    // deleted object.
    m_lpFile = NULL;
    CFile file;
    if (!file.Open(FileName, CFile::modeCreate | CFile::modeWrite))
    {
        // Best effort: relies on Open() leaving the OS error code in place.
        return CheckError(false);
    }

    bool bOk = true;
    try
    {
        // A successful read of zero bytes marks the end of the response.
        while (dwBytesRead != 0)
        {
            file.Write(Data, dwBytesRead);
            if (!::InternetReadFile(m_hRequest, Data, sizeof(Data), &dwBytesRead))
            {
                // A failed read is an error, not the end of the response.
                bOk = CheckError(false);
                break;
            }
        }
        file.Close();
    }
    catch (CFileException* pEx)
    {
        // MFC reports write/close failures by throwing; keep the OS error
        // code and release the exception object.
        m_dwLastError = static_cast<DWORD>(pEx->m_lOsError);
        pEx->Delete();
        bOk = false;
    }

    return bOk;
}
Alternatively, you can use sockets directly.
Be a part of the DaniWeb community
We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.