Get source page of a site using Sockets

I've been trying to get the source page of a site using sockets, and found some code on the internet...
It works for the main sites, for ex. cplusplus.com, but not for cplusplus.com/info

This is my code:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <winsock2.h>
#include <windows.h>
#include <iostream>
#pragma comment(lib,"ws2_32.lib")

using namespace std;

// Connects to www.cplusplus.com on TCP port 80, sends one hard-coded HTTP GET
// request and echoes whatever the server sends back to stdout.
// Returns 1 if Winsock start-up or the connect fails, 0 otherwise.
int main (){
	WSADATA wsaData;

    // Winsock must be initialised before any other socket call; request version 2.2.
    if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) {
		cout << "WSAStartup failed.\n";
        system("pause");
		return 1;
    }

	// NOTE(review): the result of socket() is not checked here.
	SOCKET Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);

	// Resolve the host name to an address.
	// NOTE(review): host is dereferenced below without a null check.
	struct hostent *host;
	host = gethostbyname("www.cplusplus.com");

	// Fill in the destination: port 80, first address returned by the lookup.
	SOCKADDR_IN SockAddr;
	SockAddr.sin_port=htons(80);
	SockAddr.sin_family=AF_INET;
	SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);

	cout << "Connecting...\n";
	if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0){
		cout << "Could not connect";
		system("pause");
		return 1;
	}
	cout << "Connected.\n";

	// NOTE(review): this request line is what the thread identifies as the problem.
	// The path repeats the host name, and "/HTTP/1.1" carries a stray leading slash;
	// the replies below give the working form as "GET /info/ HTTP/1.1".
	string sendbuf = "GET /www.cplusplus.com/info  /HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n";

	send(Socket,sendbuf.c_str(), sendbuf.length(), 0);
	char buffer[10000];

	// Keep reading until recv() reports 0 bytes.
	// NOTE(review): buffer is streamed as a C string, but nothing in this loop
	// writes a terminating '\0' after the received bytes, and a negative recv()
	// result is not treated as an error - confirm against the recv documentation.
	int nDataLength = 1;
	while (nDataLength != 0)
	{
		nDataLength = recv(Socket,buffer,10000,0);
		cout << buffer;
	}

	closesocket(Socket);
        WSACleanup();

	system("pause");
	return 0;
}


It gives me page not found...
closed account (G309216C)
I am guessing this is not homework, so I suggest using WinInet; it is dedicated to interacting with the Internet, specifically websites.

Read and learn from the documentation provided by MSDN. Just love it!
MSDN Documentation: http://msdn.microsoft.com/en-us/library/windows/desktop/aa383630(v=vs.85).aspx

Source Code to get website source code:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#pragma comment(lib,"wininet.lib") /*This links wininet lib only works on Visual Studio VC++ */
#include<iostream>
#include<cstring>
#include<windows.h>
#include<wininet.h>
using namespace std;
// Downloads the resource at a fixed URL through WinINet and writes the raw
// bytes to stdout, then waits for Enter.
// Returns 0 on success and 1 if the WinINet session or the URL cannot be opened.
int main(){
   // Open a WinINet session using the system's preconfigured proxy settings.
   HINTERNET connect = InternetOpen("MyBrowser",INTERNET_OPEN_TYPE_PRECONFIG,NULL, NULL, 0);

   if(!connect){
      cout<<"Connection Failed or Syntax error";
      return 1;
   }

   // Request the URL, bypassing the cache.
   HINTERNET OpenAddress = InternetOpenUrl(connect,"http://localhost/", NULL, 0, INTERNET_FLAG_PRAGMA_NOCACHE|INTERNET_FLAG_KEEP_CONNECTION, 0);

   if ( !OpenAddress )
   {
      DWORD ErrorNum = GetLastError();
      cout<<"Failed to open URL \nError No: "<<ErrorNum;
      InternetCloseHandle(connect);
      return 1;
   }

   char DataReceived[4096];
   DWORD NumberOfBytesRead = 0;
   // InternetReadFile signals end of data by succeeding with zero bytes read.
   while(InternetReadFile(OpenAddress, DataReceived, sizeof(DataReceived), &NumberOfBytesRead) && NumberOfBytesRead )
   {
      // The buffer is not NUL-terminated, so write exactly the bytes received
      // instead of streaming it as a C string.
      cout.write(DataReceived, static_cast<std::streamsize>(NumberOfBytesRead));
   }

   InternetCloseHandle(OpenAddress);
   InternetCloseHandle(connect);

   cin.get();
   return 0;
}
Last edited on
WinINet is prob a better API for your purposes (or cURL).

But you could try:

GET /info/  /HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n
GET /info/ HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n

Andy

Edit: corrected string based on modoran's comments below
Last edited on
Watch out for the HTTP protocol syntax:


This line is incorrect:
GET /info/ /HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n


Working one is this:
GET /info/ HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n


A quick test example can be made like this:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <winsock2.h>
#include <windows.h>
#include <iostream>
#include <string>

//#pragma comment(lib,"ws2_32.lib")

using namespace std;

int main (){
	WSADATA wsaData;

    if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) {
		cout << "WSAStartup failed.\n";
        system("pause");
		return 1;
    }

	SOCKET Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);

	struct hostent *host;
	host = gethostbyname("www.cplusplus.com");

	SOCKADDR_IN SockAddr;
	SockAddr.sin_port=htons(80);
	SockAddr.sin_family=AF_INET;
	SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);

	cout << "Connecting...\n";
	if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0){
		cout << "Could not connect";
		system("pause");
		return 1;
	}
	cout << "Connected.\n";

	string sendbuf = "GET /info/ HTTP/1.1\r\nHost: www.cplusplus.com\r\nConnection: close\r\n\r\n";

	send(Socket,sendbuf.c_str(), sendbuf.length(), 0);
	char buffer[10000];
    string buff;
	int nDataLength = 1;
	while (nDataLength != 0)
	{
	    buffer[0] = 0;
		nDataLength = recv(Socket,buffer,10000,0);
		buff.append (buffer, nDataLength);
		//cout << buffer;
	}

	closesocket(Socket);
        WSACleanup();

    cout << buff;
	system("pause");
	return 0;
}


Thanks guys! @SpaceWorm, it isn't homework, but a small personal project, and thanks, it works wonderfully :)
@modoran

I've fixed my mail

But for some reason, the erroneous string (with /HTTP) also works with cplusplus.com

(I tried both, and the downloaded text only differed by a couple of timestamps.)

Andy
Last edited on
@andywestken, I'm having a problem with your code....
when I try to convert the char* to a string, it deletes most of the data of the char*, and only keeps the end of the source file (mostly garbage)...
Last edited on
What code?

I only suggested a change to the HTTP GET command?

Andy
Yeah my bad, haha

Anyway, thanks a lot guys, I was able to create a good working solution thanks to your help :)
Topic archived. No new replies allowed.