download a web page
-
I created some code to download a web page:
#define _CRT_SECURE_NO_WARNINGS
#define _WINSOCK_DEPRECATED_NO_WARNINGS
#include
#include
#include
#include
#include
#include#pragma comment(lib, "ws2_32.lib")
// Minimal Winsock HTTP client: starts Winsock 2.2, resolves the hard-coded
// host "www.example.com", connects to TCP port 80, sends a GET request for
// "/" and copies every byte received to stdout until recv() returns <= 0.
// argc/argv are not used. Returns 1 if WSAStartup fails and 0 after the
// receive loop ends; the other failure paths call exit(1).
//
// NOTE(review) -- issues visible in the code as posted:
//  * The request buffer is malloc'ed as strlen(path) + strlen(hostname) + 16
//    bytes, but the fixed text of the sprintf format alone is at least 25
//    characters plus the terminating NUL, so sprintf writes past the end of
//    the allocation.
//  * The request carries only the Host header and the socket is never shut
//    down for sending, so the loop ends only when the peer closes the
//    connection or an error occurs. Presumably the server keeps the HTTP/1.1
//    connection open after its response, which would match the reported hang
//    in recv() -- confirm against the server's behaviour.
//  * The results of malloc() and send() are not checked.
//  * error_num is assigned from WSAGetLastError() but never used.
//  * WSACleanup() is never called, and the exit(1) path after a failed
//    connect() leaves without closesocket().
//  * perror() is used for socket()/connect() failures; Winsock reports its
//    error codes through WSAGetLastError(), so the text perror() prints is
//    presumably unrelated to the failure -- verify.
int main(int argc, char* argv[]) {
// The whole body was posted as a single line; the steps are, in order:
// WSAStartup -> gethostbyname -> socket -> connect -> build the request with
// malloc/sprintf -> send -> recv/fwrite loop -> free -> closesocket.
WSADATA wsaData; int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != 0) { std::cerr << "WSAStartup failed: " << iResult << std::endl; return 1; } const char\* hostname = "www.example.com"; const char\* path = "/"; struct hostent\* host = gethostbyname(hostname); struct sockaddr\_in server\_address; int socket\_fd, bytes\_received; char buffer\[1024\]; if (host == NULL) { fprintf(stderr, "Error: Could not resolve hostname.\\n"); int error\_num = WSAGetLastError(); exit(1); } socket\_fd = socket(AF\_INET, SOCK\_STREAM, 0); if (socket\_fd < 0) { perror("Error: Could not create socket.\\n"); exit(1); } memset(&server\_address, 0, sizeof(server\_address)); server\_address.sin\_family = AF\_INET; server\_address.sin\_port = htons(80); memcpy(&server\_address.sin\_addr, host->h\_addr\_list\[0\], host->h\_length); if (connect(socket\_fd, (struct sockaddr\*)&server\_address, sizeof(server\_address)) < 0) { perror("Error: Could not connect to server.\\n"); exit(1); } char\* request = (char \*)malloc(strlen(path) + strlen(hostname) + 16); sprintf(request, "GET %s HTTP/1.1\\r\\nHost: %s\\r\\n\\r\\n", path, hostname); send(socket\_fd, request, strlen(request), 0); while ((bytes\_received = recv(socket\_fd, buffer, sizeof(buffer), 0)) > 0) { fwrite(buffer, 1, bytes\_received, stdout); } free(request); closesocket(socket\_fd); return 0;
}
It prints the web page out, but then it seems to get stuck in the recv function. Anyone know what's wrong? Thanks.
-
I created some code to download a web page:
#define _CRT_SECURE_NO_WARNINGS
#define _WINSOCK_DEPRECATED_NO_WARNINGS
#include
#include
#include
#include
#include
#include#pragma comment(lib, "ws2_32.lib")
// Minimal Winsock HTTP client: starts Winsock 2.2, resolves the hard-coded
// host "www.example.com", connects to TCP port 80, sends a GET request for
// "/" and copies every byte received to stdout until recv() returns <= 0.
// argc/argv are not used. Returns 1 if WSAStartup fails and 0 after the
// receive loop ends; the other failure paths call exit(1).
//
// NOTE(review) -- issues visible in the code as posted:
//  * The request buffer is malloc'ed as strlen(path) + strlen(hostname) + 16
//    bytes, but the fixed text of the sprintf format alone is at least 25
//    characters plus the terminating NUL, so sprintf writes past the end of
//    the allocation.
//  * The request carries only the Host header and the socket is never shut
//    down for sending, so the loop ends only when the peer closes the
//    connection or an error occurs. Presumably the server keeps the HTTP/1.1
//    connection open after its response, which would match the reported hang
//    in recv() -- confirm against the server's behaviour.
//  * The results of malloc() and send() are not checked.
//  * error_num is assigned from WSAGetLastError() but never used.
//  * WSACleanup() is never called, and the exit(1) path after a failed
//    connect() leaves without closesocket().
//  * perror() is used for socket()/connect() failures; Winsock reports its
//    error codes through WSAGetLastError(), so the text perror() prints is
//    presumably unrelated to the failure -- verify.
int main(int argc, char* argv[]) {
// The whole body was posted as a single line; the steps are, in order:
// WSAStartup -> gethostbyname -> socket -> connect -> build the request with
// malloc/sprintf -> send -> recv/fwrite loop -> free -> closesocket.
WSADATA wsaData; int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != 0) { std::cerr << "WSAStartup failed: " << iResult << std::endl; return 1; } const char\* hostname = "www.example.com"; const char\* path = "/"; struct hostent\* host = gethostbyname(hostname); struct sockaddr\_in server\_address; int socket\_fd, bytes\_received; char buffer\[1024\]; if (host == NULL) { fprintf(stderr, "Error: Could not resolve hostname.\\n"); int error\_num = WSAGetLastError(); exit(1); } socket\_fd = socket(AF\_INET, SOCK\_STREAM, 0); if (socket\_fd < 0) { perror("Error: Could not create socket.\\n"); exit(1); } memset(&server\_address, 0, sizeof(server\_address)); server\_address.sin\_family = AF\_INET; server\_address.sin\_port = htons(80); memcpy(&server\_address.sin\_addr, host->h\_addr\_list\[0\], host->h\_length); if (connect(socket\_fd, (struct sockaddr\*)&server\_address, sizeof(server\_address)) < 0) { perror("Error: Could not connect to server.\\n"); exit(1); } char\* request = (char \*)malloc(strlen(path) + strlen(hostname) + 16); sprintf(request, "GET %s HTTP/1.1\\r\\nHost: %s\\r\\n\\r\\n", path, hostname); send(socket\_fd, request, strlen(request), 0); while ((bytes\_received = recv(socket\_fd, buffer, sizeof(buffer), 0)) > 0) { fwrite(buffer, 1, bytes\_received, stdout); } free(request); closesocket(socket\_fd); return 0;
}
It prints the web page out, but then it seems to get stuck in the recv function. Anyone know what's wrong? Thanks.
Some things that might help you diagnose the problem:
- Use a debugger and check the return code from each function to see where it fails.
- A network sniffer like Wireshark can help you see what’s going on on the wire.
- A network terminal program like PuTTY can be used to check the expected behavior and see whether the server really answers the way you expect.

Also, I assume you are using a real server name, not example.com 😀
Mircea
-
I created some code to download a web page:
#define _CRT_SECURE_NO_WARNINGS
#define _WINSOCK_DEPRECATED_NO_WARNINGS
#include
#include
#include
#include
#include
#include#pragma comment(lib, "ws2_32.lib")
// Minimal Winsock HTTP client: starts Winsock 2.2, resolves the hard-coded
// host "www.example.com", connects to TCP port 80, sends a GET request for
// "/" and copies every byte received to stdout until recv() returns <= 0.
// argc/argv are not used. Returns 1 if WSAStartup fails and 0 after the
// receive loop ends; the other failure paths call exit(1).
//
// NOTE(review) -- issues visible in the code as posted:
//  * The request buffer is malloc'ed as strlen(path) + strlen(hostname) + 16
//    bytes, but the fixed text of the sprintf format alone is at least 25
//    characters plus the terminating NUL, so sprintf writes past the end of
//    the allocation.
//  * The request carries only the Host header and the socket is never shut
//    down for sending, so the loop ends only when the peer closes the
//    connection or an error occurs. Presumably the server keeps the HTTP/1.1
//    connection open after its response, which would match the reported hang
//    in recv() -- confirm against the server's behaviour.
//  * The results of malloc() and send() are not checked.
//  * error_num is assigned from WSAGetLastError() but never used.
//  * WSACleanup() is never called, and the exit(1) path after a failed
//    connect() leaves without closesocket().
//  * perror() is used for socket()/connect() failures; Winsock reports its
//    error codes through WSAGetLastError(), so the text perror() prints is
//    presumably unrelated to the failure -- verify.
int main(int argc, char* argv[]) {
// The whole body was posted as a single line; the steps are, in order:
// WSAStartup -> gethostbyname -> socket -> connect -> build the request with
// malloc/sprintf -> send -> recv/fwrite loop -> free -> closesocket.
WSADATA wsaData; int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != 0) { std::cerr << "WSAStartup failed: " << iResult << std::endl; return 1; } const char\* hostname = "www.example.com"; const char\* path = "/"; struct hostent\* host = gethostbyname(hostname); struct sockaddr\_in server\_address; int socket\_fd, bytes\_received; char buffer\[1024\]; if (host == NULL) { fprintf(stderr, "Error: Could not resolve hostname.\\n"); int error\_num = WSAGetLastError(); exit(1); } socket\_fd = socket(AF\_INET, SOCK\_STREAM, 0); if (socket\_fd < 0) { perror("Error: Could not create socket.\\n"); exit(1); } memset(&server\_address, 0, sizeof(server\_address)); server\_address.sin\_family = AF\_INET; server\_address.sin\_port = htons(80); memcpy(&server\_address.sin\_addr, host->h\_addr\_list\[0\], host->h\_length); if (connect(socket\_fd, (struct sockaddr\*)&server\_address, sizeof(server\_address)) < 0) { perror("Error: Could not connect to server.\\n"); exit(1); } char\* request = (char \*)malloc(strlen(path) + strlen(hostname) + 16); sprintf(request, "GET %s HTTP/1.1\\r\\nHost: %s\\r\\n\\r\\n", path, hostname); send(socket\_fd, request, strlen(request), 0); while ((bytes\_received = recv(socket\_fd, buffer, sizeof(buffer), 0)) > 0) { fwrite(buffer, 1, bytes\_received, stdout); } free(request); closesocket(socket\_fd); return 0;
}
It prints the web page out, but then it seems to get stuck in the recv function. Anyone know what's wrong? Thanks.
mike7411 wrote:
get stuck in the recv function.
I suspect that none of the suggestions in the other post are going to help with this. Your design is wrong. The message flow looks like this:
- Client: opens socket (server accepts)
- Client: sends request
- Server: sends response
- Client: reads response
- Client: closes socket

Notice that in the above the server does nothing to terminate the message stream. The client is responsible for that, not the server. So recv() sits there waiting for a message that the server will never send.

The HTTP protocol defines a request and then a response. You, however, are not following that protocol. At a minimum you are missing the following:
1. You are not checking for an HTTP error code.
2. You are not reading the 'content-length' header attribute.
3. You are not looking for the response body.

If you were doing the second, you would use the content-length to read to the end of the message. It specifically defines how many bytes the server should send in the response body.

Additionally, there are error conditions that good code must expect:
- The content-length might be missing. Invalid HTTP, but one must still anticipate that.
- The content-length is too long. This is very difficult to deal with, and it still results in the problem you are seeing, so you must add a timeout. Google for how to do that.

What if the content-length is too short? Myself, I just ignore that case, because in most cases the content-length will be right. A too-short value might lead to other problems, but you have no way to detect that unless you always do a timed read, and that would slow the application for no benefit (again, because the value will almost always be right).
-
I created some code to download a web page:
#define _CRT_SECURE_NO_WARNINGS
#define _WINSOCK_DEPRECATED_NO_WARNINGS
#include
#include
#include
#include
#include
#include#pragma comment(lib, "ws2_32.lib")
// Minimal Winsock HTTP client: starts Winsock 2.2, resolves the hard-coded
// host "www.example.com", connects to TCP port 80, sends a GET request for
// "/" and copies every byte received to stdout until recv() returns <= 0.
// argc/argv are not used. Returns 1 if WSAStartup fails and 0 after the
// receive loop ends; the other failure paths call exit(1).
//
// NOTE(review) -- issues visible in the code as posted:
//  * The request buffer is malloc'ed as strlen(path) + strlen(hostname) + 16
//    bytes, but the fixed text of the sprintf format alone is at least 25
//    characters plus the terminating NUL, so sprintf writes past the end of
//    the allocation.
//  * The request carries only the Host header and the socket is never shut
//    down for sending, so the loop ends only when the peer closes the
//    connection or an error occurs. Presumably the server keeps the HTTP/1.1
//    connection open after its response, which would match the reported hang
//    in recv() -- confirm against the server's behaviour.
//  * The results of malloc() and send() are not checked.
//  * error_num is assigned from WSAGetLastError() but never used.
//  * WSACleanup() is never called, and the exit(1) path after a failed
//    connect() leaves without closesocket().
//  * perror() is used for socket()/connect() failures; Winsock reports its
//    error codes through WSAGetLastError(), so the text perror() prints is
//    presumably unrelated to the failure -- verify.
int main(int argc, char* argv[]) {
// The whole body was posted as a single line; the steps are, in order:
// WSAStartup -> gethostbyname -> socket -> connect -> build the request with
// malloc/sprintf -> send -> recv/fwrite loop -> free -> closesocket.
WSADATA wsaData; int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != 0) { std::cerr << "WSAStartup failed: " << iResult << std::endl; return 1; } const char\* hostname = "www.example.com"; const char\* path = "/"; struct hostent\* host = gethostbyname(hostname); struct sockaddr\_in server\_address; int socket\_fd, bytes\_received; char buffer\[1024\]; if (host == NULL) { fprintf(stderr, "Error: Could not resolve hostname.\\n"); int error\_num = WSAGetLastError(); exit(1); } socket\_fd = socket(AF\_INET, SOCK\_STREAM, 0); if (socket\_fd < 0) { perror("Error: Could not create socket.\\n"); exit(1); } memset(&server\_address, 0, sizeof(server\_address)); server\_address.sin\_family = AF\_INET; server\_address.sin\_port = htons(80); memcpy(&server\_address.sin\_addr, host->h\_addr\_list\[0\], host->h\_length); if (connect(socket\_fd, (struct sockaddr\*)&server\_address, sizeof(server\_address)) < 0) { perror("Error: Could not connect to server.\\n"); exit(1); } char\* request = (char \*)malloc(strlen(path) + strlen(hostname) + 16); sprintf(request, "GET %s HTTP/1.1\\r\\nHost: %s\\r\\n\\r\\n", path, hostname); send(socket\_fd, request, strlen(request), 0); while ((bytes\_received = recv(socket\_fd, buffer, sizeof(buffer), 0)) > 0) { fwrite(buffer, 1, bytes\_received, stdout); } free(request); closesocket(socket\_fd); return 0;
}
It prints the web page out, but then it seems to get stuck in the recv function. Anyone know what's wrong? Thanks.
Some more on the "recv" protocol: [https://stackoverflow.com/questions/4362525/about-recv-and-the-read-buffer-c-berkeley-sockets](https://stackoverflow.com/questions/4362525/about-recv-and-the-read-buffer-c-berkeley-sockets)
"Before entering on an understanding, I have meditated for a long time, and have foreseen what might happen. It is not genius which reveals to me suddenly, secretly, what I have to say or to do in a circumstance unexpected by other people; it is reflection, it is meditation." - Napoleon I
-
Some things that might help you diagnose the problem:
- Use a debugger and check the return code from each function to see where it fails.
- A network sniffer like Wireshark can help you see what’s going on on the wire.
- A network terminal program like PuTTY can be used to check the expected behavior and see whether the server really answers the way you expect.

Also, I assume you are using a real server name, not example.com 😀
Mircea
I got it working. I added this code after the send call:
// Snippet the poster added right after the send() call. Its line breaks were
// collapsed in this extract, so the leading comment ends at "sent" and the
// statements follow on the same line. It calls shutdown(socket_fd, SD_SEND)
// and, if that returns SOCKET_ERROR, prints the WSAGetLastError() code,
// closes the socket, calls WSACleanup() and returns 1.
// shutdown the connection since no more data will be sent iResult = shutdown(socket\_fd, SD\_SEND); if (iResult == SOCKET\_ERROR) { printf("shutdown failed: %d\\n", WSAGetLastError()); closesocket(socket\_fd); WSACleanup(); return 1; }
I also had to increase the buffer size for malloc. There was a bug where it wasn't allocating enough bytes. Thanks.
-
I got it working. I added this code after the send call:
// Snippet the poster added right after the send() call. Its line breaks were
// collapsed in this extract, so the leading comment ends at "sent" and the
// statements follow on the same line. It calls shutdown(socket_fd, SD_SEND)
// and, if that returns SOCKET_ERROR, prints the WSAGetLastError() code,
// closes the socket, calls WSACleanup() and returns 1.
// shutdown the connection since no more data will be sent iResult = shutdown(socket\_fd, SD\_SEND); if (iResult == SOCKET\_ERROR) { printf("shutdown failed: %d\\n", WSAGetLastError()); closesocket(socket\_fd); WSACleanup(); return 1; }
I also had to increase the buffer size for malloc. There was a bug where it wasn't allocating enough bytes. Thanks.
Glad to hear it! Now, if you want to check a more C++ way of working with sockets, you can take a look at my series of articles about working with Windows sockets in C++. First instalment is Windows Sockets Streams[^]. Latest version of code can be downloaded from GitHub[^].
Mircea