// URL Downloader
/* * Run with an http: or https: url on the command line. The program will * fetch the indicated document with HTTP 1.0 and print the entire response * on standard output, including headers. The URL parsing is far from * complete, so keep it simple. */ #include <stdlib.h> #include <iostream> #include <string> #include <stdexcept> #include <cleansocks.h> #include <cleanip.h> #include <cleantlsc.h> using std::string; using std::cout; using std::endl; using std::runtime_error; using namespace cleansocks; /* * This performs the bulk of the operation. It receives the URL from the * command line, parses it, builds and sends the HTTP request, and * echos the response to the command line. Throws if something goes wrong. */ void doit(string url) { string original(url); // Remove the protocol. int loc = url.find(":"); if(loc == string::npos) throw runtime_error (original + ": Can't find colon marking protocol"); string proto = url.substr(0,loc); url.erase(0,loc); // Must be http: or https: if(proto != "http" && proto != "https") throw runtime_error(original + ": Not http or https"); // Next part has to be ://. if(url.substr(0,3) != "://") throw runtime_error(original + ": Too complicated for me!"); url.erase(0,3); // Simplest case, the remainder of the URL is the host name, and // the path is implied /. string hostname = url; string path = "/"; // But if there's a slash, it's the start of the path. int hend = url.find("/"); if(hend != string::npos) { hostname = url.substr(0,hend); path = url.substr(hend); } // See if we need TLS. bool secure = (proto == "https"); // Show what we found out. cout << "proto = " << proto << ", host = " << hostname << ", path = " << path << (secure ? " [secure]" : "") << "\n" << endl; // Look up the host and try to connect. IPaddress a = lookup_host(hostname); IPport p = lookup_service(proto); TCPsocket cs; connect(cs,IPendpoint(a,p)); // Create a TLS socket, but only enable TLS if needed. 
client_tls_socket s(cs,hostname,secure); // Send a minimal HTTP request to the server. send(s, "GET "+path+" HTTP/1.0\r\n"); send(s, "Host: "+hostname+"\r\n"); send(s, string("User-Agent: URLtest\r\n\r\n")); // Read and print the response. char buf[1024]; int n; while((n = recv(s, buf, sizeof buf)) != 0) cout.write(buf, n); close(s); } int main(int argc, char **argv) { // Check that we got exactly one argument, or whine. if(argc != 2) { std::cerr << "Provide exactly one URL." << endl; exit(2); } // Perform the operation, and catch the pieces. try { doit(argv[1]); } catch(socket_db_error &e) { cout << "Socket db error: " << e.what() << endl; } catch(socket_tls_error &e) { cout << "TLS error: " << e.what() << endl; socket_tls_error::error_stack(); } catch(socket_error &e) { cout << "Socket error: " << e.what() << endl; } catch(runtime_error &e) { cout << "Runtime error: " << e.what() << endl; } }