#! /usr/bin/perl # # my-http - Perl-functions for handling HTTP-requests # Copyright (C) 2003-2004 Tommi Saviranta # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # # Version: my-http v0.3.2 06-May-2004 tsaviran@cs.helsinki.fi use strict; use warnings; use Socket; my $__debug = 0; # 1 = progress, 2 = connect, 4 = dump (text data) my ($__referer, $__redirect, $__socket, $__page_uri); my (@__cookie_domain, @__cookie_path, @__cookie_data); # Insert code here :-) exit(0); ##### my-http functions ### __connect # 0: host # 1: port # sub __connect { my $fd; print STDERR "Connecting to $_[0]:$_[1]\n" if ($__debug & 2); socket($__socket, PF_INET, SOCK_STREAM, getprotobyname('tcp')) or die "Couldn't create socket: $!"; connect($__socket, sockaddr_in($_[1], inet_aton($_[0]))); $fd = select($__socket); $| = 1; select($fd); } # __connect ### __close_socket # sub __close_socket { close($__socket); } # __close_socket ### __read_socket # no parameters # sub __read_socket { my ($header, $data); my ($size, $done, $type, $phase); $header = $data = $type = ""; $phase = $done = $size = 0; while (! eof($__socket)) { $_ = <$__socket>; if ($phase == 0) { $header .= $_; ($size) = /: (\d+)/ if (/^Content-Length:\s/); ($type) = /: ([^\s]*)/ if (/^Content-Type:\s/); } else { $data .= $_; $done += length($_); } $phase++ if ($phase == 0 && /^\r?\n/); if (($__debug & 4) && ($type =~ /text/ || $phase == 0)) { print STDERR "<< $_"; } elsif (($__debug & 1) && $size > 0) { printf STDERR "\rGetting file: %d/%d...", $done, $size; print STDERR " done.\n" if ($done == $size); } } return ($header, $data); } # __read_socket ### __write_socket # 0: data # sub __write_socket { print $__socket $_[0]; print STDERR ">> $_[0]" if ($__debug & 4); } # __write_socket ### http_get # 0: uri # 1: port # 2: no-track # sub http_get { my ($base) = $_[0] =~ /[^:]*:\/\/([^\/]*)/; my $port = defined($_[1]) ? $_[1] : 80; my ($header, $data); my $newuri; my $uri = $_[0]; do { ($header, $data) = &__http_get_real($uri, $port, $_[2]); $newuri = &__get_redirect($uri, $port, $header); if (defined($newuri)) { $uri = $newuri; print STDERR "Redirecting: $uri...\n" if ($__debug & 4); } else { undef($newuri); } } while (defined($newuri)); $__page_uri = $uri; # Needed in complete_uri() return $data; } # http_get ### __http_get_real # 0: uri # 1: port # 2: no-track # sub __http_get_real { my ($base, $uri) = $_[0] =~ /[^:]*:\/\/([^\/]*)(.*)/; my ($header, $data); &__connect($base, $_[1]); &__write_socket("GET $uri HTTP/1.0\r\n"); &__write_socket("Host: $base\r\n") if (! defined($_[2])); &__write_socket("Referer: http://$__referer\r\n") if (defined($__referer)); &__send_cookies($base, $uri); &__write_socket("\r\n"); $__referer = $base . $uri if (! defined($_[2])); ($header, $data) = &__read_socket(); &__close_socket(); &__read_cookies($header); return ($header, $data); } # __http_get_real ### http_post # 0: uri # 1: port # 2: data # 3: no-track # sub http_post { my ($base) = $_[0] =~ /[^:]*:\/\/([^\/]*)/; my $port = defined($_[1]) ? $_[1] : 80; my ($header, $data); my $uri = $_[0]; my $newuri; $__page_uri = $uri; # Needed in complete_uri() ($header, $data) = &__http_post_real($uri, $port, $_[2], $_[3]); $newuri = &__get_redirect($uri, $port, $header); ($header, $data) = &http_get($newuri, 80) if (defined($newuri)); return $data; } # http_post ### __http_post_real # 0: uri # 1: port # 2: data # 3: no-track # sub __http_post_real { my ($base, $uri) = $_[0] =~ /.*:\/\/([^\/]*)(.*)/; my ($header, $data); &__connect($base, $_[1]); &__write_socket("POST $uri HTTP/1.0\r\n"); &__write_socket("Host: $base\r\n") if (! defined($_[3])); &__write_socket("Referer: http://$__referer\r\n") if (defined($__referer)); &__send_cookies($base, $uri); &__write_socket("Content-Type: application/x-www-form-urlencoded\r\n"); &__write_socket("Content-Length: " . length($_[2]) . "\r\n"); &__write_socket("\n"); &__write_socket($_[2]); $__referer = $base . $uri if (! defined($_[3])); ($header, $data) = &__read_socket(); print STDERR "\n" if ($__debug & 4); # final line-feed, &__close_socket(); &__read_cookies($header); return ($header, $data); } # __http_post_real ### __get_redirect # 0: uri # 1: port # 2: data sub __get_redirect { my ($base) = $_[0] =~ /.*?:\/\/(.*?)\//; my $uri; if ($_[2] =~ /^HTTP\/\d\.\d 30[12]/) { $__redirect++; die "Too many redirects !" if ($__redirect == 17); ($uri) = $_[2] =~ /\nLocation: (.*?)[\r\n]/; $uri = "$base$uri" if ($uri =~ /^\//); $uri = "http://$uri" if (! ($uri =~ /^http/)); } else { $__redirect = 0; } return $uri; } # __get_redirect ### clear_cookies # no parameters # sub clear_cookies { @__cookie_data = (); @__cookie_domain = (); @__cookie_path = (); } # clear_cookies ### __read_cookies # 0: page # sub __read_cookies { my ($data, $domain, $path); my $page = $_[0]; my ($foo, $bar, $back); while ($page =~ /Set-Cookie: /i) { ($_, $page) = $page =~ /.*?\nSet-Cookie: (.*?)[\r?\n](.*)/s; ($data, $_) = /(.*?)(;.*)/; $_ = " $_ "; $back = $_; ($foo, $domain, $bar) = /(.*;)domain=([^;]*)(.*)$/i; $_ = defined($domain) ? " $foo $bar " : $back; $domain = "" if (! defined($domain)); ($foo, $path, $bar) = /(.*;)path=([^;]*)(.*)$/i; $path = "" if (! defined($path)); if (defined($data) && $data =~ /=deleted$/i) { my $key; ($key) = $data =~ /(.*?)=/; for (my $i = 0; $i <= $#__cookie_data; $i++) { if ($__cookie_data[$i] =~ /^$key/ && $__cookie_domain[$i] eq $domain && $__cookie_path[$i] eq $path) { splice(@__cookie_domain, $i, 1); splice(@__cookie_path, $i, 1); splice(@__cookie_data, $i, 1); } } } else { push(@__cookie_domain, $domain); push(@__cookie_path, $path); push(@__cookie_data, $data); } } } # __read_cookies ### __send_cookies # 0: domain # 1: path # sub __send_cookies { my ($domain, $path); my $cookie = ""; for (my $i = 0; $i <= $#__cookie_domain; $i++) { $domain = $__cookie_domain[$i]; $path = $__cookie_path[$i]; if (($_[0] =~ /$domain$/i) && ($_[1] =~ /^$path/)) { $cookie .= $__cookie_data[$i] . "; "; } } if ($cookie =~ /./) { ($cookie) = $cookie =~ /(.*); /; &__write_socket("Cookie: $cookie\r\n"); } } # __send_cookies ### save_page # 0: filename # 1: page content # sub save_page { open(my $fd, "> $_[0]") or return -1; print $fd $_[1]; close($fd); return 0; } # save_page ### complete_uri # 0: uri # sub complete_uri { my $uri = $_[0]; my ($npath); my ($domain, $path) = $__referer =~ /([^\/]*)(.*)/; ($path) = $path =~ /(.*)\// if ($path =~ /\//); $_ = $_[0]; if (/^\//) { return "http://$domain$_"; } if (/^http:\/\//) { # http://... if (/http:\/\/[^\.]*\//) { # URL is like http://foo/bar.index ($_) = /\/\/(.*?)\//; } else { # URL is like http://www.foobar.com/... return $_[0]; } } if (! /^\//) { # ../... while (/^\.\.\//) { ($path) = $path =~ /(.*)\// if (defined($path)); ($_) = /...(.*)/; } $path = "" if (! defined($path)); } $_ = "/$_" if (! /^\//); return "http://$domain$path$_"; } # complete_uri