#!/usr/bin/env perl use warnings; #use LWP::Simple; use LWP 5.64; # # Copyright (c) 2009 Fergus Gallagher, CiteULike.org # All rights reserved. # # This code is derived from software contributed to CiteULike.org # by # Fergus Gallagher # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by # CiteULike and its # contributors. # 4. Neither the name of CiteULike nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY CITEULIKE.ORG AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. my $browser = LWP::UserAgent->new; $browser->cookie_jar({}); $url = <>; chomp($url); my @ns_headers = ( 'User-Agent' => 'Mozilla/4.76 [en] (Win98; U)', 'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*', 'Accept-Charset' => 'iso-8859-1,*,utf-8', 'Accept-Language' => 'en-US', ); if ($url =~ m{http://linkinghub.elsevier.com/retrieve/pii/}) { my $resp = $browser->get("$url", @ns_headers); if ($resp && $resp->code == 200 ) { # this gives us back the last hop "request", i.e., the # URL of the last redirect my $uri = $resp->request()->uri; if ($uri !~ /linkinghub.elsevier.com/) { print "status\tredirect\t$uri\n"; exit 0; } else { # print "ARSE: $uri\n"; my $c = $resp->content; my ($sd) = ($c =~ m{value=\"(http://www.sciencedirect.com/science?[^\"]*)\"}); if ($sd) { print "status\tredirect\t$sd\n"; exit 0; } } } print "status\terr\tCannot process $url\n"; }