#!/usr/bin/env python2.6
# Copyright (c) 2008 Oversity Ltd.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
# must display the following acknowledgement:
# This product includes software developed by
# CiteULike ... ....
#
# NOTE(review): this copy of the file is damaged immediately above this point.
# What survives is the tail of a comment ending in the word "tags", followed
# by what appears to be a substitution on the fetched page of the form
#     page = re.sub('...\s+...', '...', page)
# The markup inside its literals was lost and cannot be recovered from here,
# so it is recorded as a comment only. Restore the statement from the
# upstream plugin before relying on this script.
#

soup = BeautifulSoup(page)

#
# The abstract is best-effort: if the <p class="abstract"> element is missing
# (find() returns None) or anything else goes wrong while extracting it, fall
# back to an empty abstract rather than failing the whole import.
#
try:
    abstract_parts = soup.find("p", "abstract").findAll(text=True)
    abstract = u' '.join(abstract_parts)
    # Collapse every whitespace run, not just newlines: a stray tab or
    # carriage return would corrupt the tab-separated "abstract" line below.
    abstract = re.sub(r'\s+', ' ', abstract).strip()
except Exception:
    # "except Exception" (not a bare except) so that SystemExit and
    # KeyboardInterrupt are not swallowed.
    abstract = ''

#
# Look for the link to the BibTeX export
#
bibtex_match = re.search("window.open[(]'([^']+)',", page, re.IGNORECASE)
if not bibtex_match:
    print(ERR_STR_PREFIX + "Could not find BibTeX export link (popBibTex.cfm...) in page")
    sys.exit(1)

bibtex_url = ACM_URL + bibtex_match.group(1)

#
# Fetch the BibTeX...
#
try:
    bibtex_page = urllib2.urlopen(bibtex_url).read()
except Exception:
    # Any fetch failure is reported through the plugin's error line; Ctrl-C
    # and sys.exit() still propagate because they are not Exception subclasses.
    print(ERR_STR_PREFIX + "Could not fetch BibTeX page (" + bibtex_url + ")")
    sys.exit(1)

#
# UGH - BibTeX record comes back as part of an HTML page...
#
# NOTE(review): the markup delimiting this pattern was stripped from this copy
# of the file (only "(.+?)" survived, which on its own would capture a single
# character). It is reconstructed here as a <pre>...</pre> wrapper -- confirm
# against the upstream plugin / the actual export page.
#
bib_match = re.search('<pre>(.+?)</pre>', bibtex_page, re.IGNORECASE | re.DOTALL)
if not bib_match:
    print(ERR_STR_PREFIX + "Could not find BibTeX in page")
    sys.exit(1)

bibtex = bib_match.group(1).strip()

#
# Look for the DOI in the bibtex
#
# The doi field is expected to hold a resolver URL; only the "10.xxxx/..."
# part after the host name is kept.
#
doi_match = re.search(r'doi\s*=\s*\{http://[^/]+/(10\.[^/]+/.+?)\}', bibtex, re.IGNORECASE)
doi = doi_match.group(1) if doi_match else ''

#
# Output plugin results
#
# Each print takes a single parenthesised argument, so the output is the same
# as with the Python 2 print statement.
#
print("begin_bibtex")
print(bibtex)
print("end_bibtex")

print("begin_tsv")
if abstract:
    print("abstract\t%s" % (abstract))
print("linkout\tACM\t%s\t\t\t" % (acm_id))
if doi:
    print("linkout\tDOI\t\t%s\t\t" % (doi))
print("end_tsv")

print("status\tok")