#!/usr/bin/env perl
#
# (C) 2001 Alan Donovan.
#
# Author: Alan Donovan
#
# extractcd -- extract CD cover information from Amazon web pages.
#              Use in conjunction with cdcover(1) to make inlay
#              cards.
#
# $Id: extractcd,v 1.1.1.1 2002/08/23 13:40:34 adonovan Exp $
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# .
#

use strict;
use warnings;

# restore($text) -- decode the HTML entities this script cares about.
#
# Replaces '&amp;', '&nbsp;', '&lt;' and '&gt;' with '&', ' ', '<' and '>'.
# The replacement is done in one pass so that an already-escaped entity
# such as '&amp;lt;' decodes to the text '&lt;' and not all the way to '<'.
#
# Returns the decoded copy; the argument is not modified.
sub restore {
    my ($text) = @_;

    my %char_for = (
        amp  => '&',
        nbsp => ' ',
        lt   => '<',
        gt   => '>',
    );
    $text =~ s/&(amp|nbsp|lt|gt);/$char_for{$1}/g;

    return $text;
}

# _run(@command) -- run an external command, warning (not dying) on failure.
#
# A single-element @command that contains shell metacharacters is run via
# the shell (needed for the pipelines and '&' below); a multi-element
# @command bypasses the shell entirely, so its arguments need no quoting.
sub _run {
    my @command = @_;

    system(@command) == 0
        or warn "extractcd: command failed (status $?): @command\n";

    return;
}

# _fetch_picture($page, $asin) -- download the cover image into
# ASIN-$asin.gif.
#
# Looks in $page for a GIF image URL for this ASIN first, then for a JPEG
# one; a JPEG is also converted to GIF.  Dies if neither is found.
# Returns the URL of the picture that was downloaded.
sub _fetch_picture {
    my ($page, $asin) = @_;

    # \Q...\E keeps "$asin[" from being parsed as an array element.
    my $image_stem = qr{http://images\.amazon\.com/images/./\Q$asin\E[.\w]+};
    my $pic;

    if ($page =~ m{($image_stem\.gif)}) {
        $pic = $1;
        _run('wget', $pic, '-O', "ASIN-$asin.gif");
    }
    elsif ($page =~ m{($image_stem\.jpg)}) {
        $pic = $1;
        _run('wget', $pic, '-O', "ASIN-$asin.jpg");
        _run("djpeg ASIN-$asin.jpg | ppmquant 256 | ppmtogif >ASIN-$asin.gif");
    }
    else {
        die "Sorry: no picture! \nAborting.\n";
    }

    return $pic;
}

# _tracks_from_html($html) -- turn the HTML of the track table into plain
# text with one track title per line.
#
# Every "<number>." (with surrounding whitespace) is treated as the start
# of a new track, so a title that itself contains such a sequence is split.
sub _tracks_from_html {
    my ($html) = @_;

    # NOTE(review): this substitution only drops the literal text
    # '>Listen'; it looks like part of it was lost -- confirm against the
    # original source.
    $html =~ s/>Listen/ /g;
    $html =~ s/<[^>]*>/ /g;              # remove tags
    $html = restore($html);              # entities last, so a decoded '<' stays text
    $html =~ s/\s*[0-9]+\.\s*/\n/g;      # break into lines
    $html =~ s/^\s*//;                   # trim leading space

    return $html;
}

# main(@args) -- fetch the Amazon page for one ASIN, save its cover picture
# and track listing, then hand them to cdcover and preview the result.
#
# Files written in the current directory: ASIN-<asin>.html, ASIN-<asin>.gif
# (and .jpg when the picture is a JPEG) and ASIN-<asin>.txt.
sub main {
    my @args = @_;

    my $usage = "usage: extractcd ASIN\n";
    @args == 1 or die $usage;

    # The ASIN is interpolated into shell command lines below, so accept
    # nothing but letters and digits.
    my $asin = shift @args;    # e.g. B00001ZWEF
    $asin =~ /\A[0-9A-Za-z]+\z/
        or die "extractcd: invalid ASIN '$asin'\n$usage";

    my $url = "http://www.amazon.com/exec/obidos/ASIN/$asin";

    print "Extracting CD info from Amazon...\n";
    my $page = `lynx -source '$url' | tee ASIN-$asin.html`;
    die "Can't open URL.\n" if !defined $page || $page eq "";

    $page =~ s/\n/ /g;    # make into 1 line

    # get picture
    my $pic = _fetch_picture($page, $asin);
    print "Picture:\n\t$pic\n\n";
    _run("xview ASIN-$asin.gif &");

    # get title / artist:
    # NOTE(review): with no anchor before the first capture, the title
    # capture starts at the beginning of the page; the pattern appears to
    # have lost its opening tags -- confirm against the original source.
    # The match is checked because on failure $1 would still hold the
    # picture URL from the match above.
    $page =~ /(.*?)<\/b>.*?(.*?)<\/a>/
        or die "Can't get title / artist!\n";
    my ($raw_title, $raw_artist) = ($1, $2);
    my $artist = restore($raw_artist);
    my $title  = restore($raw_title);

    print "Artist:\n\t$artist\n\n";
    print "Title:\n\t$title\n\n";

    #
    # NB: Amazon uses upper/lower cases interchangeably
    #

    # get track listing
    $page =~ /Listen to Samples.*?(.*?)<\/table>/i
        or die "Can't get track listing!\n";
    my $table_html = $1;    # extract table contents
    my $tracks     = _tracks_from_html($table_html);

    my $track_file = "ASIN-$asin.txt";
    open(my $out, '>', $track_file)
        or die "can't open file \`$track_file' for output.\n";
    print {$out} "$tracks\n";
    close($out)
        or die "can't write file \`$track_file': $!\n";

    (my $display = $tracks) =~ s/\n/\n\t/g;    # format for display
    print "Tracks:\n\t$display\n\n";

    # List-form system() passes artist and title as single arguments
    # without going through the shell, so no quote escaping is needed.
    _run('cdcover', $artist, $title, $track_file, "ASIN-$asin.gif");

    (my $ps_stem = $title) =~ s/ //g;    # remove spaces from filename (as does cdcover)
    _run('gv', "$ps_stem.ps");

    return;
}

########################################################################

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. with require) without touching the network.
main(@ARGV) unless caller;

1;