#!/usr/bin/perl

# If there was extra time I was going to talk about my googling current
# awareness program. I thought it was a simple but effective use of SOAP.
# If you want to run it, download it.

=head1 NAME

googling - current awareness via email

=head1 SYNOPSIS

    googling --query='funkadelic' --email='george@clinton.com' \
        --key='IXJGrPF_THIS_WONT_WORK_1bB8+FCO'

=head1 DESCRIPTION

googling will perform a search of Google and send results to an email
or to STDOUT if no email is given. Every URL that is reported is recorded
in a SQLite database, so later runs of the same query only report URLs
that have not been reported before.

=head1 OPTIONS

=head2 --query

You must pass this in since it's the query you want to run against google.

=head2 --email

If you'd like to have results sent via email use this option. You can use
more than once if you'd like the report to go to more than one address.
If not supplied results will go to STDOUT. It's a useful option if you want
(as I did) to run the query from cron.

=head2 --db

Specify the sqlite database to use. If not supplied it defaults to
googling.db in the pwd.

=head2 --key

Specify your Google API Key. See http://www.google.com/apis/ to get your
own.

=head2 --limit

If you'd like to stop checking google after a certain amount of hits use
this option. Default is 100.

=head2 --debug

See diagnostic information. Diagnostics are written to STDERR.

=head2 --help

See this message.

=head1 SEE ALSO

L<SOAP::Lite>, L<DBI>, L<Mail::Send>

=head1 AUTHOR

=over 4

=item Ed Summers E<lt>ehs@pobox.comE<gt>

=back

=cut

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use SOAP::Lite;
use DBI;
use Mail::Send;

## google limits query results to a set of 10 at a time
my $PAGE_SIZE = 10;

## gather options; unknown or malformed options are a usage error
my ( $limit, $query, $db, $debug, $help, $googleId, @emails );
GetOptions(
    'limit:i' => \$limit,
    'query:s' => \$query,
    'email:s' => \@emails,
    'key:s'   => \$googleId,
    'db:s'    => \$db,
    'debug!'  => \$debug,
    'help!'   => \$help,
) or pod2usage( 2 );

## asking for help is not an error: show synopsis and options, exit 0
if ( $help ) { pod2usage( -verbose => 1, -exitval => 0 ); }

## query and key are mandatory; test for emptiness rather than truth so
## that a query of '0' is accepted
if ( !defined $query or $query eq '' or !$googleId ) { pod2usage( 1 ); }

$limit = 100           if !$limit;
$db    = 'googling.db' if !$db;

## --email given without a value yields an empty string; drop those so
## we never try to mail an empty recipient
@emails = grep { defined $_ and $_ ne '' } @emails;


## database access to remember what results have been seen
## in previous runs
my $dbh    = getDbh( $db );
my $select = $dbh->prepare( qq(
    select count(*) from seen where url = ? and query = ?
) );
my $insert = $dbh->prepare( qq(
    insert into seen (url,query) values(?,?)
) );


## build the variables that we are going to pass to our SOAP request;
## these are the same for every page of results
my $key            = SOAP::Data->name( 'key' )->value( $googleId );
my $q              = SOAP::Data->name( 'q'          => $query )->type( 'string' );
my $filter         = SOAP::Data->name( 'filter'     => 1 )->type( 'boolean' );
my $restrict       = SOAP::Data->name( 'restrict'   => '' )->type( 'string' );
my $safeSearch     = SOAP::Data->name( 'safeSearch' => 0 )->type( 'boolean' );
my $langRestrict   = SOAP::Data->name( 'lr'         => '' )->type( 'string' );
my $inputEncoding  = SOAP::Data->name( 'ie'         => '' )->type( 'string' );
my $outputEncoding = SOAP::Data->name( 'oe'         => '' )->type( 'string' );

## the SOAP client is loop-invariant, so build it once
## NOTE(review): this beta SOAP endpoint may no longer be served by
## Google -- confirm before relying on it
my $service = SOAP::Lite
    ->uri( 'urn:GoogleSearch' )
    ->proxy( 'http://api.google.com/search/beta2' );

## storage variables
my @results      = ();
my $startElement = 0;
my $exitStatus   = 0;

## while we haven't exceeded the citation limit
while ( $startElement < $limit ) {

    ## never ask for more hits than the limit still allows
    my $remaining = $limit - $startElement;
    my $pageSize  = $remaining < $PAGE_SIZE ? $remaining : $PAGE_SIZE;

    ## create start element based on the last result record we read in
    my $start      = SOAP::Data->name( 'start'      => $startElement )->type( 'int' );
    my $maxResults = SOAP::Data->name( 'maxResults' => $pageSize )->type( 'int' );

    warn "googling: requesting $pageSize results from $startElement\n"
        if $debug;

    ## issue the query; a transport failure is trapped here so that
    ## results already recorded as seen are still reported below
    my $response = eval {
        $service->doGoogleSearch( $key, $q, $start, $maxResults, $filter,
            $restrict, $safeSearch, $langRestrict, $inputEncoding,
            $outputEncoding );
    };
    if ( !$response ) {
        warn "googling: search request failed: ", ( $@ || "unknown error\n" );
        $exitStatus = 1;
        last;
    }
    if ( $response->fault ) {
        warn "googling: SOAP fault: ", $response->faultstring, "\n";
        $exitStatus = 1;
        last;
    }

    my $result = $response->result;
    last if !ref $result;

    ## if we didn't get any more results we're done; also stop if the
    ## index fails to advance, which would otherwise loop forever
    my $endIndex = $result->{ endIndex };
    last if !defined $endIndex or $endIndex <= $startElement;

    ## go through each item in the result set
    foreach my $item ( @{ $result->{ resultElements } || [] } ) {

        ## look in the db to see if we've seen this url for this query before
        $select->execute( $item->{ URL }, $query );
        my ( $seen ) = $select->fetchrow_array();
        next if $seen;

        ## it's a new url: add it to the database (so we don't
        ## report it again), and report it
        $insert->execute( $item->{ URL }, $query );
        warn "googling: new url $item->{ URL }\n" if $debug;

        if ( !@emails ) {
            my $snippet = defined $item->{ snippet } ? $item->{ snippet } : '';
            print $item->{ URL }, "\n", $snippet, "\n", "\n";
        }
        else {
            push( @results, [ $item->{ URL }, $item->{ snippet } ] );
        }

    }

    ## set the next start element to the index of the last item in this
    ## result set
    $startElement = $endIndex;

}

## if new results were found, and we have been asked to send an email
if ( @results and @emails ) {
    my $msg = Mail::Send->new();
    $msg->to( join( ',', @emails ) );
    $msg->subject( "googling: $query" );
    my $fh    = $msg->open( 'qmail' );
    my $count = 1;
    foreach my $hit ( @results ) {
        $fh->print( "[$count] ", $hit->[0], "\n" );
        if ( $hit->[1] ) { $fh->print( $hit->[1], "\n" ); }
        $fh->print( "\n" );
        $count++;
    }
    $fh->close();
}

## disconnect from db
$select->finish();
$insert->finish();
$dbh->disconnect();

## done
exit $exitStatus;


## getDbh( $db )
## Connects to the SQLite database file $db and returns the handle.
## When the file is missing or empty the 'seen' table is created first.
## The handle is opened with RaiseError, so a failed connect or any
## later failed statement dies with the DBI error message.
sub getDbh {
    my $db = shift;

    ## a missing or zero-length file has no schema yet
    my $isNew = !-s $db;

    my $dbh = DBI->connect( "dbi:SQLite:$db", '', '',
        { RaiseError => 1, PrintError => 0 } );

    ## otherwise create the table
    if ( $isNew ) {
        $dbh->do( 'create table seen (url varchar(500), query varchar(500))' );
    }

    return $dbh;
}