#!/bin/bash
# upside.sh - by stoner.savant and murfie
# cli automation for discovering hostnames when
# zone transfers are disabled.
# Note: This does not use the Google API and is a "Proof of Concept"
# only!!! Using this tool on google.com violates Google's Terms of Service.
# Consider yourself warned.
#
# Greets to everyone at JIHS dot com, Kujo and Lance James
#
# version 0.042 - bugfix for forbidden agent (murf)
# version 0.041 - bugfix for number of pages, using perl regexp (golfo)
# version 0.040 - guess the number of pages (murf)
# version 0.037 - a little less hardcoded stuff (murf)
# version 0.036 - added "are you sure" for no proxy (murf)
# version 0.035 - experimental command line proxy use (stoner)
# version 0.030 - added proxy support (stoner)
# version 0.020 - added $TARGET parameter for domain name (murf)
# version 0.010 - stoner !
# Referer string handed to curl via -e. Left blank: it is not clear that
# Google requires one.
REFERER=''
# Some user agents are rejected outright, so identify as a real browser.
# Thanks to JimmyNeutron and J0hnny for pointing this out.
# Old value, forbidden by Google since June 2005:
#AGENT="Lynx/2.7.1 libwww-FM/2.14"
AGENT='Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) Gecko/20050513 Firefox/1.0.4 (Debian package 1.0.4-1)'
#### TARGET ####
# The first positional argument is the domain to enumerate (domain.tld).
# Without it there is nothing to do: print the usage line on stderr and
# exit non-zero so callers can tell the run did not happen.
if [ -z "$1" ]; then
  echo "usage: $0 domain.tld [proxy:port]" >&2
  exit 1
fi
TARGET=$1
#### PROXY ####
# The optional second positional argument is the proxy (proxy:port) passed
# to curl with -x. When it is missing, ask for explicit confirmation first.
if [ -z "$2" ]; then
  # The prompt advertises "N" as the default, so only an explicit yes
  # continues; an empty reply, "n"/"N", EOF or any other input bails out.
  read -r -p "Are you sure you want to continue without a proxy ? (y/N) : "
  case "$REPLY" in
    [yY] | [yY][eE][sS])
      echo "Ok, no proxies set, continuing .."
      ;;
    *)
      echo "Bailing out .."
      exit 1
      ;;
  esac
  # Clear any PROXY inherited from the environment so the later curl calls
  # do not pick it up after the user confirmed running without a proxy.
  PROXY=""
else
  PROXY="$2"
  #uncomment if you want to see this in the output
  #echo "Proxy set to : $PROXY"
fi
#### GET GOOGLE PAGES ####
# Fetch the first result page (start=0) into googleresult.txt and read the
# total hit count from it, to work out how many pages need fetching.
# Sets RESULTCOUNT (hit count without thousands separators) and
# PAGES (RESULTCOUNT / 100, integer division).
QUERY="http://www.google.com/search?q=site:${TARGET}+-site:www.${TARGET}&num=100&hl=en&lr=&start=0&sa=N&btnG=Google+Search"
#echo "DEBUG: QUERY=$QUERY"
#echo "DEBUG: PROXY=$PROXY"
#echo "DEBUG: REFERER=$REFERER"
#echo "DEBUG: AGENT=$AGENT"
if ! curl -x "$PROXY" -L -A "$AGENT" -e "$REFERER" -s "$QUERY" > googleresult.txt; then
  echo "ERROR: curl could not fetch the first result page." >&2
  exit 1
fi
# REGEXP EXAMPLE SYNTAX (watch the "about" word !)
#
# Results 1 - 4 of 4 from geek-punk.net for -site:www.geek-punk.net
# Results 1 - 35 of about 76 from philips.nl for -site:www.philips.nl
# Results 1 - 100 of about 166,000 from philips.com for -site:www.philips.com
#
# perl -n loops over the input lines itself; print the count captured from
# the first matching line and stop.
RESULTS=$(perl -ne 'if (m#Results .*? - \d+ of (about |)(.*?) from .*? for -site:www.*?#i) { print "$2\n"; exit; }' googleresult.txt)
if [ -z "$RESULTS" ]; then
  echo "ERROR: something broke, sorry." >&2
  echo "but you get to keep both pieces :)" >&2
  exit 1
fi
# Drop the thousands separators ("166,000" -> "166000").
RESULTCOUNT=${RESULTS//,/}
# Refuse anything that is not a plain number before doing arithmetic on it.
case "$RESULTCOUNT" in
  '' | *[!0-9]*)
    echo "ERROR: could not parse the result count from '$RESULTS'." >&2
    exit 1
    ;;
esac
echo "$RESULTCOUNT results found"
# 10# forces base 10 so a leading zero is not read as octal.
PAGES=$((10#$RESULTCOUNT / 100))
#### GET GOOGLE DNS LIST ####
# Fetch result pages 0..PAGES (100 hits per page, never more than 10 pages),
# pull every hostname-looking token out of the combined HTML and print the
# unique ones that contain the target domain.
echo "Listing unique domain names (it may take a little while):"
x=0
# Both limits belong in the loop condition itself: only the last command of
# a while condition list decides whether the loop continues.
while [ "$x" -le "${PAGES:-0}" ] && [ "$x" -le 9 ]; do
  # Page x starts at result offset x*100.
  QUERY="http://www.google.com/search?q=site:${TARGET}+-site:www.${TARGET}&num=100&hl=en&lr=&start=$((x * 100))&sa=N&btnG=Google+Search"
  curl -x "$PROXY" -L -A "$AGENT" -e "$REFERER" -s "$QUERY"
  x=$((x + 1))
done \
  | grep -E -o '(http://|https://)*(([-_[:alnum:]]+\.)+(com|net|org|edu|gov|fr|nl|de|be|uk|us|au)[/(-_[:alnum:])]*)' \
  | tr '/' '\n' \
  | grep -F -- "$TARGET" \
  | sort -u