first commit
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
# This file contains a list, one per line, of regular expressions to be applied
|
||||
# to browser user agents to determine whether the browser is a "bot"
|
||||
# (i.e. should not be counted in article view counts). If any regexp
|
||||
# matches, the browser will be considered a bot.
|
||||
|
||||
/008/
|
||||
/ABACHOBot/
|
||||
/Accoona\-AI\-Agent/
|
||||
/AddSugarSpiderBot/
|
||||
/AnyApexBot/
|
||||
/Arachmo/
|
||||
/B\-l\-i\-t\-z\-B\-O\-T/
|
||||
/Baiduspider/
|
||||
/BecomeBot/
|
||||
/BeslistBot/
|
||||
/BillyBobBot/
|
||||
/Bimbot/
|
||||
/[Bb]ingbot/
|
||||
/Blekkobot/
|
||||
/BlitzBOT/
|
||||
/boitho.com\-dc/
|
||||
/boitho.com\-robot/
|
||||
/btbot/
|
||||
/CatchBot/
|
||||
/Cerberian Drtrs/
|
||||
/Charlotte/
|
||||
/ConveraCrawler/
|
||||
/cosmos/
|
||||
/Covario IDS/
|
||||
/DataparkSearch/
|
||||
/DiamondBot/
|
||||
/Discobot/
|
||||
/Dotbot/
|
||||
/EsperanzaBot/
|
||||
/Exabot/
|
||||
/FAST Enterprise Crawler/
|
||||
/FAST\-WebCrawler/
|
||||
/FDSE robot/
|
||||
/FindLinks/
|
||||
/FurlBot/
|
||||
/FyberSpider/
|
||||
/g2crawler/
|
||||
/Gaisbot/
|
||||
/GalaxyBot/
|
||||
/genieBot/
|
||||
/Gigabot/
|
||||
/Girafabot/
|
||||
/Google[Bb]ot/
|
||||
/gsa\-crawler/
|
||||
/GurujiBot/
|
||||
/HappyFunBot/
|
||||
/Holmes/
|
||||
/htdig/
|
||||
/iaskspider/
|
||||
/ia_archiver/
|
||||
/iCCrawler/
|
||||
/ichiro/
|
||||
/igdeSpyder/
|
||||
/IRLbot/
|
||||
/IssueCrawler/
|
||||
/Jaxified Bot/
|
||||
/Jyxobot/
|
||||
/KoepaBot/
|
||||
/LapozzBot/
|
||||
/Larbin/
|
||||
/LDSpider/
|
||||
/LexxeBot/
|
||||
/Linguee Bot/
|
||||
/LinkWalker/
|
||||
/lmspider/
|
||||
/LOCKSS/
|
||||
/lwp\-trivial/
|
||||
/mabontland/
|
||||
/magpie\-crawler/
|
||||
/Mediapartners\-Google/
|
||||
/MJ12bot/
|
||||
/MLBot/
|
||||
/Mnogosearch/
|
||||
/mogimogi/
|
||||
/MojeekBot/
|
||||
/Moreoverbot/
|
||||
/Morning Paper/
|
||||
/msnbot/
|
||||
/MSRBot/
|
||||
/MVAClient/
|
||||
/mxbot/
|
||||
/NetResearchServer/
|
||||
/NetSeer Crawler/
|
||||
/NewsGator/
|
||||
/NG\-Search/
|
||||
/nicebot/
|
||||
/noxtrumbot/
|
||||
/Nusearch Spider/
|
||||
/NutchCVS/
|
||||
/Nymesis/
|
||||
/obot/
|
||||
/oegp/
|
||||
/omgilibot/
|
||||
/OmniExplorer_Bot/
|
||||
/OOZBOT/
|
||||
/Orbiter/
|
||||
/PageBitesHyperBot/
|
||||
/Peew/
|
||||
/polybot/
|
||||
/Pompos/
|
||||
/PostPost/
|
||||
/Psbot/
|
||||
/PycURL/
|
||||
/Qseero/
|
||||
/Radian6/
|
||||
/RAMPyBot/
|
||||
/RufusBot/
|
||||
/SandCrawler/
|
||||
/SBIder/
|
||||
/ScoutJet/
|
||||
/Scrubby/
|
||||
/SearchSight/
|
||||
/Seekbot/
|
||||
/semanticdiscovery/
|
||||
/Sensis Web Crawler/
|
||||
/SeznamBot/
|
||||
/Shim\-Crawler/
|
||||
/ShopWiki/
|
||||
/Shoula robot/
|
||||
/silk/
|
||||
/Sitebot/
|
||||
/Snappy/
|
||||
/sogou spider/
|
||||
/Sosospider/
|
||||
/Speedy Spider/
|
||||
/Sqworm/
|
||||
/StackRambler/
|
||||
/suggybot/
|
||||
/SurveyBot/
|
||||
/SynooBot/
|
||||
/Teoma/
|
||||
/TerrawizBot/
|
||||
/TheSuBot/
|
||||
/Thumbnail.CZ robot/
|
||||
/TinEye/
|
||||
/truwoGPS/
|
||||
/TurnitinBot/
|
||||
/TweetedTimes Bot/
|
||||
/TwengaBot/
|
||||
/Twitterbot/
|
||||
/Urlfilebot/
|
||||
/Vagabondo/
|
||||
/VoilaBot/
|
||||
/Vortex/
|
||||
/voyager/
|
||||
/VYU2/
|
||||
/webcollage/
|
||||
/Websquash.com/
|
||||
/wf84/
|
||||
/WoFindeIch Robot/
|
||||
/WomlpeFactory/
|
||||
/Xaldon_WebSpider/
|
||||
/yacy/
|
||||
/Yahoo! Slurp/
|
||||
/YahooSeeker/
|
||||
/Yandex/
|
||||
/Yasaklibot/
|
||||
/Yeti/
|
||||
/YodaoBot/
|
||||
/yoogliFetchAgent/
|
||||
/YoudaoBot/
|
||||
/Zao/
|
||||
/Zealbot/
|
||||
/zspider/
|
||||
/ZyBorg/
|
||||
@@ -0,0 +1,574 @@
|
||||
# Stopword list taken from MySQL
|
||||
# (which is based on one from SMART ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
|
||||
# Currently contains English stopwords only
|
||||
|
||||
a
|
||||
a's
|
||||
able
|
||||
about
|
||||
above
|
||||
according
|
||||
accordingly
|
||||
across
|
||||
actually
|
||||
after
|
||||
afterwards
|
||||
again
|
||||
against
|
||||
ain't
|
||||
all
|
||||
allow
|
||||
allows
|
||||
almost
|
||||
alone
|
||||
along
|
||||
already
|
||||
also
|
||||
although
|
||||
always
|
||||
am
|
||||
among
|
||||
amongst
|
||||
an
|
||||
and
|
||||
another
|
||||
any
|
||||
anybody
|
||||
anyhow
|
||||
anyone
|
||||
anything
|
||||
anyway
|
||||
anyways
|
||||
anywhere
|
||||
apart
|
||||
appear
|
||||
appreciate
|
||||
appropriate
|
||||
are
|
||||
aren't
|
||||
around
|
||||
as
|
||||
aside
|
||||
ask
|
||||
asking
|
||||
associated
|
||||
at
|
||||
available
|
||||
away
|
||||
awfully
|
||||
b
|
||||
be
|
||||
became
|
||||
because
|
||||
become
|
||||
becomes
|
||||
becoming
|
||||
been
|
||||
before
|
||||
beforehand
|
||||
behind
|
||||
being
|
||||
believe
|
||||
below
|
||||
beside
|
||||
besides
|
||||
best
|
||||
better
|
||||
between
|
||||
beyond
|
||||
both
|
||||
brief
|
||||
but
|
||||
by
|
||||
c
|
||||
c'mon
|
||||
c's
|
||||
came
|
||||
can
|
||||
can't
|
||||
cannot
|
||||
cant
|
||||
cause
|
||||
causes
|
||||
certain
|
||||
certainly
|
||||
changes
|
||||
clearly
|
||||
co
|
||||
com
|
||||
come
|
||||
comes
|
||||
concerning
|
||||
consequently
|
||||
consider
|
||||
considering
|
||||
contain
|
||||
containing
|
||||
contains
|
||||
corresponding
|
||||
could
|
||||
couldn't
|
||||
course
|
||||
currently
|
||||
d
|
||||
definitely
|
||||
described
|
||||
despite
|
||||
did
|
||||
didn't
|
||||
different
|
||||
do
|
||||
does
|
||||
doesn't
|
||||
doing
|
||||
don't
|
||||
done
|
||||
down
|
||||
downwards
|
||||
during
|
||||
e
|
||||
each
|
||||
edu
|
||||
eg
|
||||
eight
|
||||
either
|
||||
else
|
||||
elsewhere
|
||||
enough
|
||||
entirely
|
||||
especially
|
||||
et
|
||||
etc
|
||||
even
|
||||
ever
|
||||
every
|
||||
everybody
|
||||
everyone
|
||||
everything
|
||||
everywhere
|
||||
ex
|
||||
exactly
|
||||
example
|
||||
except
|
||||
f
|
||||
far
|
||||
few
|
||||
fifth
|
||||
first
|
||||
five
|
||||
followed
|
||||
following
|
||||
follows
|
||||
for
|
||||
former
|
||||
formerly
|
||||
forth
|
||||
four
|
||||
from
|
||||
further
|
||||
furthermore
|
||||
g
|
||||
get
|
||||
gets
|
||||
getting
|
||||
given
|
||||
gives
|
||||
go
|
||||
goes
|
||||
going
|
||||
gone
|
||||
got
|
||||
gotten
|
||||
greetings
|
||||
h
|
||||
had
|
||||
hadn't
|
||||
happens
|
||||
hardly
|
||||
has
|
||||
hasn't
|
||||
have
|
||||
haven't
|
||||
having
|
||||
he
|
||||
he's
|
||||
hello
|
||||
help
|
||||
hence
|
||||
her
|
||||
here
|
||||
here's
|
||||
hereafter
|
||||
hereby
|
||||
herein
|
||||
hereupon
|
||||
hers
|
||||
herself
|
||||
hi
|
||||
him
|
||||
himself
|
||||
his
|
||||
hither
|
||||
hopefully
|
||||
how
|
||||
howbeit
|
||||
however
|
||||
i
|
||||
i'd
|
||||
i'll
|
||||
i'm
|
||||
i've
|
||||
ie
|
||||
if
|
||||
ignored
|
||||
immediate
|
||||
in
|
||||
inasmuch
|
||||
inc
|
||||
indeed
|
||||
indicate
|
||||
indicated
|
||||
indicates
|
||||
inner
|
||||
insofar
|
||||
instead
|
||||
into
|
||||
inward
|
||||
is
|
||||
isn't
|
||||
it
|
||||
it'd
|
||||
it'll
|
||||
it's
|
||||
its
|
||||
itself
|
||||
j
|
||||
just
|
||||
k
|
||||
keep
|
||||
keeps
|
||||
kept
|
||||
know
|
||||
knows
|
||||
known
|
||||
l
|
||||
last
|
||||
lately
|
||||
later
|
||||
latter
|
||||
latterly
|
||||
least
|
||||
less
|
||||
lest
|
||||
let
|
||||
let's
|
||||
like
|
||||
liked
|
||||
likely
|
||||
little
|
||||
look
|
||||
looking
|
||||
looks
|
||||
ltd
|
||||
m
|
||||
mainly
|
||||
many
|
||||
may
|
||||
maybe
|
||||
me
|
||||
mean
|
||||
meanwhile
|
||||
merely
|
||||
might
|
||||
more
|
||||
moreover
|
||||
most
|
||||
mostly
|
||||
much
|
||||
must
|
||||
my
|
||||
myself
|
||||
n
|
||||
name
|
||||
namely
|
||||
nd
|
||||
near
|
||||
nearly
|
||||
necessary
|
||||
need
|
||||
needs
|
||||
neither
|
||||
never
|
||||
nevertheless
|
||||
new
|
||||
next
|
||||
nine
|
||||
no
|
||||
nobody
|
||||
non
|
||||
none
|
||||
noone
|
||||
nor
|
||||
normally
|
||||
not
|
||||
nothing
|
||||
novel
|
||||
now
|
||||
nowhere
|
||||
o
|
||||
obviously
|
||||
of
|
||||
off
|
||||
often
|
||||
oh
|
||||
ok
|
||||
okay
|
||||
old
|
||||
on
|
||||
once
|
||||
one
|
||||
ones
|
||||
only
|
||||
onto
|
||||
or
|
||||
other
|
||||
others
|
||||
otherwise
|
||||
ought
|
||||
our
|
||||
ours
|
||||
ourselves
|
||||
out
|
||||
outside
|
||||
over
|
||||
overall
|
||||
own
|
||||
p
|
||||
particular
|
||||
particularly
|
||||
per
|
||||
perhaps
|
||||
placed
|
||||
please
|
||||
plus
|
||||
possible
|
||||
presumably
|
||||
probably
|
||||
provides
|
||||
q
|
||||
que
|
||||
quite
|
||||
qv
|
||||
r
|
||||
rather
|
||||
rd
|
||||
re
|
||||
really
|
||||
reasonably
|
||||
regarding
|
||||
regardless
|
||||
regards
|
||||
relatively
|
||||
respectively
|
||||
right
|
||||
s
|
||||
said
|
||||
same
|
||||
saw
|
||||
say
|
||||
saying
|
||||
says
|
||||
second
|
||||
secondly
|
||||
see
|
||||
seeing
|
||||
seem
|
||||
seemed
|
||||
seeming
|
||||
seems
|
||||
seen
|
||||
self
|
||||
selves
|
||||
sensible
|
||||
sent
|
||||
serious
|
||||
seriously
|
||||
seven
|
||||
several
|
||||
shall
|
||||
she
|
||||
should
|
||||
shouldn't
|
||||
since
|
||||
six
|
||||
so
|
||||
some
|
||||
somebody
|
||||
somehow
|
||||
someone
|
||||
something
|
||||
sometime
|
||||
sometimes
|
||||
somewhat
|
||||
somewhere
|
||||
soon
|
||||
sorry
|
||||
specified
|
||||
specify
|
||||
specifying
|
||||
still
|
||||
sub
|
||||
such
|
||||
sup
|
||||
sure
|
||||
t
|
||||
t's
|
||||
take
|
||||
taken
|
||||
tell
|
||||
tends
|
||||
th
|
||||
than
|
||||
thank
|
||||
thanks
|
||||
thanx
|
||||
that
|
||||
that's
|
||||
thats
|
||||
the
|
||||
their
|
||||
theirs
|
||||
them
|
||||
themselves
|
||||
then
|
||||
thence
|
||||
there
|
||||
there's
|
||||
thereafter
|
||||
thereby
|
||||
therefore
|
||||
therein
|
||||
theres
|
||||
thereupon
|
||||
these
|
||||
they
|
||||
they'd
|
||||
they'll
|
||||
they're
|
||||
they've
|
||||
think
|
||||
third
|
||||
this
|
||||
thorough
|
||||
thoroughly
|
||||
those
|
||||
though
|
||||
three
|
||||
through
|
||||
throughout
|
||||
thru
|
||||
thus
|
||||
to
|
||||
together
|
||||
too
|
||||
took
|
||||
toward
|
||||
towards
|
||||
tried
|
||||
tries
|
||||
truly
|
||||
try
|
||||
trying
|
||||
twice
|
||||
two
|
||||
u
|
||||
un
|
||||
under
|
||||
unfortunately
|
||||
unless
|
||||
unlikely
|
||||
until
|
||||
unto
|
||||
up
|
||||
upon
|
||||
us
|
||||
use
|
||||
used
|
||||
useful
|
||||
uses
|
||||
using
|
||||
usually
|
||||
v
|
||||
value
|
||||
various
|
||||
very
|
||||
via
|
||||
viz
|
||||
vs
|
||||
w
|
||||
want
|
||||
wants
|
||||
was
|
||||
wasn't
|
||||
way
|
||||
we
|
||||
we'd
|
||||
we'll
|
||||
we're
|
||||
we've
|
||||
welcome
|
||||
well
|
||||
went
|
||||
were
|
||||
weren't
|
||||
what
|
||||
what's
|
||||
whatever
|
||||
when
|
||||
whence
|
||||
whenever
|
||||
where
|
||||
where's
|
||||
whereafter
|
||||
whereas
|
||||
whereby
|
||||
wherein
|
||||
whereupon
|
||||
wherever
|
||||
whether
|
||||
which
|
||||
while
|
||||
whither
|
||||
who
|
||||
who's
|
||||
whoever
|
||||
whole
|
||||
whom
|
||||
whose
|
||||
why
|
||||
will
|
||||
willing
|
||||
wish
|
||||
with
|
||||
within
|
||||
without
|
||||
won't
|
||||
wonder
|
||||
would
|
||||
would
|
||||
wouldn't
|
||||
x
|
||||
y
|
||||
yes
|
||||
yet
|
||||
you
|
||||
you'd
|
||||
you'll
|
||||
you're
|
||||
you've
|
||||
your
|
||||
yours
|
||||
yourself
|
||||
yourselves
|
||||
z
|
||||
zero
|
||||
Reference in New Issue
Block a user