Current File : //etc/mail/spamassassin/70_sare_uri1.cf
# SARE Spammer URI Rule Set for SpamAssassin - file 1
# Version:  01.01.05
# Created:  2004-09-13
# Modified: 2005-10-10
# Usage instructions and documentation are found in 70_sare_uri0.cf 
#@@# Revision History:  Full Revision History stored in 70_sare_uri.log
#@@# 01.01.03: Oct 05 2005 
#@@#           Minor score updates based on additional mass-check
#@@#           Renamed __SARE_BODY_BLANKS_5_100 to __SARE_BODY_BLNK_5_100 
#@@#           Added to file 1:                      SARE_URI_REFI
#@@#           Moved file 0 to file 1:               SARE_URI_PRIME
#@@#           Moved file 1 to file 4:               SARE_URI_CAMPAIGNID
#@@#           Moved file 1 to file 4:               SARE_URI_CASINO
#@@#           Moved file 1 to file x31:             SARE_URI_MIXED_CASE
#@@# 01.01.04: Oct 05 2005 
#@@#           Corrected lint error in SARE_URI_GEOCIT_NUM
#@@# 01.01.05: Oct 10 2005 
#@@#           Temp disable SARE_URI_SIXCAPS due to http://bugzilla.spamassassin.org/show_bug.cgi?id=4621

# License:  Artistic - see http://www.rulesemporium.com/license.txt
# Current   Maintainer: Bob Menschel - uri@rulesemporium.com
# Current   Home: http://www.rulesemporium.com/rules/70_sare_uri1.cf

########  ######################   ##################################################
#  Rule definitions to avoid --lint errors on archived/moved rules. 
########  ######################   ##################################################

# Placeholder definitions: each rule below is declared as a meta whose
# expression is the constant 0. The revision history at the top of this
# file lists these three names as moved out of file 1.
meta      SARE_URI_CAMPAIGNID      0
meta      SARE_URI_CASINO          0
meta      SARE_URI_MIXED_CASE      0

########  ######################   ##################################################
#    Category:  Sub-rules needed by others
########  ######################   ##################################################

# Sub-rule: the pattern /./ matches any single character, so this hits
# on any URI that is at least one character long.
uri       __SARE_URI_ANY           /./
#hist     __SARE_URI_ANY           Murty Rompalli, 2005-01-03
# Sub-rule: result of the check_blank_line_ratio eval test called with the
# arguments '5' and '100'.
# NOTE(review): presumably these are the lower/upper bounds of the blank-line
# percentage; the eval is implemented outside this file -- confirm.
body      __SARE_BODY_BLNK_5_100   eval:check_blank_line_ratio('5','100')
#hist     __SARE_BODY_BLNK_5_100   Murty Rompalli, 2005-01-03
# Sub-rule: true only when both __SARE_URI_ANY and __SARE_BODY_BLNK_5_100 hit.
# Used by SARE_URI_VISIT_US in this file.
meta      __SARE_META_MURTY3       (__SARE_URI_ANY && __SARE_BODY_BLNK_5_100)
#hist     __SARE_META_MURTY3       Murty Rompalli, 2005-01-03

########  ######################   ##################################################
#    Category:  URI links identified by spammer words
########  ######################   ##################################################

# Hits a URI containing "4", then up to 24 arbitrary characters, then the
# literal ".biz" (case-insensitive). The pattern is unanchored, so the "4"
# may appear anywhere in the URI before ".biz".
uri       SARE_URI_4_BIZ           /4.{0,24}\.biz/i
describe  SARE_URI_4_BIZ           Domain has a "four-you" type domain name
score     SARE_URI_4_BIZ           0.144
#hist     SARE_URI_4_BIZ           Fred Tarasevicius - FU_4_BIZ
#ham      SARE_URI_4_BIZ           40iseinc.biz
#counts   SARE_URI_4_BIZ           220s/155h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_4_BIZ           827s/1h of 114212 corpus (81067s/33145h RM) 01/19/05
#counts   SARE_URI_4_BIZ           147s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_4_BIZ           37s/1h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_4_BIZ           67s/1h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_4_BIZ           0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_4_BIZ           12s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_4_BIZ           60s/0h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_4_BIZ           0s/3h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing a "." followed by 4+ letters, 4+ digits and 4+
# letters, directly followed by .com/.net/.biz/.info/.org (case-insensitive).
uri       SARE_URI_ANUMA           /\.[a-z]{4,}\d{4,}[a-z]{4,}\.(?:com|net|biz|info|org)/i
describe  SARE_URI_ANUMA           Domain with ALPHAs NUMBERs ALPHAs
score     SARE_URI_ANUMA           0.632
#ham      SARE_URI_ANUMA           studio1509fineart.com
#hist     SARE_URI_ANUMA           Created by Chris Santerre Aug 31 2004
#counts   SARE_URI_ANUMA           76s/8h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_ANUMA           443s/0h of 70699 corpus (43133s/27566h RM) 10/02/04
#counts   SARE_URI_ANUMA           4s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_ANUMA           88s/0h of 19448 corpus (16862s/2586h MY) 08/31/04
#counts   SARE_URI_ANUMA           36s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_ANUMA           117s/0h of 38753 corpus (15271s/23482h JH-SA3.0rc1) 09/03/04
#counts   SARE_URI_ANUMA           0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_ANUMA           8s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_ANUMA           12s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Hits any URI containing the substring "bargain" (case-insensitive,
# no word-boundary requirement).
uri       SARE_URI_BARGAIN         /bargain/i
describe  SARE_URI_BARGAIN         URL has common spammer word
score     SARE_URI_BARGAIN         0.634
#hist     SARE_URI_BARGAIN         FU_BARGAIN
#ham      SARE_URI_BARGAIN         "smart bargains" in fwd of FamilyCorner.com Magazine 
#counts   SARE_URI_BARGAIN         583s/50h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_BARGAIN         33s/3h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_BARGAIN         224s/3h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_BARGAIN         160s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_BARGAIN         0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_BARGAIN         2s/1h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_BARGAIN         23s/1h of 11015 corpus (6587s/4428h CT) 03/10/05

# Hits any URI containing the substring "dealz" (case-insensitive,
# no word-boundary requirement).
uri       SARE_URI_DEALZ           /dealz/i
describe  SARE_URI_DEALZ           spam contains misspelled URI word
score     SARE_URI_DEALZ           1.666 
#hist     SARE_URI_DEALZ           Created by Bob Menschel May 16 2004
#ham      SARE_URI_DEALZ           www.slickdealz.net, NYTimes.com Sunday, January 02, 2005
#counts   SARE_URI_DEALZ           5063s/1h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_DEALZ           505s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_DEALZ           77s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_DEALZ           26s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_DEALZ           218s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_DEALZ           0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_DEALZ           1s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_DEALZ           3s/1h of 11015 corpus (6587s/4428h CT) 03/10/05

# Hits a URI containing ".", 3-8 letters, "good", then one of
# .com/.net/.info/.org/.biz (case-insensitive). The leading negative
# lookahead rejects a match that would start at ".greatergood.com".
uri       SARE_URI_GOOD            /(?!\.greatergood\.com)\.[a-z]{3,8}good\.(?:com|net|info|org|biz)/i
describe  SARE_URI_GOOD            spammer hint found in URI
score     SARE_URI_GOOD            0.164
#hist     SARE_URI_GOOD            Chris Santerre and Carl R. Friend, Feb 20 2005
#counts   SARE_URI_GOOD            108s/47h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_GOOD            14s/1h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_GOOD            27s/1h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_GOOD            89s/1h of 31513 corpus (27912s/3601h MY) 03/09/05
#counts   SARE_URI_GOOD            1s/0h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_GOOD            1s/0h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing "oem" delimited by word boundaries on both sides
# (case-insensitive), i.e. not as part of a longer alphanumeric word.
uri       SARE_URI_OEM             /\boem\b/i
describe  SARE_URI_OEM             body contains link to probable spammer page
score     SARE_URI_OEM             0.533
#hist     SARE_URI_OEM             Created by Bob Menschel Jun 6 7004
#counts   SARE_URI_OEM             100s/14h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_OEM             85s/2h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_OEM             17s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_OEM             23s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_OEM             0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_OEM             4s/0h of 10629 corpus (5847s/4782h CT) 09/18/05

# Hits a URI containing the word "off" (word boundary before it) followed by
# "." and one of the extensions htm/html/php/asp/pl/cgi/jsp, ending at a
# word boundary (case-insensitive).
uri       SARE_URI_OFF             /\boff\.(?:htm|html|php|asp|pl|cgi|jsp)\b/i
describe  SARE_URI_OFF             Unsubscribe at this link
score     SARE_URI_OFF             0.056
#ham      SARE_URI_OFF             flowers.com
#hist     SARE_URI_OFF             Fred Tarasevicius - FU_PAGE_OFF
#counts   SARE_URI_OFF             6s/18h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_OFF             71s/0h of 114212 corpus (81067s/33145h RM) 01/19/05
#counts   SARE_URI_OFF             3s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_OFF             9s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_OFF             0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_OFF             0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Hits a URI containing "pills" or "pillz" delimited by word boundaries
# on both sides (case-insensitive).
uri       SARE_URI_PILLS           /\bpill[sz]\b/i
describe  SARE_URI_PILLS           text references likely spammer
score     SARE_URI_PILLS           1.047
#hist     SARE_URI_PILLS           Created by Bob Menschel Apr 04 2004, added z Feb 2 2005
#hist     SARE_URI_PILLS           Bugzilla entry 3789, Sep 18 2004
#counts   SARE_URI_PILLS           128s/1h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_PILLS           2050s/0h of 115925 corpus (94616s/21309h RM) 05/01/04
#counts   SARE_URI_PILLS           27s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_PILLS           262s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_PILLS           17s/0h of 54084 corpus (16906s/37178h JH-3.01) 03/02/05
#max      SARE_URI_PILLS           360s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_PILLS           1s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_PILLS           4s/0h of 10629 corpus (5847s/4782h CT) 09/18/05

# Hits a URI containing the literal text "/prime/". The regex delimiter is
# the apostrophe, so both slashes are part of the pattern; there is no /i
# modifier, so the match is case-sensitive.
uri       SARE_URI_PRIME           m'/prime/'
describe  SARE_URI_PRIME           body contains link to known spammer
score     SARE_URI_PRIME           0.950 
#ham      SARE_URI_PRIME           confirmed (1) 
#hist     SARE_URI_PRIME           Created by Bob Menschel Aug 09 2004
#counts   SARE_URI_PRIME           7s/1h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_PRIME           191s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_PRIME           92s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_PRIME           27s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_PRIME           191s/0h of 31513 corpus (27912s/3601h MY) 03/09/05
#counts   SARE_URI_PRIME           0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_PRIME           15s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_PRIME           17s/0h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_PRIME           1s/0h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing "refi." directly followed by com, net, biz, us or ws
# (case-insensitive). The TLD list is a non-capturing group with each
# alternative listed once; the set of matched URIs is the same as before.
uri       SARE_URI_REFI            /refi\.(?:com|net|biz|us|ws)/i
describe  SARE_URI_REFI            somethingrefi
score     SARE_URI_REFI            1.666
#ham      SARE_URI_REFI            spammer URI spammed into non-spam but inadequately moderated mailing list
#hist     SARE_URI_REFI            Alex Broens, July 2005
#counts   SARE_URI_REFI            1752s/3h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_REFI            16s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts   SARE_URI_REFI            2s/0h of 7500 corpus (1767s/5733h ft) 09/18/05
#counts   SARE_URI_REFI            223s/0h of 57287 corpus (52272s/5015h MY) 09/22/05

# Hits a URI containing "replica" starting at a word boundary
# (case-insensitive); any characters may follow it.
uri       SARE_URI_REPLICA         /\breplica/i
describe  SARE_URI_REPLICA         body contains link to probable spammer page
score     SARE_URI_REPLICA         1.634
#hist     SARE_URI_REPLICA         Fred Tarasevicius - FU_REPLICA
#counts   SARE_URI_REPLICA         872s/5h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_REPLICA         1285s/10h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_REPLICA         162s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#max      SARE_URI_REPLICA         195s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_REPLICA         111s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_REPLICA         2s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_REPLICA         18s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_REPLICA         60s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Hits a URI containing the word "rm" (word boundary before it) followed by
# "." and one of the extensions htm/html/php/asp/pl/cgi/jsp, ending at a
# word boundary (case-insensitive).
uri       SARE_URI_RM              /\brm\.(?:htm|html|php|asp|pl|cgi|jsp)\b/i
describe  SARE_URI_RM              Unsubscribe at this link
score     SARE_URI_RM              1.666
#hist     SARE_URI_RM              Fred Tarasevicius - FU_PAGE_RM
#counts   SARE_URI_RM              6239s/10h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_RM              548s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_RM              3s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_RM              45s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_RM              2s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_RM              341s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts   SARE_URI_RM              3s/0h of 7500 corpus (1767s/5733h ft) 09/18/05

# Body sub-rule: matches "v", one or more of i/l/1, "s", one of i/l/1, "t"
# (i.e. "visit" and look-alike spellings), then one or more underscore or
# non-word characters, then "us" or "our", then 1-4 groups of
# "non-space run + whitespace", then "http://", "https://" or "www."
# (case-insensitive).
# NOTE(review): the first \S+ must start immediately after "us"/"our", so
# text such as "visit us at http://..." (whitespace right after "us") does
# not match, while "visit us: http://..." does. Confirm this is intended
# before changing it; the score below was tuned against the current pattern.
body      __SARE_URI_VISIT_US      /\bv(?:i|l|1)+s(?:i|l|1)t[_\W]+(?:us|our)(?:\S+\s+){1,4}(?:https?\:\/\/|www\.)/i
# Scored rule: requires the body pattern above together with
# __SARE_META_MURTY3.
meta      SARE_URI_VISIT_US        (__SARE_URI_VISIT_US && __SARE_META_MURTY3)
describe  SARE_URI_VISIT_US        Visit us at this link
score     SARE_URI_VISIT_US        1.666
#hist     SARE_URI_VISIT_US        Murty Rompalli, 2005-01-03
#counts   SARE_URI_VISIT_US        3591s/6h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_VISIT_US        158s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_VISIT_US        2s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_VISIT_US        35s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_VISIT_US        1s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_VISIT_US        4s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts   SARE_URI_VISIT_US        2s/1h of 7500 corpus (1767s/5733h ft) 09/18/05

########  ######################   ##################################################
#    Category:  URI links identified by spammer names
########  ######################   ##################################################

# Hits a URI containing "item", up to 8 arbitrary characters, one character
# that is not a letter other than "s", and then ".com" (case-insensitive).
# The two excluded ranges (a-r and t-z) are written back to back: inside a
# bracketed class a comma is a literal character, not a separator, so it
# must not appear between them.
uri       SARE_URI_ITEM            /item.{0,8}[^a-rt-z]\.com/i
describe  SARE_URI_ITEM            Contains "item" in a URI
score     SARE_URI_ITEM            0.637
#hist     SARE_URI_ITEM            Carl R. Friend, Feb 24 2005
#hist     SARE_URI_ITEM            Bob Menschel, Oct 1 2005, added exclusion for single letter (not s) after item.
#counts   SARE_URI_ITEM            767s/63h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_ITEM            6s/6h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_ITEM            240s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_ITEM            16s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts   SARE_URI_ITEM            0s/1h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing "meds" or "medz", up to 14 arbitrary characters, and
# then one of .com/.biz/.net/.org/.us/.tv/.info (case-insensitive).
# The negative lookahead rejects a match that would begin at the literal
# text "medscape.com"; its dot is escaped so that only a real "." is
# excluded rather than any character in that position.
uri       SARE_URI_MEDS            /(?!medscape\.com)med[sz].{0,14}\.(?:com|biz|net|org|us|tv|info)/i
describe  SARE_URI_MEDS            domain selling meds
score     SARE_URI_MEDS            0.842
#stype    SARE_URI_MEDS            max:1.0
#hist     SARE_URI_MEDS            Created by Bob Menschel Aug 29 2004 from rules by Bob M & Fred T
#ham      SARE_URI_MEDS            medscape.com, modsociety.org DomesticPetmeds.com
#counts   SARE_URI_MEDS            1468s/37h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_MEDS            2657s/12h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_MEDS            159s/1h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_MEDS            498s/1h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_MEDS            590s/1h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#max      SARE_URI_MEDS            657s/1h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_MEDS            13s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_MEDS            87s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_MEDS            241s/1h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_MEDS            6s/5h of 7500 corpus (1767s/5733h ft) 09/18/05

# Sub-rule: matches "http://", then any run of non-"/" characters, then
# "meds." or "medz." (case-insensitive). Only the literal "http://" scheme
# is matched by this pattern.
uri       __SARE_URI_MEDS2         m'http://[^/]*med[sz]\.'i
# Scored rule: hits when the sub-rule matches and SARE_URI_MEDS does not,
# so a single URI is not scored by both rules.
meta      SARE_URI_MEDS2           __SARE_URI_MEDS2 && !SARE_URI_MEDS
describe  SARE_URI_MEDS2           body contains link to known spammer
score     SARE_URI_MEDS2           1.666  
#hist     SARE_URI_MEDS2           RM_usd_meds
#hist     SARE_URI_MEDS2           Converted to meta to exclude dupes with SARE_URI_MEDS Sep 19 2004
#counts   SARE_URI_MEDS2           0s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_MEDS2           0s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_MEDS2           0s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_MEDS2           1s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_MEDS2           0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_MEDS2           0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

########  ######################   ##################################################
#    Category:  URI links identified by technical attributes
########  ######################   ##################################################

# Hits a URI containing "==" directly followed by ".jpg" or ".htm"
# (case-insensitive).
uri       SARE_URI_EQUAL2          /==\.(?:jpg|htm)/i
describe  SARE_URI_EQUAL2          Suspicious URI
score     SARE_URI_EQUAL2          0.684
#hist     SARE_URI_EQUAL2          Alex Pleiner and Chris Santerre, Feb 2005
#counts   SARE_URI_EQUAL2          88s/24h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_EQUAL2          238s/1h of 197615 corpus (96830s/100785h RM) 02/22/05
#counts   SARE_URI_EQUAL2          17s/0h of 54084 corpus (16906s/37178h JH-3.01) 03/02/05
#counts   SARE_URI_EQUAL2          464s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_EQUAL2          0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_EQUAL2          8s/0h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing "?refid" directly followed by "D" or "="
# (case-insensitive, so "d" is accepted as well).
uri       SARE_URI_REFID1          /\?refid[D=]/i
describe  SARE_URI_REFID1          Spammer signature in URL
score     SARE_URI_REFID1          0.648
#hist     SARE_URI_REFID1          LW_URI_REFID
#counts   SARE_URI_REFID1          1344s/102h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_REFID1          68s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_REFID1          210s/4h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_REFID1          1s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_REFID1          166s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_REFID1          207s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

########  ######################   ##################################################
#    Category:  URI links identified by use of randomizing characters
########  ######################   ##################################################

# Hits a URI containing a run of 4 or more digits that starts at a word
# boundary, followed by .com/.net/.biz/.info and a closing word boundary
# (case-insensitive). Digits preceded by a letter or underscore do not
# start at a word boundary and therefore do not match.
uri       SARE_URI_DIGITS4         m'\b\d{4,}\.(?:com|net|biz|info)\b'i
describe  SARE_URI_DIGITS4         References a multi-digit domain 
score     SARE_URI_DIGITS4         0.415
#hist     SARE_URI_DIGITS4         Created by Bob Menschel Aug 23 2004
#ham      SARE_URI_DIGITS4         The Learning Company <tlcinfo@info.learningco.com> (May, 2002)
#counts   SARE_URI_DIGITS4         679s/82h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_DIGITS4         905s/28h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_DIGITS4         14s/4h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_DIGITS4         61s/4h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_DIGITS4         9s/3h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_DIGITS4         0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_DIGITS4         2s/3h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_DIGITS4         6s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Disabled rule: the uri pattern is commented out and the name is instead
# defined as a meta with the constant expression 0. The revision history at
# the top of this file records this as a temporary disable (bugzilla 4621).
# The describe and score lines are kept in place.
#uri       SARE_URI_SIXCAPS         /[A-Z]{6}\.(?:BIZ|INFO|biz|info)/
meta      SARE_URI_SIXCAPS         0
describe  SARE_URI_SIXCAPS         URI points to a six capital .BIZ domain
score     SARE_URI_SIXCAPS         0.687
#hist     SARE_URI_SIXCAPS         SARE test offered by CRF 4/26/04
#counts   SARE_URI_SIXCAPS         112s/20h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_SIXCAPS         193s/1h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_SIXCAPS         103s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_SIXCAPS         115s/1h of 57287 corpus (52272s/5015h MY) 09/22/05
#counts   SARE_URI_SIXCAPS         0s/1h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_SIXCAPS         215s/0h of 10629 corpus (5847s/4782h CT) 09/18/05

########  ######################   ##################################################
#    Category:  URI links identified by web page/file names
########  ######################   ##################################################

# Hits a URI containing "www.geocities.com/", then 4-20 letters or
# underscores, then "_" and two digits (case-insensitive). The pattern has
# no end anchor, so further characters may follow the two digits.
uri       SARE_URI_GEOCIT_NUM      /www\.geocities\.com\/[a-z_]{4,20}_\d{2}/i
describe  SARE_URI_GEOCIT_NUM      geocities URI ends in underscore and two digits
score     SARE_URI_GEOCIT_NUM      0.666
#hist     SARE_URI_GEOCIT_NUM      From john@tradoc.fr  Fri Apr 15 07:05:25 2005 SA Users
#counts   SARE_URI_GEOCIT_NUM      76s/7h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_GEOCIT_NUM      0s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#counts   SARE_URI_GEOCIT_NUM      57s/1h of 7500 corpus (1767s/5733h ft) 09/18/05
#counts   SARE_URI_GEOCIT_NUM      46s/0h of 57287 corpus (52272s/5015h MY) 09/22/05

# Sub-rule: matches "/", one lowercase letter, one digit, then ".gif" or
# ".jpg". There is no /i modifier, so the match is case-sensitive.
uri       __SARE_URI_LET_DIG_PIC   /\/[a-z]\d\.(?:gif|jpg)/
# Scored rule: hits when the sub-rule matches and SARE_URI_VDRUG_GIF does not.
# NOTE(review): SARE_URI_VDRUG_GIF is not defined in this file; presumably it
# lives in another file of the rule set -- confirm it is loaded.
meta      SARE_URI_LET_DIG_PIC     __SARE_URI_LET_DIG_PIC && !SARE_URI_VDRUG_GIF
describe  SARE_URI_LET_DIG_PIC     Suspicious file name for graphic
score     SARE_URI_LET_DIG_PIC     1.157
#counts   SARE_URI_LET_DIG_PIC     4567s/34h of 689155 corpus (348140s/341015h RM) 09/18/05
#counts   SARE_URI_LET_DIG_PIC     62s/2h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_LET_DIG_PIC     356s/2h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_LET_DIG_PIC     332s/6h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_LET_DIG_PIC     383s/6h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_LET_DIG_PIC     6s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_LET_DIG_PIC     8s/1h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_LET_DIG_PIC     151s/2h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_LET_DIG_PIC     2s/2h of 7500 corpus (1767s/5733h ft) 09/18/05

# Hits a URI containing "/nomore." directly followed by htm, asp or php
# (case-insensitive). The pattern is not anchored after the extension, so
# longer extensions that begin with one of these also match.
uri       SARE_URI_NO_MORE         m{/nomore\.(?:htm|asp|php)}i
describe  SARE_URI_NO_MORE         Contains a likely spammer unsubscribe link
score     SARE_URI_NO_MORE         0.522
#hist     SARE_URI_NO_MORE         Fred Tarasevicius - FU_PAGE_NO_MORE
#ham      SARE_URI_NO_MORE         http://www.afsc.org/nomore.htm; Student Peace Action Network (SPAN)
#counts   SARE_URI_NO_MORE         4s/9h of 689155 corpus (348140s/341015h RM) 09/18/05
#max      SARE_URI_NO_MORE         456s/3h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_NO_MORE         69s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_NO_MORE         3s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
#max      SARE_URI_NO_MORE         150s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_NO_MORE         0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_NO_MORE         18s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
#max      SARE_URI_NO_MORE         70s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# EOF