Spam: Website errors solicitation

This rule detects messages claiming to have identified errors on a website. The messages typically offer to send pricing or information upon request.

Sublime rule (View on GitHub)

  name: "Spam: Website errors solicitation"
  description: "This rule detects messages claiming to have identified errors on a website. The messages typically offer to send pricing or information upon request."
  type: "rule"
  severity: "low"
  # MQL detection logic. Three AND-ed stages:
  #   1. gates: inbound, sender not marked solicited, no attachments
  #   2. subject signals (or a reply/forward whose quoted thread mentions a website/screenshot)
  #   3. body structure + wording, in one of three shapes (no links / few links / threaded)
  source: |
    type.inbound
    // sender profile is not flagged as solicited
    and not profile.by_sender().solicited
    // no attachments
    and length(attachments) == 0
    // subject must contain SEO or web dev spam keywords or be short
    and (
      (
        // SEO or web development service keywords
        regex.icontains(strings.replace_confusables(subject.subject),
                        '(?:proposal|cost|estimate|error|bug|audit|screenshot|strategy|rankings|issues|fix|website|design|review|price)'
        )
        // subject ends with a single emoji (optionally followed by the U+FE0F
        // variation selector) that is preceded by a non-emoji character
        or regex.icontains(subject.base,
                           '[^\x{2600}-\x{27BF}\x{1F300}-\x{1F9FF}][\x{2600}-\x{27BF}\x{1F300}-\x{1F9FF}]\x{FE0F}?$'
        )
        // "report" in the subject together with follow-up phrasing in the body
        or (
          strings.icontains(strings.replace_confusables(subject.subject), "report")
          and regex.icontains(strings.replace_confusables(body.current_thread.text),
                              "(?:free|send you|can i send|may i send|let me know|interested|get back to me|reply back|just reply)"
          )
        )
        // short subject (fewer than 7 characters)
        or length(subject.base) < 7
      )
      // or a very short subject / reply / forward whose quoted thread mentions
      // a website or screenshots
      or (
        (length(subject.base) < 5 or subject.is_reply or subject.is_forward)
        and any(body.previous_threads,
                regex.icontains(strings.replace_confusables(.text),
                                "(?:screenshot|website)"
                )
        )
      )
    )
    // body structure and content patterns
    and (
      // Shape 1: single thread with no links
      (
        // mailto links produced by the "plain" parser are not counted as links
        length(filter(body.current_thread.links,
                      not (.href_url.scheme == "mailto" and .parser == "plain")
               )
        ) == 0
        and length(body.previous_threads) == 0
        // short message: more than 20 and fewer than 500 chars ...
        and (
          20 < length(body.current_thread.text) < 500
          // ... or within that range once the length of an NLU-detected
          // "disclaimer" entity is subtracted
          or any(map(filter(ml.nlu_classifier(body.current_thread.text).entities,
                            .name == "disclaimer"
                     ),
                     .text
                 ),
                 20 < (length(body.current_thread.text) - length(.)) < 500
          )
        )
        // service offering keywords
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            "(?:screenshot|errors? (?:list|report)|plan|quote|rank|professional|price|mistake|visibility|improvement|review|emailed.{0,10}more details)"
        )
        // generic greeting
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            'h(?:i|ello|ey)\b'
        )
        // problem or urgency keywords
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            '(?:errors?|report|issues|website|repair|redesign|upgrade|Google\s+.{0,15}find it|glitch|send you|SEO|broken)'
        )
        // website or page mention
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            "(?:site|website|page|package|SEO)"
        )
      )
      // Shape 2: single thread with at most 3 links, one of which is an
      // unsubscribe mailto link or points at an $org_domains domain
      or (
        length(body.links) <= 3
        and (
          // unsubscribe mailto link: "unsubscribe" anywhere inside a mailto: URI.
          // (A glob-style pattern here would be read as a regex character class
          // and match unrelated mailto links, so the URI is matched explicitly.)
          regex.icontains(body.html.raw, 'mailto:[^\s"<>]*unsubscribe')
          // or a link whose root domain is in $org_domains
          or any(body.links, .href_url.domain.root_domain in~ $org_domains)
        )
        and length(body.previous_threads) == 0
        // short message: more than 20 and fewer than 500 chars
        and 20 < length(body.current_thread.text) < 500
        // service offering keywords
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            "(?:screenshot|error list|plan|quote|rank|professional|price|mistake)"
        )
        // generic greeting
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            '(?:h(?:i|ello|ey)|morning)\b'
        )
        // problem or urgency keywords
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            '(?:error|report|issues|website|repair|redesign|upgrade|Google\s+.{0,15}find it|send you|SEO)'
        )
        // website or page mention
        and regex.icontains(strings.replace_confusables(body.current_thread.text),
                            "(?:site|website|page|package|SEO)"
        )
      )
      // Shape 3: threaded message with no links
      or (
        length(body.links) == 0
        // small thread with fewer than 5 previous messages
        and length(body.previous_threads) < 5
        // at least one previous message must show the spam characteristics
        and any(body.previous_threads,
                // short previous message, fewer than 400 chars
                length(.text) < 400
                and (
                  // generic greeting
                  regex.icontains(strings.replace_confusables(.text),
                                  '(?:h(?:i|ello|ey)|morning)\b'
                  )
                  // service offering keywords
                  and regex.icontains(strings.replace_confusables(.text),
                                      '(?:\berror(?:\s+list)?\b|screenshot|report|plan)'
                  )
                  // previous message is classified as English
                  and ml.nlu_classifier(.text).language == "english"
                )
        )
      )
    )
  tags:
    - "Attack surface reduction"
  attack_types:
    - "Spam"
  detection_methods:
    - "Content analysis"
    - "Sender analysis"
    - "Natural Language Understanding"
  id: "122ea794-f619-5f29-acb2-83261d8f81fc"

Related rules

to-top