Fake voicemail notification (untrusted sender)

This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject, or a combination of those elements, with emojis or a medium to high credential theft NLU intent, from a first-time + unsolicited sender.

Sublime rule (View on GitHub)

  1name: "Fake voicemail notification (untrusted sender)"
  2description: |
  3  This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
  4  The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.  
  5type: "rule"
  6severity: "medium"
  7source: |
  8  type.inbound
  9  // contains links or attachments
 10  and (
 11    (
 12      0 < length(filter(body.links, .href_url.scheme != "mailto")) <= 25
 13      or 0 < length(distinct(attachments, .md5)) <= 3
 14    )
 15    and 0 <= length(distinct(attachments, .md5)) <= 8
 16  )
 17  
 18  // the subject or display_name need some keywords which are voicemail related
 19  and (
 20    any([subject.subject, sender.display_name],
 21        regex.icontains(.,
 22                        // split phrases that occur within 3 words between or only punctuation between them
 23                        '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
 24                        // regex specific to v-mail, v_msg, v,mail, etc
 25                        // list of "secondary" words synced with regex above this one
 26                        'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
 27                        // split phrases that start with "caller" that occur within 3 words between or only punctuation
 28                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
 29                        // strong phrases
 30                        '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
 31                        // starts in the format of `(4)` and contains some voicemail keywords
 32                        '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
 33                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
 34  
 35                        // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
 36                        // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
 37                        '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
 38                        // obfuscated phone number with at least one digit in the prefix
 39                        // XXX-555-5555, XXX-5XX-XXXX
 40                        '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
 41                        // obfuscated voicemail/voicemessage keywords
 42                        'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
 43                        'v[o0][il1]cem[\*X\.\-_]{2,}',
 44                        // "X new voice..." patterns
 45                        '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
 46                        // sent-message patterns
 47                        '(?:sent|new|incoming)[\s\-]+message.*(v[o0][il1]ce|<.*@.*>)',
 48        )
 49    )
 50    // body.current_thread.text inspection should be very specific to avoid FP
 51    or regex.icontains(strings.replace_confusables(body.current_thread.text),
 52                       // patterns below run against the output of strings.replace_confusables() on the body text
 53                       'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
 54                       '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
 55                       'v[o0][il1]cema[il1][li1] (is )?attached',
 56                       'an? (?:new )?encrypted v[o0][il1]cemail',
 57                       'a (?:new )?pending message',
                          // the article is optional: matches "you have incoming ..." and "you have an incoming ..."
 58                       'Your? have (?:an? )?incoming v[o0][il1]ceRec',
 59                       "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
 60                       'New V[o0][il1]cema[il1][li1] Received',
 61                       'New m[il1]ssed ca[li1][li1] record',
 62                       '\bv[o0][il1]cema[il1][li1] transcript\b',
 63                       'Listen to V[o0][il1]ceMa[il1][li1]',
 64                       'New v[o0][il1]cema[il1][li1] from',
 65                       'v[o0][il1]ce note'
 66    )
 67    // pull out two regexes that could benefit from negations
 68    or (
 69      regex.icontains(body.current_thread.text,
 70                      // body.current_thread.text,
 71                      '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
 72                      'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
 73      )
 74      and not regex.icontains(body.current_thread.text,
 75                              '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+) and)? left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
 76      )
 77      and not regex.icontains(body.current_thread.text,
 78                              'you (?:have |received )my voice\s?(?:mail|audio|message)'
 79      )
 80    )
 81    // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
 82    or (
 83      length(attachments) > 0
 84      and (
 85        all(attachments,
 86            .file_type in $file_types_images
 87            and beta.parse_exif(.).image_height != 1
 88        )
 89        // there is a mix of fake audio attachments and images
 90        or (
 91          length(filter(attachments,
 92                        strings.starts_with(.content_type, "audio")
 93                        // confirm the content type with .file_type
 94                        // we have seen attachments claim to be audio/* files, only to be exploded as something else
 95                        and not .file_type in ("wav", "mp3")
 96                 )
 97          ) 
 98          // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
 99          // meaning, all attachments that are NOT fake audio attachments MUST be images
100          + length(filter(attachments,
101                          .file_type in $file_types_images
102                          and beta.parse_exif(.).image_height != 1
103                   )
104          ) == length(attachments)
105        )
106      )
107      and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
108              regex.icontains(.scan.ocr.raw,
109                              // phrase list adapted from the body.current_thread.text check, applied to the OCR text
110                              'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
111                              '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
112                              'v[o0][il1]cema[il1][li1] (is )?attached',
113                              'an? (?:new )?encrypted v[o0][il1]cemail',
114                              'a (?:new )?pending message',
                                 // the article is optional: matches "you have incoming ..." and "you have an incoming ..."
115                              'Your? have (?:an? )?incoming v[o0][il1]ceRec',
116                              "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
117                              'New V[o0][il1]cema[il1][li1] Received',
118                              'New m[il1]ssed ca[li1][li1] record',
119                              '\bv[o0][il1]cema[il1][li1] transcript\b',
120                              'Listen to V[o0][il1]ceMa[il1][li1]',
121                              'New v[o0][il1]cema[il1][li1] from',
122                              'v[o0][il1]ce note'
123              )
124              or (
125                regex.icontains(.scan.ocr.raw,
126                                // body.current_thread.text,
127                                '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
128                                'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audi[o0])(?: message)?',
129                )
130                and not regex.icontains(body.current_thread.text,
131                                        '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
132                )
133                and not regex.icontains(body.current_thread.text,
134                                        'you (?:have |received )my voice\s?(?:mail|audio|message)'
135                )
136              )
137      )
138    )
139    or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
140    or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
141  )
142  and 2 of (
143    (
144      // the sender is a freemail
145      sender.email.domain.root_domain in $free_email_providers
146    ),
147    (
148      any(ml.nlu_classifier(body.current_thread.text).intents,
149          .name in ("cred_theft") and .confidence in ("medium", "high")
150      )
151      or 
152      // use the OCR from the message screenshot
153      any(filter(file.explode(file.message_screenshot()), .depth == 0),
154          any(ml.nlu_classifier(.scan.ocr.raw).intents,
155              .name in ("cred_theft") and .confidence in ("medium", "high")
156          )
157      )
158    ),
159    (
160      any(attachments,
161          .content_type in ("html", "text", "text/html")
162          and any(ml.logo_detect(file.html_screenshot(.)).brands,
163                  .name in ("Microsoft") and .confidence in ("medium", "high")
164          )
165      )
166    ),
167    (
168      regex.icontains(sender.display_name,
169                      '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
170      )
171    ),
172    // attachment names are often HTML and voice mail related
173    (
174      any(attachments,
175          // this logic is reused below for eml attachments
176          // ensure updates occur both places
177          (
178            .content_type in ("html", "text", "text/html")
179            or .file_type in ("html", "unknown", "svg")
180            or .file_type == "pdf"
181          )
182          and (
183            regex.icontains(.file_name,
184                            '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
185                            // contains a time
186                            // 01min , 60secs
187                            '0?[1-9]\s*min(?:(?:ute)?s)?',
188                            '\d{1,2}\s*s(?:ec(?:ond)?s)?',
189                            // (00:50s)
190                            // 3:26 seconds
191                            '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
192                            // 03min25secs
193                            '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
194                            // [0:39]
195                            // (0:39)
196                            '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
197                            // contains an emoji
198                            '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
199            )
200            // sometimes there is no name, it's just the extension which is also strange
201            or .file_name in~ (".htm", ".html")
202            // or sometimes it has no name....
203            or .file_name is null
204          )
205      )
206    ),
207    // attachment contains javascript
208    (
209      any(attachments,
210          (
211            .content_type in ("html", "text", "text/html")
212            or .file_type in ("html", "unknown", "svg")
213          )
214          and (
215            (
216              .size < 1500
217              and any(file.explode(.), length(.scan.html.scripts) > 0)
218            )
219            // bypass the size requirement under these conditions
220            or (
221              // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
222              strings.ilike(file.parse_text(.,
223                                            encodings=[
224                                              "ascii",
225                                              "utf8",
226                                              "utf16-le"
227                                            ]
228                            ).text,
229                            "*onload*",
230                            "*window.location.href*",
231                            "*onerror*",
232                            "*CDATA*",
233                            "*<script*",
234                            "*</script*",
235                            "*atob*",
236                            "*location.assign*",
237                            "*decodeURIComponent*"
238              )
239            )
240          )
241      )
242    ),
243    (
244      any(attachments,
245          (
246            .content_type in ("html", "text", "text/html")
247            or .file_type in ("html", "unknown", "svg")
248          )
249          and any(recipients.to,
250                  // the html attachment contains a recipient email address
251                  strings.contains(file.parse_html(..).raw, .email.email)
252                  // the sld of the domain is in the attachment name
253                  or strings.contains(..file_name, .email.domain.sld)
254          )
255      )
256    ),
257    // eml attachments
258    (
259      any(filter(attachments,
260                 .content_type == "message/rfc822" or .file_extension in ('eml')
261          ),
262          // which contain attachments
263          // this is the same logic as above
264          any(file.parse_eml(.).attachments,
265              (
266                .content_type in ("html", "text", "text/html")
267                or .file_type in ("html", "unknown", "svg")
268                or .file_type == "pdf"
269              )
270              and (
271                regex.icontains(.file_name,
272                                '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
273                                // contains a time
274                                // 01min , 60secs
275                                '0?[1-9]\s*min(?:(?:ute)?s)?',
276                                '\d{1,2}\s*s(?:ec(?:ond)?s)?',
277                                // (00:50s)
278                                // 3:26 seconds
279                                '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
280                                // 03min25secs
281                                '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
282                                // [0:39]
283                                // (0:39)
284                                '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
285                                // contains an emoji
286                                '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
287                )
288                // sometimes there is no name, it's just the extension which is also strange
289                or .file_name in~ (".htm", ".html")
290                // or sometimes it has no name....
291                or .file_name is null
292              )
293          )
294      )
295    ),
296    // attached eml sender/recipient/subject are all the same as the outer
297    // and have an attachment or body links
298    (
299      any(filter(attachments,
300                 .content_type == "message/rfc822" or .file_extension in ('eml')
301          ),
302          // the attached eml's subject and sender match the outer message
303          // and its recipient list is the same as the outer recipients
304          file.parse_eml(.).subject.subject == subject.subject
305          and file.parse_eml(.).sender.email.email == sender.email.email
306          and (
307            length(file.parse_eml(.).recipients.to) == length(recipients.to)
308            and all(recipients.to,
309                    .email.email in map(file.parse_eml(..).recipients.to,
310                                        .email.email
311                    )
312            )
313          )
314          and (
315            // there are attachments
316            length(file.parse_eml(.).attachments) > 0
317            // or body links
318            or length(filter(file.parse_eml(.).body.links,
319                             .href_url.domain.domain not in $org_domains
320                             and .href_url.domain.root_domain not in $org_domains
321                      )
322            ) > 0
323          )
324      )
325    ),
326    // the body links contain the recipients email
327    (
328      length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
329      and any(body.links,
330              any(recipients.to,
331                  strings.icontains(..href_url.url, .email.email)
332                  or strings.icontains(..href_url.url, .email.local_part)
333              )
334      )
335    ),
336    (
337      length(body.current_thread.text) < 700
338      and regex.icontains(body.current_thread.text,
339                          'Méssãge|Méssage|Recéived|Addréss'
340      )
341    ),
342    (
343      // sender domain matches no body domains
344      // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
345      length(filter(body.links,
346                    .display_text is not null
347                    and .display_url.url is null
348                    and .href_url.domain.valid
349             )
350      ) > 0
351      and all(filter(body.links,
352                     .display_text is not null
353                     and .display_url.url is null
354                     and .href_url.domain.valid
355              ),
356              .href_url.domain.root_domain != sender.email.domain.root_domain
357              and .href_url.domain.root_domain not in $org_domains
358              and .href_url.domain.root_domain not in ("aka.ms")
359              and .href_url.domain.root_domain not in (
360                "unitelvoice.com",
361                "googleapis.com",
362                "dialmycalls.com",
363                "ringcentral.biz",
364                "google.com"
365              )
366      )
367    ),
368    // the body links contain vm related phrases
369    (
370      any(body.links,
371          regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
372          and regex.icontains(.display_text,
373                              '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
374          )
375          // negate FP terms in link display texts
376          and not strings.icontains(.display_text, 'voice call center')
377      )
378    ),
379    (
380      any(body.links,
381          .href_url.path == "/ctt"
382          and regex.icontains(.display_text,
383                              '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
384          )
385          // negate FP terms in link display texts
386          and not strings.icontains(.display_text, 'voice call center')
387      )
388    ),
389    // new domains
390    (
391      any(body.links,
392          network.whois(.href_url.domain).days_old < 10
393          and not strings.icontains(.href_url.path, "unsubscribe")
394      )
395    ),
396    // sld use in sender/subject elements
397    (
398      any(recipients.to,
399          // recipient's SLD is in the sender's display name
400          strings.icontains(sender.display_name, .email.domain.sld)
401          // recipient's SLD is in the subject
402          or strings.icontains(subject.subject, .email.domain.sld)
403          // recipient's SLD is in the senders local_part
404          or strings.icontains(sender.email.local_part, .email.domain.sld)
405      )
406    ),
407    // often times the subject or sender display name will contain time references
408    (
409      any([sender.display_name, subject.subject, body.current_thread.text],
410          regex.icontains(.,
411                          // 01min , 60secs
412                          '0?[1-9]\s*min(?:(?:ute)?s)?\b',
413                          '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
414                          // (00:50s)
415                          // 3:26 seconds
416                          '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
417                          // 03min25secs
418                          '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
419                          // [0:39]
420                          // (0:39)
421                          '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
422          )
423      )
424      // reuse the same logic against OCR output of message_screenshot
425      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
426             regex.icontains(.scan.ocr.raw,
427                             // 01min , 60secs
428                             '0?[1-9]\s*min(?:(?:ute)?s)?\b',
429                             '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
430                             // (00:50s)
431                             // 3:26 seconds
432                             '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
433                             // 03min25secs
434                             '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
435                             // [0:39]
436                             // (0:39)
437                             '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
438             )
439      )
440    ),
441    // often times the subject or sender display name will contain dates
442    (
443      any([sender.display_name, subject.subject],
444          // days of week
445          any([
446                'monday',
447                'tuesday',
448                'wednesday',
449                'thursday',
450                'friday',
451                'saturday',
452                'sunday'
453              ],
454              strings.icontains(.., .)
455          )
456          // months
457          // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
458          or any([
459                   "January",
460                   "February",
461                   "March",
462                   "April",
463                   "June",
464                   "July",
465                   "August",
466                   "September",
467                   "October",
468                   "November",
469                   "December"
470                 ],
471                 strings.icontains(.., .)
472          )
473          // use a regex for May
474          or regex.icontains(., '\bmay\b')
475          // common date formats
476          or regex.contains(.,
477                            // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
478                            '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
479                            // MM/DD/YYYY or MM/DD/YY (US format)
480                            '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
481                            // DD/MM/YYYY or DD/MM/YY (European format)
482                            '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
483                            // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
484                            '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
485          )
486          // common time formats
487          or regex.contains(.,
488                            // Example: 23:45, 08:30
489                            '([01]\d|2[0-3]):([0-5]\d)',
490                            // Example: 23:45:59, 08:30:12
491                            '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
492                            // Example: 08:30 AM, 12:45 pm
493                            '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
494                            // Example: 08:30:12 AM, 12:45:59 pm
495                            '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
496          )
497      )
498      // or use the OCR results from file.message_screenshot
499      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
500             // days of week
501             any([
502                   'monday',
503                   'tuesday',
504                   'wednesday',
505                   'thursday',
506                   'friday',
507                   'saturday',
508                   'sunday'
509                 ],
510                 strings.icontains(..scan.ocr.raw, .)
511             )
512             // months
513             // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
514             or any([
515                      "January",
516                      "February",
517                      "March",
518                      "April",
519                      "June",
520                      "July",
521                      "August",
522                      "September",
523                      "October",
524                      "November",
525                      "December"
526                    ],
527                    strings.icontains(..scan.ocr.raw, .)
528             )
529             // use a regex for May
530             or regex.contains(.scan.ocr.raw, '\bMay\b')
531             // common date formats
532             or regex.contains(.scan.ocr.raw,
533                               // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
534                               '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
535                               // MM/DD/YYYY or MM/DD/YY (US format)
536                               '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
537                               // DD/MM/YYYY or DD/MM/YY (European format)
538                               '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
539                               // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
540                               '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
541             )
542             // common time formats
543             or regex.contains(.scan.ocr.raw,
544                               // Example: 23:45, 08:30
545                               '([01]\d|2[0-3]):([0-5]\d)',
546                               // Example: 23:45:59, 08:30:12
547                               '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
548                               // Example: 08:30 AM, 12:45 pm
549                               '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
550                               // Example: 08:30:12 AM, 12:45:59 pm
551                               '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
552             )
553      )
554    ),
555    // there are often emoji in the sender display name
556    (
557      any([sender.display_name, subject.subject],
558          // contains an emoji
559          regex.contains(.,
560                         '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
561          )
562          // negate where the emoji occur in tags
563          and not regex.contains(.,
564                                 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
565          )
566      )
567    ),
568    // an attachment is a pdf, image, or document that contains a url
569    (
570      1 <= length(attachments) <= 2
571      and any(attachments,
572              (
573                .file_type in $file_types_images
574                or .file_type == "pdf"
575                or .file_extension in $file_extensions_macros
576              )
577              and any(file.explode(.),
578                      .scan.qr.type == "url"
579                      or strings.icontains(.scan.qr.data, 'http')
580                      or any(recipients.to,
581                             strings.icontains(..scan.qr.data, .email.local_part)
582                             or strings.icontains(..scan.qr.data, .email.email)
583                      )
584              )
585      )
586    )
587  )
588  
589  // negating legit replies and legitimate audio file attachments and known voicemail senders
590  and not (
591    sender.email.domain.valid
592    and sender.email.domain.root_domain in (
593      "magicjack.com",
594      "magicjackforbusiness.com",
595      "unitelvoice.com",
596      "voipinterface.net",
597      "ringcentral.biz",
598      "verizonwireless.com",
599      "t-mobile.com",
600      "justcall.io",
601      "airtel.com",
602      "grasshopper.com",
603      "ooma.com",
604      "ui.com"
605    )
606  )
607  and not (
608    any(attachments,
609        strings.starts_with(.content_type, "audio")
610        // confirm the content type with .file_type
611        // we have seen attachments claim to be audio/* files, only to be exploded as something else
612        and .file_type in ("wav", "mp3")
613    )
614  )
615  and not (
616    (
617      strings.istarts_with(subject.subject, "RE:")
618      // out of office auto-reply
619      // the NLU model will handle these better natively soon
620      or strings.istarts_with(subject.subject, "Automatic reply:")
621    )
622    and (length(headers.references) > 0 or headers.in_reply_to is not null)
623  )
624  // bounce-back negations
625  and not any(attachments,
626              any(file.parse_eml(.).attachments,
627                  .content_type == "message/delivery-status"
628              )
629  )
630  // bounce-back and calendar invite negations
631  and not (
632    any(attachments,
633        .content_type in ("message/delivery-status", "text/calendar")
634    )
635  )
636  // negate newsletters
637  and not (
638    (
639      any(ml.nlu_classifier(body.current_thread.text).topics,
640          .name in (
641            "Newsletters and Digests",
642            "B2B Cold Outreach",
643            "Events and Webinars"
644          )
645          and .confidence == "high"
646      )
647    )
648    and not strings.icontains(body.current_thread.text,
649                              "this voicemail was shared by"
650    )
651  )
652  // negate bouncebacks from proofpoint
653  and not (
654    sender.display_name == "Mail Delivery Subsystem"
655    and strings.ends_with(headers.message_id, "pphosted.com>")
656    and any(headers.hops,
657            .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
658    )
659    and any(attachments, .content_type == "message/rfc822")
660  )
661  // negate CheckPoint encrypted messages
662  and not (
663    // CheckPoint banner
664    length(attachments) == 1
665    and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
666    and strings.istarts_with(headers.message_id, "<encrypted")
667    and any(headers.domains, .root_domain == "checkpointcloudsec.com")
668  )
669  // an impersonated high trust domain
670  and (
671    (
672      sender.email.domain.root_domain in $high_trust_sender_root_domains
         // null-safe: a missing DMARC result is treated as not passing (same form as the org-domain check below)
673      and not coalesce(headers.auth_summary.dmarc.pass, false)
674      // service abuse
675      and not sender.email.email in ("noreply-application-integration@google.com")
676    )
677    or sender.email.domain.root_domain not in $high_trust_sender_root_domains
678  
679    // sender profile
680    or (
681      (
682        not sender.email.domain.root_domain in $org_domains
683        and (profile.by_sender_email().prevalence not in ("common"))
684        and not profile.by_sender_email().solicited
685      )
686      or (
687        profile.by_sender_email().any_messages_malicious_or_spam
688        and not profile.by_sender_email().any_messages_benign
689      )
690      // match if the sender is in org domains but failed auth
691      or (
692        sender.email.domain.domain in $org_domains
693        and not coalesce(headers.auth_summary.dmarc.pass, false)
694      )
695      // match if the sender address is blank or null
696      or (regex.match(sender.email.email, "") or sender.email.email is null)
697    )
698  )  
699attack_types:
700  - "Credential Phishing"
701tactics_and_techniques:
702  - "Social engineering"
703detection_methods:
704  - "Content analysis"
705  - "Natural Language Understanding"
706  - "Sender analysis"
707  - "URL analysis"
708id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"
to-top