Fake voicemail notification (untrusted sender)

This rule detects a common credential phishing vector that entices the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, or subject, or a combination of those elements with emojis or a medium-to-high-confidence credential theft NLU intent, from a first-time, unsolicited sender.

Sublime rule (View on GitHub)

  1name: "Fake voicemail notification (untrusted sender)"
  2description: |
  3  This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
  4  The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.  
  5type: "rule"
  6severity: "medium"
  7source: |
  8  type.inbound
  9  // needs 1-25 body links or 1-3 distinct (by md5) attachments, and never more than 8 distinct attachments
 10  and (
 11    (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
 12    and 0 <= length(distinct(attachments, .md5)) <= 8
 13  )
 14  
 15  // the subject or display_name need some keywords which are voicemail related
 16  and (
 17    any([subject.subject, sender.display_name],
 18        regex.icontains(.,
 19                        // split phrases: a primary voicemail word, then up to 3 words (or only punctuation/whitespace), then a secondary word
 20                        '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
 21                        // regex specific to v-mail, v_msg, v,mail, etc
 22                        // list of "secondary" words synced with regex above this one. NOTE(review): here the final \b sits inside the group, so it only binds to the transcript alternative - confirm intent
 23                        'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
 24                        // split phrases that start with "call"/"caller", followed within 3 words (or only punctuation) by a primary voicemail word
 25                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
 26                        // strong phrases that are sufficient on their own
 27                        '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
 28                        // starts in the format of `(4)` (a single digit in parentheses) and contains some voicemail keywords
 29                        '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
 30                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
 31  
 32                        // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
 33                        // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
 34                        '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
 35                        // obfuscated phone number with at least one digit in the prefix
 36                        // XXX-555-5555, XXX-5XX-XXXX
 37                        '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
 38                        // obfuscated voicemail/voicemessage keywords (keyword followed by 2+ masking characters)
 39                        'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
 40                        'v[o0][il1]cem[\*X\.\-_]{2,}',
 41                        // "<number> new ... voice..." patterns
 42                        '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
 43                        // "sent/new/incoming message ... voice" patterns
 44                        '(?:sent|new|incoming)[\s\-]+message.*v[o0][il1]ce',
 45        )
 46    )
 47    // body.current_thread.text inspection should be very specific to avoid FP; confusable characters are replaced before matching
 48    or regex.icontains(strings.replace_confusables(body.current_thread.text),
 49                       // body.current_thread.text,
 50                       'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
 51                       '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
 52                       'v[o0][il1]cema[il1][li1] (is )?attached',
 53                       'an? (?:new )?encrypted v[o0][il1]cemail',
 54                       'a (?:new )?pending message',
 55                       'Your? have (?: an?)?incoming v[o0][il1]ceRec',
 56                       "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
 57                       'New V[o0][il1]cema[il1][li1] Received',
 58                       'New m[il1]ssed ca[li1][li1] record',
 59                       '\bv[o0][il1]cema[il1][li1] transcript\b',
 60                       'Listen to V[o0][il1]ceMa[il1][li1]',
 61                       'New v[o0][il1]cema[il1][li1] from',
 62                       'v[o0][il1]ce note'
 63    )
 64    // two broader body regexes kept separate so first-person phrasing ("I just left you a voicemail", "you received my voicemail") can be negated
 65    or (
 66      regex.icontains(body.current_thread.text,
 67                      // body.current_thread.text,
 68                      '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
 69                      'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
 70      )
 71      and not regex.icontains(body.current_thread.text,
 72                              '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+) and)? left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
 73      )
 74      and not regex.icontains(body.current_thread.text,
 75                              'you (?:have |received )my voice\s?(?:mail|audio|message)'
 76      )
 77    )
 78    // Reuse the body.current_thread.text logic against the OCR output of the message screenshot. NOTE(review): the copies have drifted - the 'left you a' pattern and the first negation below differ from the body versions, and both negations read body.current_thread.text rather than the OCR text; confirm intent
 79    or (
 80      length(attachments) > 0
 81      and (
 82        all(attachments, .file_type in $file_types_images and beta.parse_exif(.).image_height != 1)
 83        or (
 84          // there is a mix of fake audio attachments and images (images with an EXIF height of 1 are not counted)
 85          length(filter(filter(attachments,
 86                               strings.starts_with(.content_type, "audio")
 87                        ),
 88                        // confirm the content type with file.explode
 89                        // we have seen attachments claim to be audio/* files, only to be exploded as something else
 90                        any(file.explode(.),
 91                            not strings.starts_with(.flavors.mime, "audio")
 92                        )
 93                 )
 94          // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
 95          // meaning, all attachments that are NOT fake audio attachments MUST be images
 96          ) + length(filter(attachments, .file_type in $file_types_images and beta.parse_exif(.).image_height != 1)) == length(attachments
 97          )
 98        )
 99      )
100      and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
101              regex.icontains(.scan.ocr.raw,
102                              // body.current_thread.text,
103                              'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
104                              '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
105                              'v[o0][il1]cema[il1][li1] (is )?attached',
106                              'an? (?:new )?encrypted v[o0][il1]cemail',
107                              'a (?:new )?pending message',
108                              'Your? have (?: an?)?incoming v[o0][il1]ceRec',
109                              "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
110                              'New V[o0][il1]cema[il1][li1] Received',
111                              'New m[il1]ssed ca[li1][li1] record',
112                              '\bv[o0][il1]cema[il1][li1] transcript\b',
113                              'Listen to V[o0][il1]ceMa[il1][li1]',
114                              'New v[o0][il1]cema[il1][li1] from',
115                              'v[o0][il1]ce note'
116              )
117              or (
118                regex.icontains(.scan.ocr.raw,
119                                // body.current_thread.text,
120                                '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
121                                'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audi[o0])(?: message)?',
122                )
123                and not regex.icontains(body.current_thread.text,
124                                        '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
125                )
126                and not regex.icontains(body.current_thread.text,
127                                        'you (?:have |received )my voice\s?(?:mail|audio|message)'
128                )
129              )
130      )
131    )
132    or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
133    or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
134  )
135  and 2 of (
136    (
137      // the sender's root domain is a free email provider
138      sender.email.domain.root_domain in $free_email_providers
139    ),
140    (
141      any(ml.nlu_classifier(body.current_thread.text).intents,
142          .name in ("cred_theft") and .confidence in ("medium", "high")
143      )
144      or 
145      // or the same cred_theft intent is found in the OCR text of the message screenshot
146      any(filter(file.explode(file.message_screenshot()), .depth == 0),
147          any(ml.nlu_classifier(.scan.ocr.raw).intents,
148              .name in ("cred_theft") and .confidence in ("medium", "high")
149          )
150      )
151    ),
152    (
153      any(attachments,
154          .content_type in ("html", "text", "text/html")
155          and any(ml.logo_detect(file.html_screenshot(.)).brands,
156                  .name in ("Microsoft") and .confidence in ("medium", "high")
157          )
158      )
159    ),
160    (
161      regex.icontains(sender.display_name,
162                      '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
163      )
164    ),
165    // HTML/text/SVG/PDF/unknown attachments whose names are voicemail related, contain a duration or emoji, or are missing
166    (
167      any(attachments,
168          // this logic is reused below for eml attachments
169          // ensure updates occur in both places
170          (
171            .content_type in ("html", "text", "text/html")
172            or .file_type in ("html", "unknown", "svg")
173            or .file_type == "pdf"
174          )
175          and (
176            regex.icontains(.file_name,
177                            '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
178                            // contains a time
179                            // 01min , 60secs
180                            '0?[1-9]\s*min(?:(?:ute)?s)?',
181                            '\d{1,2}\s*s(?:ec(?:ond)?s)?',
182                            // (00:50s)
183                            // 3:26 seconds. NOTE(review): the unit group only spells "s", "seconds" or "secsonds"; in a contains match the leading "s" is what matters - confirm intent
184                            '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
185                            // 03min25secs
186                            '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
187                            // [0:39] followed by whitespace
188                            // (0:39) followed by whitespace
189                            '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
190                            // contains an emoji
191                            '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
192            )
193            // sometimes there is no name, it's just the extension, which is also strange
194            or .file_name in~ (".htm", ".html")
195            // or sometimes it has no name at all
196            or .file_name is null
197          )
198      )
199    ),
200    // an HTML/text/SVG/unknown attachment contains javascript: a small file (< 1500 bytes) with scripts, or script-like strings in its text
201    (
202      any(attachments,
203          (
204            .content_type in ("html", "text", "text/html")
205            or .file_type in ("html", "unknown", "svg")
206          )
207          and (
208            (
209              .size < 1500
210              and any(file.explode(.), length(.scan.html.scripts) > 0)
211            )
212            // bypass the size requirement when the parsed text contains any of these script indicators
213            or (
214              // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
215              strings.ilike(file.parse_text(.,
216                                            encodings=[
217                                              "ascii",
218                                              "utf8",
219                                              "utf16-le"
220                                            ]
221                            ).text,
222                            "*onload*",
223                            "*window.location.href*",
224                            "*onerror*",
225                            "*CDATA*",
226                            "*<script*",
227                            "*</script*",
228                            "*atob*",
229                            "*location.assign*",
230                            "*decodeURIComponent*"
231              )
232            )
233          )
234      )
235    ),
236    (
237      any(attachments,
238          (
239            .content_type in ("html", "text", "text/html")
240            or .file_type in ("html", "unknown", "svg")
241          )
242          and any(recipients.to,
243                  // the html attachment contains a recipient email address
244                  strings.contains(file.parse_html(..).raw, .email.email)
245                  // or the recipient domain's sld is in the attachment name
246                  or strings.contains(..file_name, .email.domain.sld)
247          )
248      )
249    ),
250    // eml attachments (message/rfc822 content type or .eml extension)
251    (
252      any(filter(attachments, .content_type == "message/rfc822" or .file_extension in ('eml')),
253          // which themselves contain attachments
254          // matched with the same file type/name logic as the outer attachment check above
255          any(file.parse_eml(.).attachments,
256              (
257                .content_type in ("html", "text", "text/html")
258                or .file_type in ("html", "unknown", "svg")
259                or .file_type == "pdf"
260              )
261              and (
262                regex.icontains(.file_name,
263                                '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
264                                // contains a time
265                                // 01min , 60secs
266                                '0?[1-9]\s*min(?:(?:ute)?s)?',
267                                '\d{1,2}\s*s(?:ec(?:ond)?s)?',
268                                // (00:50s)
269                                // 3:26 seconds
270                                '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
271                                // 03min25secs
272                                '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
273                                // [0:39] followed by whitespace
274                                // (0:39) followed by whitespace
275                                '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
276                                // contains an emoji
277                                '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
278                )
279                // sometimes there is no name, it's just the extension, which is also strange
280                or .file_name in~ (".htm", ".html")
281                // or sometimes it has no name at all
282                or .file_name is null
283              )
284          )
285      )
286    ),
287    // attached eml sender/recipient/subject are all the same as the outer message
288    // and the attached eml has an attachment or body links
289    (
290      any(filter(attachments, .content_type == "message/rfc822" or .file_extension in ('eml')),
291          // the attached message's subject and sender email equal the outer message's
292          // and both have the same number of To recipients, with every outer recipient present in the attached message
293          file.parse_eml(.).subject.subject == subject.subject
294          and file.parse_eml(.).sender.email.email == sender.email.email
295          and (
296            length(file.parse_eml(.).recipients.to) == length(recipients.to)
297            and all(recipients.to,
298                    .email.email in map(file.parse_eml(..).recipients.to,
299                                        .email.email
300                    )
301            )
302          )
303          and (
304            // there are attachments
305            length(file.parse_eml(.).attachments) > 0
306            // or body links whose domain and root domain are both outside $org_domains
307            or length(filter(file.parse_eml(.).body.links,
308                             .href_url.domain.domain not in $org_domains
309                             and .href_url.domain.root_domain not in $org_domains
310                      )
311            ) > 0
312          )
313      )
314    ),
315    // the body links contain a recipient's email address or local part (requires at least one recipient with a non-empty address or valid domain)
316    (
317      length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
318      and any(body.links,
319              any(recipients.to,
320                  strings.icontains(..href_url.url, .email.email)
321                  or strings.icontains(..href_url.url, .email.local_part)
322              )
323      )
324    ),
325    (
326      length(body.current_thread.text) < 700
327      and regex.icontains(body.current_thread.text,
328                          'Méssãge|Méssage|Recéived|Addréss'
329      )
330    ),
331    (
332      // at least one qualifying link exists, and no qualifying link points at the sender's root domain, $org_domains, or the allow-listed domains below
333      // only inspect "links" that have a display_text and a null display_url, so that "plain text" email addresses are not caught
334      length(filter(body.links,
335                    .display_text is not null
336                    and .display_url.url is null
337                    and .href_url.domain.valid
338             )
339      ) > 0
340      and all(filter(body.links,
341                     .display_text is not null
342                     and .display_url.url is null
343                     and .href_url.domain.valid
344              ),
345              .href_url.domain.root_domain != sender.email.domain.root_domain
346              and .href_url.domain.root_domain not in $org_domains
347              and .href_url.domain.root_domain not in ("aka.ms")
348              and .href_url.domain.root_domain not in (
349                "unitelvoice.com",
350                "googleapis.com",
351                "dialmycalls.com",
352                "ringcentral.biz",
353                "google.com"
354              )
355      )
356    ),
357    // the body links' display text contains vm related phrases. NOTE(review): the unanchored '[^a-z]*[A-Z][^a-z]*' contains-match is satisfied by any single uppercase letter - confirm whether all-caps text was intended
358    (
359      any(body.links,
360          regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
361          and regex.icontains(.display_text,
362                              '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
363          )
364          // negate FP terms in link display texts
365          and not strings.icontains(.display_text, 'voice call center')
366      )
367    ),
368    (
369      any(body.links,
370          .href_url.path == "/ctt"
371          and regex.icontains(.display_text,
372                              '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
373          )
374          // negate FP terms in link display texts
375          and not strings.icontains(.display_text, 'voice call center')
376      )
377    ),
378    // new domains: a body link's domain is less than 10 days old per whois (links with "unsubscribe" in the path are ignored)
379    (
380      any(body.links,
381          network.whois(.href_url.domain).days_old < 10
382          and not strings.icontains(.href_url.path, "unsubscribe")
383      )
384    ),
385    // recipient sld used in sender/subject elements
386    (
387      any(recipients.to,
388          // recipient's SLD is in the sender's display name
389          strings.icontains(sender.display_name, .email.domain.sld)
390          // recipient's SLD is in the subject
391          or strings.icontains(subject.subject, .email.domain.sld)
392          // recipient's SLD is in the sender's local_part
393          or strings.icontains(sender.email.local_part, .email.domain.sld)
394      )
395    ),
396    // oftentimes the subject, sender display name, or body will contain time (duration) references
397    (
398      any([sender.display_name, subject.subject, body.current_thread.text],
399          regex.icontains(.,
400                          // 01min , 60secs
401                          '0?[1-9]\s*min(?:(?:ute)?s)?\b',
402                          '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
403                          // (00:50s)
404                          // 3:26 seconds
405                          '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
406                          // 03min25secs
407                          '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
408                          // [0:39] followed by whitespace
409                          // (0:39) followed by whitespace
410                          '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
411          )
412      )
413      // reuse the same logic against the OCR output of message_screenshot
414      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
415             regex.icontains(.scan.ocr.raw,
416                             // 01min , 60secs
417                             '0?[1-9]\s*min(?:(?:ute)?s)?\b',
418                             '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
419                             // (00:50s)
420                             // 3:26 seconds
421                             '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
422                             // 03min25secs
423                             '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
424                             // [0:39] followed by whitespace
425                             // (0:39) followed by whitespace
426                             '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
427             )
428      )
429    ),
430    // oftentimes the subject or sender display name will contain dates or clock times
431    (
432      any([sender.display_name, subject.subject],
433          // days of week
434          any([
435                'monday',
436                'tuesday',
437                'wednesday',
438                'thursday',
439                'friday',
440                'saturday',
441                'sunday'
442              ],
443              strings.icontains(.., .)
444          )
445          // months
446          // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc, so it is left out of this list
447          or any([
448                   "January",
449                   "February",
450                   "March",
451                   "April",
452                   "June",
453                   "July",
454                   "August",
455                   "September",
456                   "October",
457                   "November",
458                   "December"
459                 ],
460                 strings.icontains(.., .)
461          )
462          // use a word-bounded regex for May
463          or regex.icontains(., '\bmay\b')
464          // common date formats
465          or regex.contains(.,
466                            // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
467                            '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
468                            // MM/DD/YYYY or MM/DD/YY (US format)
469                            '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
470                            // DD/MM/YYYY or DD/MM/YY (European format)
471                            '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
472                            // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
473                            '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
474          )
475          // common time formats
476          or regex.contains(.,
477                            // Example: 23:45, 08:30
478                            '([01]\d|2[0-3]):([0-5]\d)',
479                            // Example: 23:45:59, 08:30:12
480                            '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
481                            // Example: 08:30 AM, 12:45 pm
482                            '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
483                            // Example: 08:30:12 AM, 12:45:59 pm
484                            '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
485          )
486      )
487      // or use the OCR results from file.message_screenshot
488      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
489             // days of week
490             any([
491                   'monday',
492                   'tuesday',
493                   'wednesday',
494                   'thursday',
495                   'friday',
496                   'saturday',
497                   'sunday'
498                 ],
499                 strings.icontains(..scan.ocr.raw, .)
500             )
501             // months
502             // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc, so it is left out of this list
503             or any([
504                      "January",
505                      "February",
506                      "March",
507                      "April",
508                      "June",
509                      "July",
510                      "August",
511                      "September",
512                      "October",
513                      "November",
514                      "December"
515                    ],
516                    strings.icontains(..scan.ocr.raw, .)
517             )
518             // use a word-bounded regex for May; NOTE(review): this uses regex.contains with a capitalized 'May', while the subject/display name check uses regex.icontains - presumably deliberate for OCR'd body text, confirm
519             or regex.contains(.scan.ocr.raw, '\bMay\b')
520             // common date formats
521             or regex.contains(.scan.ocr.raw,
522                               // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
523                               '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
524                               // MM/DD/YYYY or MM/DD/YY (US format)
525                               '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
526                               // DD/MM/YYYY or DD/MM/YY (European format)
527                               '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
528                               // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
529                               '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
530             )
531             // common time formats
532             or regex.contains(.scan.ocr.raw,
533                               // Example: 23:45, 08:30
534                               '([01]\d|2[0-3]):([0-5]\d)',
535                               // Example: 23:45:59, 08:30:12
536                               '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
537                               // Example: 08:30 AM, 12:45 pm
538                               '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
539                               // Example: 08:30:12 AM, 12:45:59 pm
540                               '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
541             )
542      )
543    ),
544    // there are often emoji in the sender display name or subject
545    (
546      any([sender.display_name, subject.subject],
547          // contains an emoji
548          regex.contains(.,
549                         '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
550          )
551          // negate where the emoji occurs inside a leading [bracketed] tag
552          and not regex.contains(.,
553                                 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
554          )
555      )
556    ),
557    // 1-2 attachments, one of which is an image, pdf, or file with an extension in $file_extensions_macros, containing a QR code that is a url or holds a recipient's address
558    (
559      1 <= length(attachments) <= 2
560      and any(attachments,
561              (
562                .file_type in $file_types_images
563                or .file_type == "pdf"
564                or .file_extension in $file_extensions_macros
565              )
566              and any(file.explode(.),
567                      .scan.qr.type == "url"
568                      or strings.icontains(.scan.qr.data, 'http')
569                      or any(recipients.to,
570                             strings.icontains(..scan.qr.data, .email.local_part)
571                             or strings.icontains(..scan.qr.data, .email.email)
572                      )
573              )
574      )
575    )
576  )
577  
578  // negate known voicemail senders (valid sender domain in the list below), genuine audio attachments, and legit replies
579  and not (
580    sender.email.domain.valid
581    and sender.email.domain.root_domain in (
582      "magicjack.com",
583      "unitelvoice.com",
584      "voipinterface.net",
585      "ringcentral.biz",
586      "verizonwireless.com",
587      "t-mobile.com",
588      "justcall.io",
589      "airtel.com"
590    )
591  )
592  and not (
593    any(filter(attachments, strings.starts_with(.content_type, "audio")),
594        // confirm the content type with file.explode
595        // we have seen attachments claim to be audio/* files, only to be exploded as something else
596        any(file.explode(.), strings.starts_with(.flavors.mime, "audio"))
597    )
598  )
599  and not (
600    (
601      strings.istarts_with(subject.subject, "RE:")
602      // out of office auto-reply
603      // the NLU model will handle these better natively soon
604      or strings.istarts_with(subject.subject, "Automatic reply:")
605    )
606    and (
607      length(headers.references) > 0
608      or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
609    )
610  )
611  // negate highly trusted sender domains unless they fail DMARC authentication (a missing DMARC result is treated as a failure)
612  and (
613    (
614      sender.email.domain.root_domain in $high_trust_sender_root_domains
615      and not coalesce(headers.auth_summary.dmarc.pass, false)
616    )
617    or sender.email.domain.root_domain not in $high_trust_sender_root_domains
618  )
619  // bounce-back negations: an attachment parsed as an eml carries a delivery-status part
620  and not any(attachments,
621              any(file.parse_eml(.).attachments,
622                  .content_type == "message/delivery-status"
623              )
624  )
625  // bounce-back and calendar negations: a delivery-status or text/calendar attachment on the message itself
626  and not (
627    any(attachments,
628        .content_type in ("message/delivery-status", "text/calendar")
629    )
630  )
631  // negate newsletters, B2B cold outreach, and events/webinars (high-confidence NLU topics)
632  and not any(ml.nlu_classifier(body.current_thread.text).topics,
633              .name in ("Newsletters and Digests", "B2B Cold Outreach", "Events and Webinars") and .confidence == "high"
634  )
635  // negate bouncebacks from proofpoint
636  and not (
637    sender.display_name == "Mail Delivery Subsystem"
638    and strings.ends_with(headers.message_id, "pphosted.com>")
639    and any(headers.hops,
640            .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
641    )
642    and any(attachments, .content_type == "message/rfc822")
643  )
644  // negate CheckPoint encrypted messages
645  and not (
646      // CheckPoint banner
647      length(attachments) == 1
648      and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
649      and strings.istarts_with(headers.message_id, "<encrypted")
650      and any(headers.domains, .root_domain == "checkpointcloudsec.com")
651  )
652  // an impersonated high trust domain (same null-safe DMARC check as the org-domain case below)
653  and (
654    (
655      sender.email.domain.root_domain in $high_trust_sender_root_domains
656      and not coalesce(headers.auth_summary.dmarc.pass, false)
657    )
658  
659    // sender profile: outside the org, not a common sender, and not solicited
660    or (
661      (
662        not sender.email.domain.root_domain in $org_domains
663        and (profile.by_sender_email().prevalence not in ("common"))
664        and not profile.by_sender_email().solicited
665      )
666      or (
667        profile.by_sender_email().any_messages_malicious_or_spam
668        and not profile.by_sender_email().any_messages_benign
669      )
670      // match if the sender is in org domains but failed auth
671      or (
672        sender.email.domain.domain in $org_domains
673        and not coalesce(headers.auth_summary.dmarc.pass, false)
674      )
675      // match if the sender address is blank or null
676      or (
677          regex.match(sender.email.email, "")
678          or sender.email.email is null
679      )
680    )
681  )  
682attack_types:
683  - "Credential Phishing"
684tactics_and_techniques:
685  - "Social engineering"
686detection_methods:
687  - "Content analysis"
688  - "Natural Language Understanding"
689  - "Sender analysis"
690  - "URL analysis"
691id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"
to-top