Fake voicemail notification (untrusted sender)

This rule detects a common credential phishing vector that entices the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, or subject, or a combination of those elements with emojis or a medium-to-high confidence credential theft NLU intent, from a first-time, unsolicited sender.

Sublime rule (View on GitHub)

  1name: "Fake voicemail notification (untrusted sender)"
  2description: |
  3  This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
  4  The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.  
  5type: "rule"
  6severity: "medium"
  7source: |
  8  type.inbound
  9  // contains links or attachments
 10  and (
 11    (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
 12    and 0 <= length(distinct(attachments, .md5)) <= 8
 13  )
 14  
 15  // the subject or display_name need some keywords which are voicemail related
 16  and (
 17    any([subject.subject, sender.display_name],
 18        regex.icontains(.,
 19                        // split phrases that occur within 3 words between or only punctuation between them
 20                        '(?:v[nm](\b|[[:punct:]])?|\bvoice(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
 21                        // regex specific to v-mail, v_msg, v,mail, etc
 22                        // list of "secondary" words synced with regex above this one
 23                        'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
 24                        // split phrases that start with "caller" that occur within 3 words between or only punctuation
 25                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bvoice(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
 26                        // strong phrases
 27                        '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|voice message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new voicemail from|new.v.m.from.\+?\d+|new voicemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|incoming transmission|voice note)',
 28                        // starts in the format of `(4)` and contains some voicemail keywords
 29                        '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|voice|unread|call)',
 30                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
 31  
 32                        // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
 33                        // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
 34                        '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
 35                        // obfuscated phone number with at least one digit in the prefix
 36                        // XXX-555-5555, XXX-5XX-XXXX
 37                        '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
 38        )
 39    )
 40    // body.current_thread.text inspection should be very specific to avoid FP
 41    or regex.icontains(strings.replace_confusables(body.current_thread.text),
 42                       // body.current_thread.text,
 43                       'sent (?:from|by) (?:your )?voice (?:mail )?system',
 44                       '(?:new|this) (?:voice(?:mail)?|audi[o0]) (?:message|notification|record)',
 45                       'voicemail (is )?attached',
 46                       'an? (?:new )?encrypted voicemail',
 47                       'a (?:new )?pending message',
 48                       'Your? have (?: an?)?incoming voiceRec',
 49                       "you(?:\'ve| have) a (?:new )?missed ca[li1][li1]",
 50                       'New Voicemail Received',
 51                       'New missed ca[li1][li1] record',
 52                       '\bvoicemail transcript\b',
 53                       'Listen to VoiceMail',
 54                       'New voicemail from',
 55                       'voice note'
 56    )
 57    // pull out two regexes that could benefit from negations
 58    or (
 59      regex.icontains(body.current_thread.text,
 60                      // body.current_thread.text,
 61                      '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)voice\s?(?:mail|audi[o0]|message|notification)',
 62                      'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audi[o0])(?: message|notification)?',
 63      )
 64      and not regex.icontains(body.current_thread.text,
 65                              '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+) and)? left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
 66      )
 67      and not regex.icontains(body.current_thread.text,
 68                              'you (?:have |received )my voice\s?(?:mail|audio|message)'
 69      )
 70    )
 71    // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
 72    or (
 73      length(attachments) > 0
 74      and (
 75        all(attachments, .file_type in $file_types_images)
 76        or (
 77          // there is a mix of fake audio attachments and images
 78          length(filter(filter(attachments,
 79                               strings.starts_with(.content_type, "audio")
 80                        ),
 81                        // confirm the content type with file.explode
 82                        // we have seen attachments claim to be audio/* files, only to be exploded as something else
 83                        any(file.explode(.),
 84                            not strings.starts_with(.flavors.mime, "audio")
 85                        )
 86                 )
 87          // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
 88          // meaning, all attachments that are NOT fake audio attachments MUST be images
 89          ) + length(filter(attachments, .file_type in $file_types_images)) == length(attachments
 90          )
 91        )
 92      )
 93      and any((filter(file.explode(beta.message_screenshot()), .depth == 0)),
 94              regex.icontains(.scan.ocr.raw,
 95                              // body.current_thread.text,
 96                              'sent (?:from|by) (?:your )?voice (?:mail )?system',
 97                              'new (?:voice(?:mail)?|audio) (?:message|notification|record)',
 98                              'voicemail (is )?attached',
 99                              'an? (?:new )?encrypted voicemail',
100                              'a (?:new )?pending message',
101                              'Your? have (?: an?)?incoming voiceRec',
102                              "you(?:\'ve| have) a (?:new )?missed ca[li1][li1]",
103                              'New Voicemail Received',
104                              'New missed ca[li1][li1] record',
105                              'voicemail transcript(?:ion)?',
106                              'Listen to VoiceMail',
107                              'New voicemail from',
108                              'voice note'
109              )
110              or (
111                regex.icontains(.scan.ocr.raw,
112                                // body.current_thread.text,
113                                'you (?:have |received )*(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}\bvoice\s?(?:mail|audi[o0]|message)',
114                                'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audi[o0])(?: message)?',
115                )
116                and not regex.icontains(body.current_thread.text,
117                                        '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
118                )
119                and not regex.icontains(body.current_thread.text,
120                                        'you (?:have |received )my voice\s?(?:mail|audio|message)'
121                )
122              )
123      )
124    )
125    or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
126    or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
127  )
128  and 2 of (
129    (
130      // the sender is a freemail
131      sender.email.domain.root_domain in $free_email_providers
132    ),
133    (
134      any(ml.nlu_classifier(body.current_thread.text).intents,
135          .name in ("cred_theft") and .confidence in ("medium", "high")
136      )
137      or 
138      // use the OCR from the message screenshot
139      any(filter(file.explode(beta.message_screenshot()), .depth == 0),
140          any(ml.nlu_classifier(.scan.ocr.raw).intents,
141              .name in ("cred_theft") and .confidence in ("medium", "high")
142          )
143      )
144    ),
145    (
146      any(attachments,
147          .content_type in ("html", "text", "text/html")
148          and any(ml.logo_detect(file.html_screenshot(.)).brands,
149                  .name in ("Microsoft") and .confidence in ("medium", "high")
150          )
151      )
152    ),
153    (
154      regex.icontains(sender.display_name,
155                      '(voice|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
156      )
157    ),
158    // attachment names are often HTML and voice mail related
159    (
160      any(attachments,
161          // this logic is reused below for eml attachments
162          // ensure updates occur both places
163          (
164            .content_type in ("html", "text", "text/html")
165            or .file_type in ("html", "unknown", "svg")
166            or .file_type == "pdf"
167          )
168          and (
169            regex.icontains(.file_name,
170                            '(?:voice|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
171                            // contains a time
172                            // 01min , 60secs
173                            '0?[1-9]\s*min(?:(?:ute)?s)?',
174                            '\d{1,2}\s*s(?:ec(?:ond)?s)?',
175                            // (00:50s)
176                            // 3:26 seconds
177                            '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
178                            // 03min25secs
179                            '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
180                            // [0:39] 
181                            // (0:39) 
182                            '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
183                            // contains an emoji
184                            '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
185            )
186            // sometimes there is no name, it's just the extension which is also strange
187            or .file_name in~ (".htm", ".html")
188            // or sometimes it has no name....
189            or .file_name is null
190          )
191      )
192    ),
193    // attachment contains javascript
194    (
195      any(attachments,
196          (
197            .content_type in ("html", "text", "text/html")
198            or .file_type in ("html", "unknown", "svg")
199          )
200          and (
201            (
202              .size < 1500
203              and any(file.explode(.), length(.scan.html.scripts) > 0)
204            )
205            // bypass the size requirement under these conditions
206            or (
207              // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
208              strings.ilike(file.parse_text(.,
209                                            encodings=[
210                                              "ascii",
211                                              "utf8",
212                                              "utf16-le"
213                                            ]
214                            ).text,
215                            "*onload*",
216                            "*window.location.href*",
217                            "*onerror*",
218                            "*CDATA*",
219                            "*<script*",
220                            "*</script*",
221                            "*atob*",
222                            "*location.assign*",
223                            "*decodeURIComponent*"
224              )
225            )
226          )
227      )
228    ),
229    (
230      any(attachments,
231          (
232            .content_type in ("html", "text", "text/html")
233            or .file_type in ("html", "unknown", "svg")
234          )
235          and any(recipients.to,
236                  // the attachment's parsed HTML contains a recipient's full email address (case-sensitive match)
237                  strings.contains(file.parse_html(..).raw, .email.email)
238                  // or the SLD of a recipient's email domain appears in the attachment file name (case-sensitive match)
239                  or strings.contains(..file_name, .email.domain.sld)
240          )
241      )
242    ),
243    // eml attachments
244    (
245      any(filter(attachments, .content_type == "message/rfc822"),
246          // which contain attachments
247          // this is the same logic as above
248          any(file.parse_eml(.).attachments,
249              (
250                .content_type in ("html", "text", "text/html")
251                or .file_type in ("html", "unknown", "svg")
252                or .file_type == "pdf"
253              )
254              and (
255                regex.icontains(.file_name,
256                                '(?:voice|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
257                                // contains a time
258                                // 01min , 60secs
259                                '0?[1-9]\s*min(?:(?:ute)?s)?',
260                                '\d{1,2}\s*s(?:ec(?:ond)?s)?',
261                                // (00:50s)
262                                // 3:26 seconds
263                                '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
264                                // 03min25secs
265                                '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
266                                // [0:39] 
267                                // (0:39) 
268                                '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
269                                // contains an emoji
270                                '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
271                )
272                // sometimes there is no name, it's just the extension which is also strange
273                or .file_name in~ (".htm", ".html")
274                // or sometimes it has no name....
275                or .file_name is null
276              )
277          )
278      )
279    ),
280    // attached eml sender/recipient/subject are all the same as the outer message
281    // and the attached eml has an attachment or body links
282    (
283      any(filter(attachments, .content_type == "message/rfc822"),
284          // the attached message's subject and sender address equal the outer message's
285          // and it has the same number of To recipients, each outer To address appearing in it
286          file.parse_eml(.).subject.subject == subject.subject
287          and file.parse_eml(.).sender.email.email == sender.email.email
288          and (
289            length(file.parse_eml(.).recipients.to) == length(recipients.to)
290            and all(recipients.to,
291                    .email.email in map(file.parse_eml(..).recipients.to,
292                                        .email.email
293                    )
294            )
295          )
296          and (
297            // the attached message itself carries attachments
298            length(file.parse_eml(.).attachments) > 0
299            // or it has body links whose domain and root domain are both outside $org_domains
300            or length(filter(file.parse_eml(.).body.links,
301                             .href_url.domain.domain not in $org_domains
302                             and .href_url.domain.root_domain not in $org_domains
303                      )
304            ) > 0
305          )
306      )
307    ),
308    // the body links contain the recipients email
309    (
310      length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
311      and any(body.links,
312              any(recipients.to,
313                  strings.icontains(..href_url.url, .email.email)
314                  or strings.icontains(..href_url.url, .email.local_part)
315              )
316      )
317    ),
318    (
319      length(body.current_thread.text) < 700
320      and regex.icontains(body.current_thread.text,
321                          'Méssãge|Méssage|Recéived|Addréss'
322      )
323    ),
324    (
325      // sender domain matches no body domains
326      // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
327      length(filter(body.links,
328                    .display_text is not null
329                    and .display_url.url is null
330                    and .href_url.domain.valid
331             )
332      ) > 0
333      and all(filter(body.links,
334                     .display_text is not null
335                     and .display_url.url is null
336                     and .href_url.domain.valid
337              ),
338              .href_url.domain.root_domain != sender.email.domain.root_domain
339              and .href_url.domain.root_domain not in $org_domains
340              and .href_url.domain.root_domain not in ("aka.ms")
341              and .href_url.domain.root_domain not in (
342                "unitelvoice.com",
343                "googleapis.com",
344                "dialmycalls.com",
345                "ringcentral.biz",
346                "google.com"
347              )
348      )
349    ),
350    // the body links contain vm related phrases
351    (
352      any(body.links,
353          regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
354          and regex.icontains(.display_text,
355                              '(v[nm]|voice|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
356          )
357          // negate FP terms in link display texts
358          and not strings.icontains(.display_text, 'voice call center')
359      )
360    ),
361    (
362      any(body.links,
363          .href_url.path == "/ctt"
364          and regex.icontains(.display_text,
365                              '(v[nm]|voice|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
366          )
367          // negate FP terms in link display texts
368          and not strings.icontains(.display_text, 'voice call center')
369      )
370    ),
371    // new domains
372    (
373      any(body.links,
374          network.whois(.href_url.domain).days_old < 10
375          and not strings.icontains(.href_url.path, "unsubscribe")
376      )
377    ),
378    // recipient SLD used in sender/subject elements
379    (
380      any(recipients.to,
381          // recipient's SLD is in the sender's display name
382          strings.icontains(sender.display_name, .email.domain.sld)
383          // recipient's SLD is in the subject
384          or strings.icontains(subject.subject, .email.domain.sld)
385          // recipient's SLD is in the sender's local_part
386          or strings.icontains(sender.email.local_part, .email.domain.sld)
387      )
388    ),
389    // often times the subject or sender display name will contain time references
390    (
391      any([sender.display_name, subject.subject, body.current_thread.text],
392          regex.icontains(.,
393                          // 01min , 60secs
394                          '0?[1-9]\s*min(?:(?:ute)?s)?\b',
395                          '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
396                          // (00:50s)
397                          // 3:26 seconds
398                          '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
399                          // 03min25secs
400                          '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
401                          // [0:39] 
402                          // (0:39) 
403                          '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
404          )
405      )
406      // reuse the same logic against the OCR output of message_screenshot
407      or any(filter(file.explode(beta.message_screenshot()), .depth == 0),
408             regex.icontains(.scan.ocr.raw,
409                             // 01min , 60secs
410                             '0?[1-9]\s*min(?:(?:ute)?s)?\b',
411                             '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
412                             // (00:50s)
413                             // 3:26 seconds
414                             '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
415                             // 03min25secs
416                             '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
417                             // [0:39] 
418                             // (0:39) 
419                             '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
420             )
421      )
422    ),
423    // often times the subject or sender display name will contain dates
424    (
425      any([sender.display_name, subject.subject],
426          // days of week
427          any([
428                'monday',
429                'tuesday',
430                'wednesday',
431                'thursday',
432                'friday',
433                'saturday',
434                'sunday'
435              ],
436              strings.icontains(.., .)
437          )
438          // months
439          // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
440          or any([
441                   "January",
442                   "February",
443                   "March",
444                   "April",
445                   "June",
446                   "July",
447                   "August",
448                   "September",
449                   "October",
450                   "November",
451                   "December"
452                 ],
453                 strings.icontains(.., .)
454          )
455          // use a regex for May
456          or regex.icontains(., '\bmay\b')
457          // common date formats
458          or regex.contains(.,
459                            // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
460                            '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
461                            // MM/DD/YYYY or MM/DD/YY (US format)
462                            '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
463                            // DD/MM/YYYY or DD/MM/YY (European format)
464                            '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
465                            // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
466                            '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
467          )
468          // common time formats
469          or regex.contains(.,
470                            // Example: 23:45, 08:30
471                            '([01]\d|2[0-3]):([0-5]\d)',
472                            // Example: 23:45:59, 08:30:12
473                            '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
474                            // Example: 08:30 AM, 12:45 pm
475                            '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
476                            // Example: 08:30 AM, 12:45 pm
477                            '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
478          )
479      )
480      // or use the OCR results from beta.message_screenshot
481      or any(filter(file.explode(beta.message_screenshot()), .depth == 0),
482             // days of week
483             any([
484                   'monday',
485                   'tuesday',
486                   'wednesday',
487                   'thursday',
488                   'friday',
489                   'saturday',
490                   'sunday'
491                 ],
492                 strings.icontains(..scan.ocr.raw, .)
493             )
494             // months
495             // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
496             or any([
497                      "January",
498                      "February",
499                      "March",
500                      "April",
501                      "June",
502                      "July",
503                      "August",
504                      "September",
505                      "October",
506                      "November",
507                      "December"
508                    ],
509                    strings.icontains(..scan.ocr.raw, .)
510             )
511             // use a regex for May
512             or regex.contains(.scan.ocr.raw, '\bMay\b')
513             // common date formats
514             or regex.contains(.scan.ocr.raw,
515                               // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
516                               '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
517                               // MM/DD/YYYY or MM/DD/YY (US format)
518                               '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
519                               // DD/MM/YYYY or DD/MM/YY (European format)
520                               '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
521                               // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
522                               '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
523             )
524             // common time formats
525             or regex.contains(.scan.ocr.raw,
526                               // Example: 23:45, 08:30
527                               '([01]\d|2[0-3]):([0-5]\d)',
528                               // Example: 23:45:59, 08:30:12
529                               '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
530                               // Example: 08:30 AM, 12:45 pm
531                               '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
532                               // Example: 08:30 AM, 12:45 pm
533                               '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
534             )
535      )
536    ),
537    // there are often emoji in the sender display name
538    (
539      any([sender.display_name, subject.subject],
540          // contains an emoji
541          regex.contains(.,
542                         '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
543          )
544          // negate where the emoji occur in tags
545          and not regex.contains(.,
546                                 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
547          )
548      )
549    ),
550    // an attachment is a pdf, image, or document that contains a url
551    (
552      1 <= length(attachments) <= 2
553      and any(attachments,
554              (
555                .file_type in $file_types_images
556                or .file_type == "pdf"
557                or .file_extension in $file_extensions_macros
558              )
559              and any(file.explode(.),
560                      .scan.qr.type == "url"
561                      or strings.icontains(.scan.qr.data, 'http')
562                      or any(recipients.to,
563                             strings.icontains(..scan.qr.data, .email.local_part)
564                             or strings.icontains(..scan.qr.data, .email.email)
565                      )
566              )
567      )
568    )
569  )
570  
571  // negating legit replies and legitimate audio file attachments and known voicemail senders
572  and not (
573    sender.email.domain.valid
574    and sender.email.domain.root_domain in (
575      "magicjack.com",
576      "unitelvoice.com",
577      "voipinterface.net",
578      "ringcentral.biz",
579      "verizonwireless.com",
580      "t-mobile.com",
581      "justcall.io",
582      "airtel.com"
583    )
584  )
585  and not (
586    any(filter(attachments, strings.starts_with(.content_type, "audio")),
587        // confirm the content type with file.explode
588        // we have seen attachments claim to be audio/* files, only to be exploded as something else
589        any(file.explode(.), strings.starts_with(.flavors.mime, "audio"))
590    )
591  )
592  and not (
593    (
594      strings.istarts_with(subject.subject, "RE:")
595      // out of office auto-reply
596      // the NLU model will handle these better natively soon
597      or strings.istarts_with(subject.subject, "Automatic reply:")
598    )
599    and (
600      length(headers.references) > 0
601      or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
602    )
603  )
604  // negate highly trusted sender domains unless they fail DMARC authentication (a missing DMARC result counts as a failure)
605  and (
606    (
607      sender.email.domain.root_domain in $high_trust_sender_root_domains
608      and not coalesce(headers.auth_summary.dmarc.pass, false)
609    )
610    or sender.email.domain.root_domain not in $high_trust_sender_root_domains
611  )
612  // bounce-back negations: an attached message that itself carries a delivery-status part
613  and not any(attachments,
614              any(file.parse_eml(.).attachments,
615                  .content_type == "message/delivery-status"
616              )
617  )
618  // negate messages with a directly attached delivery-status report or calendar invite
619  and not (
620    any(attachments,
621        .content_type in ("message/delivery-status", "text/calendar")
622    )
623  )
624  // negate newsletters (only when the NLU topic confidence is high)
625  and not any(ml.nlu_classifier(body.current_thread.text).topics,
626              .name == "Newsletters and Digests" and .confidence == "high"
627  )
628  // negate bouncebacks from proofpoint
629  and not (
630    sender.display_name == "Mail Delivery Subsystem"
631    and strings.ends_with(headers.message_id, "pphosted.com>")
632    and any(headers.hops,
633            .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
634    )
635    and any(attachments, .content_type == "message/rfc822")
636  )
637  // an impersonated high trust domain (DMARC failed or no DMARC result is present)
638  and (
639    (
640      sender.email.domain.root_domain in $high_trust_sender_root_domains
641      and not coalesce(headers.auth_summary.dmarc.pass, false)
642    )
643  
644    // sender profile
645    or (
646      (
647        not sender.email.domain.root_domain in $org_domains
648        and (profile.by_sender().prevalence not in ("common"))
649        and not profile.by_sender().solicited
650      )
651      or (
652        profile.by_sender().any_messages_malicious_or_spam
653        and not profile.by_sender().any_messages_benign
654      )
655      // match if the sender is in org domains but failed auth
656      or (
657        sender.email.domain.domain in $org_domains
658        and not coalesce(headers.auth_summary.dmarc.pass, false)
659      )
660    )
661  )  
662
663attack_types:
664  - "Credential Phishing"
665tactics_and_techniques:
666  - "Social engineering"
667detection_methods:
668  - "Content analysis"
669  - "Natural Language Understanding"
670  - "Sender analysis"
671  - "URL analysis"
672id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"
to-top