Fake voicemail notification (untrusted sender)
This rule detects a common credential phishing vector that entices the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject, or a combination of those elements with emojis or a medium-to-high confidence credential theft NLU intent, from a first-time and unsolicited sender.
Sublime rule (View on GitHub)
1name: "Fake voicemail notification (untrusted sender)"
2description: |
3 This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
4 The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
5type: "rule"
6severity: "medium"
7source: |
8 type.inbound
9 // contains 1-25 links or 1-3 distinct attachments, and never more than 8 distinct attachments
10 and (
11 (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
12 and 0 <= length(distinct(attachments, .md5)) <= 8
13 )
14
15 // the subject or display_name need some keywords which are voicemail related
16 and (
17 any([subject.subject, sender.display_name],
18 regex.icontains(.,
19 // split phrases that occur within 3 words between or only punctuation between them
20 '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
21 // regex specific to v-mail, v_msg, v,mail, etc
22 // list of "secondary" words synced with regex above this one
23 'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
24 // split phrases that start with "caller" that occur within 3 words between or only punctuation
25 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
26 // strong phrases
27 '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
28 // starts in the format of `(4)` and contains some voicemail keywords
29 '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
30 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
31
32 // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
33 // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
34 '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
35 // obfuscated phone number with at least one digit in the prefix
36 // XXX-555-5555, XXX-5XX-XXXX
37 '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
38 // obfuscated voicemail/voicemessage keywords
39 'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
40 'v[o0][il1]cem[\*X\.\-_]{2,}',
41 // "X new voice..." patterns
42 '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
43 // sent-message patterns
44 '(?:sent|new|incoming)[\s\-]+message.*v[o0][il1]ce',
45 )
46 )
47 // body.current_thread.text inspection should be very specific to avoid FP
48 or regex.icontains(strings.replace_confusables(body.current_thread.text),
49 // matched against the confusable-normalized body text; each pattern is a complete voicemail phrase
50 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
51 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
52 'v[o0][il1]cema[il1][li1] (is )?attached',
53 'an? (?:new )?encrypted v[o0][il1]cemail',
54 'a (?:new )?pending message',
55 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
56 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
57 'New V[o0][il1]cema[il1][li1] Received',
58 'New m[il1]ssed ca[li1][li1] record',
59 '\bv[o0][il1]cema[il1][li1] transcript\b',
60 'Listen to V[o0][il1]ceMa[il1][li1]',
61 'New v[o0][il1]cema[il1][li1] from',
62 'v[o0][il1]ce note'
63 )
64 // two broader phrases, negated when the text reads as a first-person note ("I just left you a voicemail", "you received my voicemail")
65 or (
66 regex.icontains(body.current_thread.text,
67 // generic "you/we have|received ... voice message" and "left you a ... voicemail" phrasing
68 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
69 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
70 )
71 and not regex.icontains(body.current_thread.text,
72 '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+ and)?) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
73 )
74 and not regex.icontains(body.current_thread.text,
75 'you (?:have |received )my voice\s?(?:mail|audio|message)'
76 )
77 )
78 // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
79 or (
80 length(attachments) > 0
81 and (
82 all(attachments,
83 .file_type in $file_types_images
84 and beta.parse_exif(.).image_height != 1
85 )
86 or (
87 // there is a mix of fake audio attachments and images
88 length(filter(filter(attachments,
89 strings.starts_with(.content_type, "audio")
90 ),
91 // confirm the content type with file.explode
92 // we have seen attachments claim to be audio/* files, only to be exploded as something else
93 any(file.explode(.),
94 not strings.starts_with(.flavors.mime, "audio")
95 )
96 )
97 // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
98 // meaning, all attachments that are NOT fake audio attachments MUST be images
99 ) + length(filter(attachments,
100 .file_type in $file_types_images
101 and beta.parse_exif(.).image_height != 1
102 )
103 ) == length(attachments)
104 )
105 )
106 and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
107 regex.icontains(.scan.ocr.raw,
108 // voicemail phrases, matched against the screenshot OCR text
109 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
110 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
111 'v[o0][il1]cema[il1][li1] (is )?attached',
112 'an? (?:new )?encrypted v[o0][il1]cemail',
113 'a (?:new )?pending message',
114 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
115 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
116 'New V[o0][il1]cema[il1][li1] Received',
117 'New m[il1]ssed ca[li1][li1] record',
118 '\bv[o0][il1]cema[il1][li1] transcript\b',
119 'Listen to V[o0][il1]ceMa[il1][li1]',
120 'New v[o0][il1]cema[il1][li1] from',
121 'v[o0][il1]ce note'
122 )
123 or (
124 regex.icontains(.scan.ocr.raw,
125 // broader phrases from the OCR text; the negations below are evaluated against body.current_thread.text
126 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
127 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
128 )
129 and not regex.icontains(body.current_thread.text,
130 '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
131 )
132 and not regex.icontains(body.current_thread.text,
133 'you (?:have |received )my voice\s?(?:mail|audio|message)'
134 )
135 )
136 )
137 )
138 or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
139 or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
140 )
141 and 2 of (
142 (
143 // the sender is a freemail
144 sender.email.domain.root_domain in $free_email_providers
145 ),
146 (
147 any(ml.nlu_classifier(body.current_thread.text).intents,
148 .name in ("cred_theft") and .confidence in ("medium", "high")
149 )
150 or
151 // use the OCR from the message screenshot
152 any(filter(file.explode(file.message_screenshot()), .depth == 0),
153 any(ml.nlu_classifier(.scan.ocr.raw).intents,
154 .name in ("cred_theft") and .confidence in ("medium", "high")
155 )
156 )
157 ),
158 (
159 any(attachments,
160 .content_type in ("html", "text", "text/html")
161 and any(ml.logo_detect(file.html_screenshot(.)).brands,
162 .name in ("Microsoft") and .confidence in ("medium", "high")
163 )
164 )
165 ),
166 (
167 regex.icontains(sender.display_name,
168 '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
169 )
170 ),
171 // attachment names are often HTML and voice mail related
172 (
173 any(attachments,
174 // this logic is reused below for eml attachments
175 // ensure updates occur both places
176 (
177 .content_type in ("html", "text", "text/html")
178 or .file_type in ("html", "unknown", "svg")
179 or .file_type == "pdf"
180 )
181 and (
182 regex.icontains(.file_name,
183 '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
184 // contains a time
185 // 01min , 60secs
186 '0?[1-9]\s*min(?:(?:ute)?s)?',
187 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
188 // (00:50s)
189 // 3:26 seconds
190 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
191 // 03min25secs
192 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
193 // [0:39]
194 // (0:39)
195 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
196 // contains an emoji
197 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
198 )
199 // sometimes there is no name, it's just the extension which is also strange
200 or .file_name in~ (".htm", ".html")
201 // or sometimes it has no name....
202 or .file_name is null
203 )
204 )
205 ),
206 // attachment contains javascript
207 (
208 any(attachments,
209 (
210 .content_type in ("html", "text", "text/html")
211 or .file_type in ("html", "unknown", "svg")
212 )
213 and (
214 (
215 .size < 1500
216 and any(file.explode(.), length(.scan.html.scripts) > 0)
217 )
218 // bypass the size requirement under these conditions
219 or (
220 // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
221 strings.ilike(file.parse_text(.,
222 encodings=[
223 "ascii",
224 "utf8",
225 "utf16-le"
226 ]
227 ).text,
228 "*onload*",
229 "*window.location.href*",
230 "*onerror*",
231 "*CDATA*",
232 "*<script*",
233 "*</script*",
234 "*atob*",
235 "*location.assign*",
236 "*decodeURIComponent*"
237 )
238 )
239 )
240 )
241 ),
242 (
243 any(attachments,
244 (
245 .content_type in ("html", "text", "text/html")
246 or .file_type in ("html", "unknown", "svg")
247 )
248 and any(recipients.to,
249 // the html attachment contains a recipient email address
250 strings.contains(file.parse_html(..).raw, .email.email)
251 // the sld of the domain is in the attachment name
252 or strings.contains(..file_name, .email.domain.sld)
253 )
254 )
255 ),
256 // eml attachments
257 (
258 any(filter(attachments,
259 .content_type == "message/rfc822" or .file_extension in ('eml')
260 ),
261 // which contain attachments
262 // this is the same logic as above
263 any(file.parse_eml(.).attachments,
264 (
265 .content_type in ("html", "text", "text/html")
266 or .file_type in ("html", "unknown", "svg")
267 or .file_type == "pdf"
268 )
269 and (
270 regex.icontains(.file_name,
271 '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
272 // contains a time
273 // 01min , 60secs
274 '0?[1-9]\s*min(?:(?:ute)?s)?',
275 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
276 // (00:50s)
277 // 3:26 seconds
278 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
279 // 03min25secs
280 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
281 // [0:39]
282 // (0:39)
283 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
284 // contains an emoji
285 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
286 )
287 // sometimes there is no name, it's just the extension which is also strange
288 or .file_name in~ (".htm", ".html")
289 // or sometimes it has no name....
290 or .file_name is null
291 )
292 )
293 )
294 ),
295 // attached eml sender/recipient/subject are all the same as the outer
296 // and have an attachment or body links
297 (
298 any(filter(attachments,
299 .content_type == "message/rfc822" or .file_extension in ('eml')
300 ),
301 // whose subject, sender, and recipients all match the outer message
302 // and which carry attachments or links to non-org domains
303 file.parse_eml(.).subject.subject == subject.subject
304 and file.parse_eml(.).sender.email.email == sender.email.email
305 and (
306 length(file.parse_eml(.).recipients.to) == length(recipients.to)
307 and all(recipients.to,
308 .email.email in map(file.parse_eml(..).recipients.to,
309 .email.email
310 )
311 )
312 )
313 and (
314 // there are attachments
315 length(file.parse_eml(.).attachments) > 0
316 // or body links
317 or length(filter(file.parse_eml(.).body.links,
318 .href_url.domain.domain not in $org_domains
319 and .href_url.domain.root_domain not in $org_domains
320 )
321 ) > 0
322 )
323 )
324 ),
325 // the body links contain the recipients email
326 (
327 length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
328 and any(body.links,
329 any(recipients.to,
330 strings.icontains(..href_url.url, .email.email)
331 or strings.icontains(..href_url.url, .email.local_part)
332 )
333 )
334 ),
335 (
336 length(body.current_thread.text) < 700
337 and regex.icontains(body.current_thread.text,
338 'Méssãge|Méssage|Recéived|Addréss'
339 )
340 ),
341 (
342 // sender domain matches no body domains
343 // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
344 length(filter(body.links,
345 .display_text is not null
346 and .display_url.url is null
347 and .href_url.domain.valid
348 )
349 ) > 0
350 and all(filter(body.links,
351 .display_text is not null
352 and .display_url.url is null
353 and .href_url.domain.valid
354 ),
355 .href_url.domain.root_domain != sender.email.domain.root_domain
356 and .href_url.domain.root_domain not in $org_domains
357 and .href_url.domain.root_domain not in ("aka.ms")
358 and .href_url.domain.root_domain not in (
359 "unitelvoice.com",
360 "googleapis.com",
361 "dialmycalls.com",
362 "ringcentral.biz",
363 "google.com"
364 )
365 )
366 ),
367 // the body links contain vm related phrases
368 (
369 any(body.links,
370 regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
371 and regex.icontains(.display_text,
372 '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
373 )
374 // negate FP terms in link display texts
375 and not strings.icontains(.display_text, 'voice call center')
376 )
377 ),
378 (
379 any(body.links,
380 .href_url.path == "/ctt"
381 and regex.icontains(.display_text,
382 '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
383 )
384 // negate FP terms in link display texts
385 and not strings.icontains(.display_text, 'voice call center')
386 )
387 ),
388 // new domains
389 (
390 any(body.links,
391 network.whois(.href_url.domain).days_old < 10
392 and not strings.icontains(.href_url.path, "unsubscribe")
393 )
394 ),
395 // sld use in sender/subject elements
396 (
397 any(recipients.to,
398 // recipient's SLD is in the sender's display name
399 strings.icontains(sender.display_name, .email.domain.sld)
400 // recipient's SLD is in the subject
401 or strings.icontains(subject.subject, .email.domain.sld)
402 // recipient's SLD is in the senders local_part
403 or strings.icontains(sender.email.local_part, .email.domain.sld)
404 )
405 ),
406 // often times the subject, sender display name, or body will contain time references
407 (
408 any([sender.display_name, subject.subject, body.current_thread.text],
409 regex.icontains(.,
410 // 01min , 60secs
411 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
412 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
413 // (00:50s)
414 // 3:26 seconds
415 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
416 // 03min25secs
417 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
418 // [0:39]
419 // (0:39)
420 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
421 )
422 )
423 // reuse the same logic against OCR output of message_screenshot
424 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
425 regex.icontains(.scan.ocr.raw,
426 // 01min , 60secs
427 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
428 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
429 // (00:50s)
430 // 3:26 seconds
431 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
432 // 03min25secs
433 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
434 // [0:39]
435 // (0:39)
436 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
437 )
438 )
439 ),
440 // often times the subject or sender display name will contain dates
441 (
442 any([sender.display_name, subject.subject],
443 // days of week
444 any([
445 'monday',
446 'tuesday',
447 'wednesday',
448 'thursday',
449 'friday',
450 'saturday',
451 'sunday'
452 ],
453 strings.icontains(.., .)
454 )
455 // months
456 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
457 or any([
458 "January",
459 "February",
460 "March",
461 "April",
462 "June",
463 "July",
464 "August",
465 "September",
466 "October",
467 "November",
468 "December"
469 ],
470 strings.icontains(.., .)
471 )
472 // use a regex for May
473 or regex.icontains(., '\bmay\b')
474 // common date formats
475 or regex.contains(.,
476 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
477 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
478 // MM/DD/YYYY or MM/DD/YY (US format)
479 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
480 // DD/MM/YYYY or DD/MM/YY (European format)
481 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
482 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
483 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
484 )
485 // common time formats
486 or regex.contains(.,
487 // Example: 23:45, 08:30
488 '([01]\d|2[0-3]):([0-5]\d)',
489 // Example: 23:45:59, 08:30:12
490 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
491 // Example: 08:30 AM, 12:45 pm
492 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
493 // Example: 08:30:12 AM, 12:45:59 pm
494 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
495 )
496 )
497 // or use the OCR results from file.message_screenshot
498 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
499 // days of week
500 any([
501 'monday',
502 'tuesday',
503 'wednesday',
504 'thursday',
505 'friday',
506 'saturday',
507 'sunday'
508 ],
509 strings.icontains(..scan.ocr.raw, .)
510 )
511 // months
512 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
513 or any([
514 "January",
515 "February",
516 "March",
517 "April",
518 "June",
519 "July",
520 "August",
521 "September",
522 "October",
523 "November",
524 "December"
525 ],
526 strings.icontains(..scan.ocr.raw, .)
527 )
528 // use a regex for May
529 or regex.contains(.scan.ocr.raw, '\bMay\b')
530 // common date formats
531 or regex.contains(.scan.ocr.raw,
532 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
533 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
534 // MM/DD/YYYY or MM/DD/YY (US format)
535 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
536 // DD/MM/YYYY or DD/MM/YY (European format)
537 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
538 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
539 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
540 )
541 // common time formats
542 or regex.contains(.scan.ocr.raw,
543 // Example: 23:45, 08:30
544 '([01]\d|2[0-3]):([0-5]\d)',
545 // Example: 23:45:59, 08:30:12
546 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
547 // Example: 08:30 AM, 12:45 pm
548 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
549 // Example: 08:30:12 AM, 12:45:59 pm
550 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
551 )
552 )
553 ),
554 // there are often emoji in the sender display name or subject
555 (
556 any([sender.display_name, subject.subject],
557 // contains an emoji
558 regex.contains(.,
559 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
560 )
561 // negate where the emoji occur in tags
562 and not regex.contains(.,
563 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
564 )
565 )
566 ),
567 // an attachment is a pdf, image, or file with a macro-capable extension that contains a QR code with a url or the recipient's address
568 (
569 1 <= length(attachments) <= 2
570 and any(attachments,
571 (
572 .file_type in $file_types_images
573 or .file_type == "pdf"
574 or .file_extension in $file_extensions_macros
575 )
576 and any(file.explode(.),
577 .scan.qr.type == "url"
578 or strings.icontains(.scan.qr.data, 'http')
579 or any(recipients.to,
580 strings.icontains(..scan.qr.data, .email.local_part)
581 or strings.icontains(..scan.qr.data, .email.email)
582 )
583 )
584 )
585 )
586 )
587
588 // negating legit replies and legitimate audio file attachments and known voicemail senders
589 and not (
590 sender.email.domain.valid
591 and sender.email.domain.root_domain in (
592 "magicjack.com",
593 "unitelvoice.com",
594 "voipinterface.net",
595 "ringcentral.biz",
596 "verizonwireless.com",
597 "t-mobile.com",
598 "justcall.io",
599 "airtel.com"
600 )
601 )
602 and not (
603 any(filter(attachments, strings.starts_with(.content_type, "audio")),
604 // confirm the content type with file.explode
605 // we have seen attachments claim to be audio/* files, only to be exploded as something else
606 any(file.explode(.), strings.starts_with(.flavors.mime, "audio"))
607 )
608 )
609 and not (
610 (
611 strings.istarts_with(subject.subject, "RE:")
612 // out of office auto-reply
613 // the NLU model will handle these better natively soon
614 or strings.istarts_with(subject.subject, "Automatic reply:")
615 )
616 and (
617 length(headers.references) > 0
618 or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
619 )
620 )
621 // negate highly trusted sender domains unless they fail DMARC authentication
622 and (
623 (
624 sender.email.domain.root_domain in $high_trust_sender_root_domains
625 and not headers.auth_summary.dmarc.pass
626 )
627 or sender.email.domain.root_domain not in $high_trust_sender_root_domains
628 )
629 // bounce-back negations
630 and not any(attachments,
631 any(file.parse_eml(.).attachments,
632 .content_type == "message/delivery-status"
633 )
634 )
635 // negate delivery-status (bounce-back) and calendar attachments
636 and not (
637 any(attachments,
638 .content_type in ("message/delivery-status", "text/calendar")
639 )
640 )
641 // negate newsletters
642 and not (
643 (
644 any(ml.nlu_classifier(body.current_thread.text).topics,
645 .name in (
646 "Newsletters and Digests",
647 "B2B Cold Outreach",
648 "Events and Webinars"
649 )
650 and .confidence == "high"
651 )
652 )
653 and not strings.icontains(body.current_thread.text,
654 "this voicemail was shared by"
655 )
656 )
657 // negate bouncebacks from proofpoint
658 and not (
659 sender.display_name == "Mail Delivery Subsystem"
660 and strings.ends_with(headers.message_id, "pphosted.com>")
661 and any(headers.hops,
662 .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
663 )
664 and any(attachments, .content_type == "message/rfc822")
665 )
666 // negate CheckPoint encrypted messages
667 and not (
668 // CheckPoint banner
669 length(attachments) == 1
670 and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
671 and strings.istarts_with(headers.message_id, "<encrypted")
672 and any(headers.domains, .root_domain == "checkpointcloudsec.com")
673 )
674 // an impersonated high trust domain
675 and (
676 (
677 sender.email.domain.root_domain in $high_trust_sender_root_domains
678 and not headers.auth_summary.dmarc.pass
679 )
680
681 // sender profile
682 or (
683 (
684 not sender.email.domain.root_domain in $org_domains
685 and (profile.by_sender_email().prevalence not in ("common"))
686 and not profile.by_sender_email().solicited
687 )
688 or (
689 profile.by_sender_email().any_messages_malicious_or_spam
690 and not profile.by_sender_email().any_messages_benign
691 )
692 // match if the sender is in org domains but failed auth
693 or (
694 sender.email.domain.domain in $org_domains
695 and not coalesce(headers.auth_summary.dmarc.pass, false)
696 )
697 // match if the sender address is blank or null
698 or (regex.match(sender.email.email, "") or sender.email.email is null)
699 )
700 )
701attack_types:
702 - "Credential Phishing"
703tactics_and_techniques:
704 - "Social engineering"
705detection_methods:
706 - "Content analysis"
707 - "Natural Language Understanding"
708 - "Sender analysis"
709 - "URL analysis"
710id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"