Fake voicemail notification (untrusted sender)
This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
Sublime rule (View on GitHub)
1name: "Fake voicemail notification (untrusted sender)"
2description: |
3 This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
4 The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
5type: "rule"
6severity: "medium"
7source: |
8 type.inbound
9 // contains 1-25 links or 1-3 distinct attachments (by md5), and never more than 8 distinct attachments
10 and (
11 (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
12 and 0 <= length(distinct(attachments, .md5)) <= 8
13 )
14
15 // the subject or display_name need some keywords which are voicemail related
16 and (
17 any([subject.subject, sender.display_name],
18 regex.icontains(.,
19 // split phrases that occur within 3 words between or only punctuation between them
20 '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
21 // regex specific to v-mail, v_msg, v,mail, etc
22 // list of "secondary" words synced with regex above this one
23 'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
24 // split phrases that start with "caller" that occur within 3 words between or only punctuation
25 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
26 // strong phrases
27 '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
28 // starts in the format of `(4)` and contains some voicemail keywords
29 '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
30 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
31
32 // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
33 // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
34 '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
35 // obfuscated phone number with at least one digit in the prefix
36 // XXX-555-5555, XXX-5XX-XXXX
37 '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
38 // obfuscated voicemail/voicemessage keywords
39 'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
40 'v[o0][il1]cem[\*X\.\-_]{2,}',
41 // "X new voice..." patterns
42 '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
43 // sent-message patterns
44 '(?:sent|new|incoming)[\s\-]+message.*v[o0][il1]ce',
45 )
46 )
47 // body.current_thread.text inspection should be very specific to avoid FP
48 or regex.icontains(strings.replace_confusables(body.current_thread.text),
49 // body.current_thread.text,
50 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
51 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
52 'v[o0][il1]cema[il1][li1] (is )?attached',
53 'an? (?:new )?encrypted v[o0][il1]cemail',
54 'a (?:new )?pending message',
55 'Your? have (?: an?)?incoming v[o0][il1]ceRec',
56 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
57 'New V[o0][il1]cema[il1][li1] Received',
58 'New m[il1]ssed ca[li1][li1] record',
59 '\bv[o0][il1]cema[il1][li1] transcript\b',
60 'Listen to V[o0][il1]ceMa[il1][li1]',
61 'New v[o0][il1]cema[il1][li1] from',
62 'v[o0][il1]ce note'
63 )
64 // pull out two regexes that could benefit from negations; first-person phrasing ("I just left you a voicemail", "you have my voicemail") is negated and the first-person prefix is required for the negation to apply
65 or (
66 regex.icontains(body.current_thread.text,
67 // body.current_thread.text,
68 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
69 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
70 )
71 and not regex.icontains(body.current_thread.text,
72 '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+ and)?) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
73 )
74 and not regex.icontains(body.current_thread.text,
75 'you (?:have |received )my voice\s?(?:mail|audio|message)'
76 )
77 )
78 // Reuse the body.current_thread.text logic (matches and negations) against the OCR output of the message screenshot
79 or (
80 length(attachments) > 0
81 and (
82 all(attachments, .file_type in $file_types_images and beta.parse_exif(.).image_height != 1)
83 or (
84 // there is a mix of fake audio attachments and images
85 length(filter(filter(attachments,
86 strings.starts_with(.content_type, "audio")
87 ),
88 // confirm the content type with file.explode
89 // we have seen attachments claim to be audio/* files, only to be exploded as something else
90 any(file.explode(.),
91 not strings.starts_with(.flavors.mime, "audio")
92 )
93 )
94 // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
95 // meaning, all attachments that are NOT fake audio attachments MUST be images
96 ) + length(filter(attachments, .file_type in $file_types_images and beta.parse_exif(.).image_height != 1)) == length(attachments
97 )
98 )
99 )
100 and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
101 regex.icontains(.scan.ocr.raw,
102 // body.current_thread.text,
103 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
104 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
105 'v[o0][il1]cema[il1][li1] (is )?attached',
106 'an? (?:new )?encrypted v[o0][il1]cemail',
107 'a (?:new )?pending message',
108 'Your? have (?: an?)?incoming v[o0][il1]ceRec',
109 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
110 'New V[o0][il1]cema[il1][li1] Received',
111 'New m[il1]ssed ca[li1][li1] record',
112 '\bv[o0][il1]cema[il1][li1] transcript\b',
113 'Listen to V[o0][il1]ceMa[il1][li1]',
114 'New v[o0][il1]cema[il1][li1] from',
115 'v[o0][il1]ce note'
116 )
117 or (
118 regex.icontains(.scan.ocr.raw,
119 // body.current_thread.text,
120 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
121 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audi[o0])(?: message)?',
122 )
123 and not regex.icontains(.scan.ocr.raw,
124 '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
125 )
126 and not regex.icontains(.scan.ocr.raw,
127 'you (?:have |received )my voice\s?(?:mail|audio|message)'
128 )
129 )
130 )
131 )
132 or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
133 or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
134 )
135 and 2 of (
136 (
137 // the sender's root domain is a free email provider
138 sender.email.domain.root_domain in $free_email_providers
139 ),
140 (
141 any(ml.nlu_classifier(body.current_thread.text).intents,
142 .name in ("cred_theft") and .confidence in ("medium", "high")
143 )
144 or
145 // use the OCR from the message screenshot
146 any(filter(file.explode(file.message_screenshot()), .depth == 0),
147 any(ml.nlu_classifier(.scan.ocr.raw).intents,
148 .name in ("cred_theft") and .confidence in ("medium", "high")
149 )
150 )
151 ),
152 (
153 any(attachments,
154 .content_type in ("html", "text", "text/html")
155 and any(ml.logo_detect(file.html_screenshot(.)).brands,
156 .name in ("Microsoft") and .confidence in ("medium", "high")
157 )
158 )
159 ),
160 (
161 regex.icontains(sender.display_name,
162 '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
163 )
164 ),
165 // attachment names are often HTML and voice mail related
166 (
167 any(attachments,
168 // this logic is reused below for eml attachments
169 // ensure updates occur both places
170 (
171 .content_type in ("html", "text", "text/html")
172 or .file_type in ("html", "unknown", "svg")
173 or .file_type == "pdf"
174 )
175 and (
176 regex.icontains(.file_name,
177 '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
178 // contains a time
179 // 01min , 60secs
180 '0?[1-9]\s*min(?:(?:ute)?s)?',
181 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
182 // (00:50s)
183 // 3:26 seconds
184 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
185 // 03min25secs
186 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
187 // [0:39]
188 // (0:39)
189 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
190 // contains an emoji
191 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
192 )
193 // sometimes there is no name, it's just the extension which is also strange
194 or .file_name in~ (".htm", ".html")
195 // or sometimes it has no name....
196 or .file_name is null
197 )
198 )
199 ),
200 // attachment contains javascript
201 (
202 any(attachments,
203 (
204 .content_type in ("html", "text", "text/html")
205 or .file_type in ("html", "unknown", "svg")
206 )
207 and (
208 (
209 .size < 1500
210 and any(file.explode(.), length(.scan.html.scripts) > 0)
211 )
212 // bypass the size requirement under these conditions
213 or (
214 // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
215 strings.ilike(file.parse_text(.,
216 encodings=[
217 "ascii",
218 "utf8",
219 "utf16-le"
220 ]
221 ).text,
222 "*onload*",
223 "*window.location.href*",
224 "*onerror*",
225 "*CDATA*",
226 "*<script*",
227 "*</script*",
228 "*atob*",
229 "*location.assign*",
230 "*decodeURIComponent*"
231 )
232 )
233 )
234 )
235 ),
236 (
237 any(attachments,
238 (
239 .content_type in ("html", "text", "text/html")
240 or .file_type in ("html", "unknown", "svg")
241 )
242 and any(recipients.to,
243 // the html attachment contains a recipient email address
244 strings.contains(file.parse_html(..).raw, .email.email)
245 // the sld of the recipient's domain is in the attachment name
246 or strings.contains(..file_name, .email.domain.sld)
247 )
248 )
249 ),
250 // eml attachments
251 (
252 any(filter(attachments, .content_type == "message/rfc822" or .file_extension in ('eml')),
253 // which contain attachments
254 // this is the same logic as above
255 any(file.parse_eml(.).attachments,
256 (
257 .content_type in ("html", "text", "text/html")
258 or .file_type in ("html", "unknown", "svg")
259 or .file_type == "pdf"
260 )
261 and (
262 regex.icontains(.file_name,
263 '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
264 // contains a time
265 // 01min , 60secs
266 '0?[1-9]\s*min(?:(?:ute)?s)?',
267 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
268 // (00:50s)
269 // 3:26 seconds
270 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
271 // 03min25secs
272 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
273 // [0:39]
274 // (0:39)
275 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
276 // contains an emoji
277 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
278 )
279 // sometimes there is no name, it's just the extension which is also strange
280 or .file_name in~ (".htm", ".html")
281 // or sometimes it has no name....
282 or .file_name is null
283 )
284 )
285 )
286 ),
287 // attached eml sender/recipient/subject are all the same as the outer
288 // and have an attachment or body links
289 (
290 any(filter(attachments, .content_type == "message/rfc822" or .file_extension in ('eml')),
291 // the attached eml has the same subject and sender address as the outer message
292 // and the same number of "to" recipients, each outer recipient being present in the attached eml
293 file.parse_eml(.).subject.subject == subject.subject
294 and file.parse_eml(.).sender.email.email == sender.email.email
295 and (
296 length(file.parse_eml(.).recipients.to) == length(recipients.to)
297 and all(recipients.to,
298 .email.email in map(file.parse_eml(..).recipients.to,
299 .email.email
300 )
301 )
302 )
303 and (
304 // there are attachments
305 length(file.parse_eml(.).attachments) > 0
306 // or body links that point outside of $org_domains
307 or length(filter(file.parse_eml(.).body.links,
308 .href_url.domain.domain not in $org_domains
309 and .href_url.domain.root_domain not in $org_domains
310 )
311 ) > 0
312 )
313 )
314 ),
315 // the body links contain a recipient's email address or local part
316 (
317 length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
318 and any(body.links,
319 any(recipients.to,
320 strings.icontains(..href_url.url, .email.email)
321 or strings.icontains(..href_url.url, .email.local_part)
322 )
323 )
324 ),
325 (
326 length(body.current_thread.text) < 700
327 and regex.icontains(body.current_thread.text,
328 'Méssãge|Méssage|Recéived|Addréss'
329 )
330 ),
331 (
332 // sender domain matches no body domains
333 // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
334 length(filter(body.links,
335 .display_text is not null
336 and .display_url.url is null
337 and .href_url.domain.valid
338 )
339 ) > 0
340 and all(filter(body.links,
341 .display_text is not null
342 and .display_url.url is null
343 and .href_url.domain.valid
344 ),
345 .href_url.domain.root_domain != sender.email.domain.root_domain
346 and .href_url.domain.root_domain not in $org_domains
347 and .href_url.domain.root_domain not in ("aka.ms")
348 and .href_url.domain.root_domain not in (
349 "unitelvoice.com",
350 "googleapis.com",
351 "dialmycalls.com",
352 "ringcentral.biz",
353 "google.com"
354 )
355 )
356 ),
357 // the body links contain vm related phrases
358 (
359 any(body.links,
360 regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
361 and regex.icontains(.display_text,
362 '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
363 )
364 // negate FP terms in link display texts
365 and not strings.icontains(.display_text, 'voice call center')
366 )
367 ),
368 (
369 any(body.links,
370 .href_url.path == "/ctt"
371 and regex.icontains(.display_text,
372 '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
373 )
374 // negate FP terms in link display texts
375 and not strings.icontains(.display_text, 'voice call center')
376 )
377 ),
378 // new domains: a link whose domain whois age is under 10 days (unsubscribe links excluded)
379 (
380 any(body.links,
381 network.whois(.href_url.domain).days_old < 10
382 and not strings.icontains(.href_url.path, "unsubscribe")
383 )
384 ),
385 // sld use in sender/subject elements
386 (
387 any(recipients.to,
388 // recipient's SLD is in the sender's display name
389 strings.icontains(sender.display_name, .email.domain.sld)
390 // recipient's SLD is in the subject
391 or strings.icontains(subject.subject, .email.domain.sld)
392 // recipient's SLD is in the senders local_part
393 or strings.icontains(sender.email.local_part, .email.domain.sld)
394 )
395 ),
396 // often times the subject, sender display name, or body will contain time references
397 (
398 any([sender.display_name, subject.subject, body.current_thread.text],
399 regex.icontains(.,
400 // 01min , 60secs
401 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
402 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
403 // (00:50s)
404 // 3:26 seconds
405 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
406 // 03min25secs
407 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
408 // [0:39]
409 // (0:39)
410 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
411 )
412 )
413 // reuse the same logic against OCR output of message_screenshot
414 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
415 regex.icontains(.scan.ocr.raw,
416 // 01min , 60secs
417 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
418 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
419 // (00:50s)
420 // 3:26 seconds
421 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
422 // 03min25secs
423 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
424 // [0:39]
425 // (0:39)
426 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
427 )
428 )
429 ),
430 // often times the subject or sender display name will contain dates
431 (
432 any([sender.display_name, subject.subject],
433 // days of week
434 any([
435 'monday',
436 'tuesday',
437 'wednesday',
438 'thursday',
439 'friday',
440 'saturday',
441 'sunday'
442 ],
443 strings.icontains(.., .)
444 )
445 // months
446 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
447 or any([
448 "January",
449 "February",
450 "March",
451 "April",
452 "June",
453 "July",
454 "August",
455 "September",
456 "October",
457 "November",
458 "December"
459 ],
460 strings.icontains(.., .)
461 )
462 // use a word-bounded regex for May
463 or regex.icontains(., '\bmay\b')
464 // common date formats
465 or regex.contains(.,
466 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
467 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
468 // MM/DD/YYYY or MM/DD/YY (US format)
469 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
470 // DD/MM/YYYY or DD/MM/YY (European format)
471 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
472 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
473 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
474 )
475 // common time formats
476 or regex.contains(.,
477 // Example: 23:45, 08:30
478 '([01]\d|2[0-3]):([0-5]\d)',
479 // Example: 23:45:59, 08:30:12
480 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
481 // Example: 08:30 AM, 12:45 pm
482 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
483 // Example: 08:30:12 AM, 12:45:59 pm
484 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
485 )
486 )
487 // or use the OCR results from file.message_screenshot
488 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
489 // days of week
490 any([
491 'monday',
492 'tuesday',
493 'wednesday',
494 'thursday',
495 'friday',
496 'saturday',
497 'sunday'
498 ],
499 strings.icontains(..scan.ocr.raw, .)
500 )
501 // months
502 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
503 or any([
504 "January",
505 "February",
506 "March",
507 "April",
508 "June",
509 "July",
510 "August",
511 "September",
512 "October",
513 "November",
514 "December"
515 ],
516 strings.icontains(..scan.ocr.raw, .)
517 )
518 // use a word-bounded regex for May
519 or regex.contains(.scan.ocr.raw, '\bMay\b')
520 // common date formats
521 or regex.contains(.scan.ocr.raw,
522 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
523 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
524 // MM/DD/YYYY or MM/DD/YY (US format)
525 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
526 // DD/MM/YYYY or DD/MM/YY (European format)
527 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
528 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
529 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
530 )
531 // common time formats
532 or regex.contains(.scan.ocr.raw,
533 // Example: 23:45, 08:30
534 '([01]\d|2[0-3]):([0-5]\d)',
535 // Example: 23:45:59, 08:30:12
536 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
537 // Example: 08:30 AM, 12:45 pm
538 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
539 // Example: 08:30:12 AM, 12:45:59 pm
540 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
541 )
542 )
543 ),
544 // there are often emoji in the sender display name or subject
545 (
546 any([sender.display_name, subject.subject],
547 // contains an emoji
548 regex.contains(.,
549 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
550 )
551 // negate where the emoji occur in leading [bracketed] tags
552 and not regex.contains(.,
553 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
554 )
555 )
556 ),
557 // one of 1-2 attachments is an image, pdf, or macro-capable document whose QR code scan holds a url or the recipient's address
558 (
559 1 <= length(attachments) <= 2
560 and any(attachments,
561 (
562 .file_type in $file_types_images
563 or .file_type == "pdf"
564 or .file_extension in $file_extensions_macros
565 )
566 and any(file.explode(.),
567 .scan.qr.type == "url"
568 or strings.icontains(.scan.qr.data, 'http')
569 or any(recipients.to,
570 strings.icontains(..scan.qr.data, .email.local_part)
571 or strings.icontains(..scan.qr.data, .email.email)
572 )
573 )
574 )
575 )
576 )
577
578 // negating legit replies and legitimate audio file attachments and known voicemail senders
579 and not (
580 sender.email.domain.valid
581 and sender.email.domain.root_domain in (
582 "magicjack.com",
583 "unitelvoice.com",
584 "voipinterface.net",
585 "ringcentral.biz",
586 "verizonwireless.com",
587 "t-mobile.com",
588 "justcall.io",
589 "airtel.com"
590 )
591 )
592 and not (
593 any(filter(attachments, strings.starts_with(.content_type, "audio")),
594 // confirm the content type with file.explode
595 // we have seen attachments claim to be audio/* files, only to be exploded as something else
596 any(file.explode(.), strings.starts_with(.flavors.mime, "audio"))
597 )
598 )
599 and not (
600 (
601 strings.istarts_with(subject.subject, "RE:")
602 // out of office auto-reply
603 // the NLU model will handle these better natively soon
604 or strings.istarts_with(subject.subject, "Automatic reply:")
605 )
606 and (
607 length(headers.references) > 0
608 or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
609 )
610 )
611 // negate highly trusted sender domains unless they fail DMARC authentication
612 and (
613 (
614 sender.email.domain.root_domain in $high_trust_sender_root_domains
615 and not headers.auth_summary.dmarc.pass
616 )
617 or sender.email.domain.root_domain not in $high_trust_sender_root_domains
618 )
619 // bounce-back negations: an attached eml that itself carries a delivery-status part
620 and not any(attachments,
621 any(file.parse_eml(.).attachments,
622 .content_type == "message/delivery-status"
623 )
624 )
625 // negate messages with a direct delivery-status (bounce-back) or calendar attachment
626 and not (
627 any(attachments,
628 .content_type in ("message/delivery-status", "text/calendar")
629 )
630 )
631 // negate high-confidence newsletters, B2B cold outreach, and events/webinars topics
632 and not any(ml.nlu_classifier(body.current_thread.text).topics,
633 .name in ("Newsletters and Digests", "B2B Cold Outreach", "Events and Webinars") and .confidence == "high"
634 )
635 // negate bouncebacks from proofpoint
636 and not (
637 sender.display_name == "Mail Delivery Subsystem"
638 and strings.ends_with(headers.message_id, "pphosted.com>")
639 and any(headers.hops,
640 .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
641 )
642 and any(attachments, .content_type == "message/rfc822")
643 )
644 // negate CheckPoint encrypted messages
645 and not (
646 // CheckPoint banner
647 length(attachments) == 1
648 and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
649 and strings.istarts_with(headers.message_id, "<encrypted")
650 and any(headers.domains, .root_domain == "checkpointcloudsec.com")
651 )
652 // a high trust sender domain that does not pass DMARC (possible impersonation)
653 and (
654 (
655 sender.email.domain.root_domain in $high_trust_sender_root_domains
656 and not headers.auth_summary.dmarc.pass
657 )
658
659 // sender profile: outside org domains, not a common sender, and not solicited
660 or (
661 (
662 not sender.email.domain.root_domain in $org_domains
663 and (profile.by_sender_email().prevalence not in ("common"))
664 and not profile.by_sender_email().solicited
665 )
666 or (
667 profile.by_sender_email().any_messages_malicious_or_spam
668 and not profile.by_sender_email().any_messages_benign
669 )
670 // match if the sender is in org domains but failed auth
671 or (
672 sender.email.domain.domain in $org_domains
673 and not coalesce(headers.auth_summary.dmarc.pass, false)
674 )
675 // match if the sender address is blank or null
676 or (
677 regex.match(sender.email.email, "")
678 or sender.email.email is null
679 )
680 )
681 )
682attack_types:
683 - "Credential Phishing"
684tactics_and_techniques:
685 - "Social engineering"
686detection_methods:
687 - "Content analysis"
688 - "Natural Language Understanding"
689 - "Sender analysis"
690 - "URL analysis"
691id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"