Fake voicemail notification (untrusted sender)
This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
Sublime rule (View on GitHub)
1name: "Fake voicemail notification (untrusted sender)"
2description: |
3 This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
4 The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
5type: "rule"
6severity: "medium"
7source: |
8 type.inbound
9 // contains links or attachments
10 and (
11 (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
12 and 0 <= length(distinct(attachments, .md5)) <= 8
13 )
14
15 // the subject or display_name need some keywords which are voicemail related
16 and (
17 any([subject.subject, sender.display_name],
18 regex.icontains(.,
19 // split phrases that occur within 3 words between or only punctuation between them
20 '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
21 // regex specific to v-mail, v_msg, v,mail, etc
22 // list of "secondary" words synced with regex above this one
23 'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
24 // split phrases that start with "caller" that occur within 3 words between or only punctuation
25 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
26 // strong phrases
27 '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
28 // starts in the format of `(4)` and contains some voicemail keywords
29 '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
30 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
31
32 // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
33 // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
34 '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
35 // obfuscated phone number with at least one digit in the prefix
36 // XXX-555-5555, XXX-5XX-XXXX
37 '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
38 // obfuscated voicemail/voicemessage keywords
39 'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
40 'v[o0][il1]cem[\*X\.\-_]{2,}',
41 // "X new voice..." patterns
42 '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
43 // sent-message patterns
44 '(?:sent|new|incoming)[\s\-]+message.*(v[o0][il1]ce|<.*@.*>)',
45 )
46 )
47 // body.current_thread.text inspection should be very specific to avoid FP
48 or regex.icontains(strings.replace_confusables(body.current_thread.text),
49 // high-specificity phrases matched after strings.replace_confusables; the article in the "incoming voiceRec" phrase is optional
50 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
51 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
52 'v[o0][il1]cema[il1][li1] (is )?attached',
53 'an? (?:new )?encrypted v[o0][il1]cemail',
54 'a (?:new )?pending message',
55 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
56 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
57 'New V[o0][il1]cema[il1][li1] Received',
58 'New m[il1]ssed ca[li1][li1] record',
59 '\bv[o0][il1]cema[il1][li1] transcript\b',
60 'Listen to V[o0][il1]ceMa[il1][li1]',
61 'New v[o0][il1]cema[il1][li1] from',
62 'v[o0][il1]ce note'
63 )
64 // pull out two regexes that could benefit from negations
65 or (
66 regex.icontains(body.current_thread.text,
67 // body.current_thread.text,
68 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
69 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
70 )
71 and not regex.icontains(body.current_thread.text,
72 '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+) and)? left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
73 )
74 and not regex.icontains(body.current_thread.text,
75 'you (?:have |received )my voice\s?(?:mail|audio|message)'
76 )
77 )
78 // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
79 or (
80 length(attachments) > 0
81 and (
82 all(attachments,
83 .file_type in $file_types_images
84 and beta.parse_exif(.).image_height != 1
85 )
86 // there is a mix of fake audio attachments and images
87 or (
88 length(filter(attachments,
89 strings.starts_with(.content_type, "audio")
90 // confirm the content type with .file_type
91 // we have seen attachments claim to be audio/* files, only to be exploded as something else
92 and not .file_type in ("wav", "mp3")
93 )
94 )
95 // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
96 // meaning, all attachments that are NOT fake audio attachments MUST be images
97 + length(filter(attachments,
98 .file_type in $file_types_images
99 and beta.parse_exif(.).image_height != 1
100 )
101 ) == length(attachments)
102 )
103 )
104 and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
105 regex.icontains(.scan.ocr.raw,
106 // phrases adapted from the body.current_thread.text check, applied to the screenshot OCR text; the article in the "incoming voiceRec" phrase is optional
107 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
108 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
109 'v[o0][il1]cema[il1][li1] (is )?attached',
110 'an? (?:new )?encrypted v[o0][il1]cemail',
111 'a (?:new )?pending message',
112 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
113 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
114 'New V[o0][il1]cema[il1][li1] Received',
115 'New m[il1]ssed ca[li1][li1] record',
116 '\bv[o0][il1]cema[il1][li1] transcript\b',
117 'Listen to V[o0][il1]ceMa[il1][li1]',
118 'New v[o0][il1]cema[il1][li1] from',
119 'v[o0][il1]ce note'
120 )
121 or (
122 regex.icontains(.scan.ocr.raw,
123 // body.current_thread.text,
124 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
125 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audi[o0])(?: message)?',
126 )
127 and not regex.icontains(body.current_thread.text,
128 '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
129 )
130 and not regex.icontains(body.current_thread.text,
131 'you (?:have |received )my voice\s?(?:mail|audio|message)'
132 )
133 )
134 )
135 )
136 or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
137 or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
138 )
139 and 2 of (
140 (
141 // the sender is a freemail
142 sender.email.domain.root_domain in $free_email_providers
143 ),
144 (
145 any(ml.nlu_classifier(body.current_thread.text).intents,
146 .name in ("cred_theft") and .confidence in ("medium", "high")
147 )
148 or
149 // use the OCR from the message screenshot
150 any(filter(file.explode(file.message_screenshot()), .depth == 0),
151 any(ml.nlu_classifier(.scan.ocr.raw).intents,
152 .name in ("cred_theft") and .confidence in ("medium", "high")
153 )
154 )
155 ),
156 (
157 any(attachments,
158 .content_type in ("html", "text", "text/html")
159 and any(ml.logo_detect(file.html_screenshot(.)).brands,
160 .name in ("Microsoft") and .confidence in ("medium", "high")
161 )
162 )
163 ),
164 (
165 regex.icontains(sender.display_name,
166 '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
167 )
168 ),
169 // attachment names are often HTML and voice mail related
170 (
171 any(attachments,
172 // this logic is reused below for eml attachments
173 // ensure updates occur both places
174 (
175 .content_type in ("html", "text", "text/html")
176 or .file_type in ("html", "unknown", "svg")
177 or .file_type == "pdf"
178 )
179 and (
180 regex.icontains(.file_name,
181 '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
182 // contains a time
183 // 01min , 60secs
184 '0?[1-9]\s*min(?:(?:ute)?s)?',
185 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
186 // (00:50s)
187 // 3:26 seconds
188 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
189 // 03min25secs
190 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
191 // [0:39]
192 // (0:39)
193 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
194 // contains an emoji
195 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
196 )
197 // sometimes there is no name, it's just the extension which is also strange
198 or .file_name in~ (".htm", ".html")
199 // or sometimes it has no name....
200 or .file_name is null
201 )
202 )
203 ),
204 // attachment contains javascript
205 (
206 any(attachments,
207 (
208 .content_type in ("html", "text", "text/html")
209 or .file_type in ("html", "unknown", "svg")
210 )
211 and (
212 (
213 .size < 1500
214 and any(file.explode(.), length(.scan.html.scripts) > 0)
215 )
216 // bypass the size requirement under these conditions
217 or (
218 // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
219 strings.ilike(file.parse_text(.,
220 encodings=[
221 "ascii",
222 "utf8",
223 "utf16-le"
224 ]
225 ).text,
226 "*onload*",
227 "*window.location.href*",
228 "*onerror*",
229 "*CDATA*",
230 "*<script*",
231 "*</script*",
232 "*atob*",
233 "*location.assign*",
234 "*decodeURIComponent*"
235 )
236 )
237 )
238 )
239 ),
240 (
241 any(attachments,
242 (
243 .content_type in ("html", "text", "text/html")
244 or .file_type in ("html", "unknown", "svg")
245 )
246 and any(recipients.to,
247 // the html attachment contains a recipient email address
248 strings.contains(file.parse_html(..).raw, .email.email)
249 // the sld of the domain is in the attachment name
250 or strings.contains(..file_name, .email.domain.sld)
251 )
252 )
253 ),
254 // eml attachments
255 (
256 any(filter(attachments,
257 .content_type == "message/rfc822" or .file_extension in ('eml')
258 ),
259 // which contain attachments
260 // this is the same logic as above
261 any(file.parse_eml(.).attachments,
262 (
263 .content_type in ("html", "text", "text/html")
264 or .file_type in ("html", "unknown", "svg")
265 or .file_type == "pdf"
266 )
267 and (
268 regex.icontains(.file_name,
269 '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
270 // contains a time
271 // 01min , 60secs
272 '0?[1-9]\s*min(?:(?:ute)?s)?',
273 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
274 // (00:50s)
275 // 3:26 seconds
276 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
277 // 03min25secs
278 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
279 // [0:39]
280 // (0:39)
281 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
282 // contains an emoji
283 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
284 )
285 // sometimes there is no name, it's just the extension which is also strange
286 or .file_name in~ (".htm", ".html")
287 // or sometimes it has no name....
288 or .file_name is null
289 )
290 )
291 )
292 ),
293 // attached eml sender/recipient/subject are all the same as the outer
294 // and have an attachment or body links
295 (
296 any(filter(attachments,
297 .content_type == "message/rfc822" or .file_extension in ('eml')
298 ),
299 // the attached eml mirrors the outer message:
300 // same subject, same sender, and the same set of recipients
301 file.parse_eml(.).subject.subject == subject.subject
302 and file.parse_eml(.).sender.email.email == sender.email.email
303 and (
304 length(file.parse_eml(.).recipients.to) == length(recipients.to)
305 and all(recipients.to,
306 .email.email in map(file.parse_eml(..).recipients.to,
307 .email.email
308 )
309 )
310 )
311 and (
312 // there are attachments
313 length(file.parse_eml(.).attachments) > 0
314 // or body links
315 or length(filter(file.parse_eml(.).body.links,
316 .href_url.domain.domain not in $org_domains
317 and .href_url.domain.root_domain not in $org_domains
318 )
319 ) > 0
320 )
321 )
322 ),
323 // the body links contain the recipients email
324 (
325 length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
326 and any(body.links,
327 any(recipients.to,
328 strings.icontains(..href_url.url, .email.email)
329 or strings.icontains(..href_url.url, .email.local_part)
330 )
331 )
332 ),
333 (
334 length(body.current_thread.text) < 700
335 and regex.icontains(body.current_thread.text,
336 'Méssãge|Méssage|Recéived|Addréss'
337 )
338 ),
339 (
340 // sender domain matches no body domains
341 // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
342 length(filter(body.links,
343 .display_text is not null
344 and .display_url.url is null
345 and .href_url.domain.valid
346 )
347 ) > 0
348 and all(filter(body.links,
349 .display_text is not null
350 and .display_url.url is null
351 and .href_url.domain.valid
352 ),
353 .href_url.domain.root_domain != sender.email.domain.root_domain
354 and .href_url.domain.root_domain not in $org_domains
355 and .href_url.domain.root_domain not in ("aka.ms")
356 and .href_url.domain.root_domain not in (
357 "unitelvoice.com",
358 "googleapis.com",
359 "dialmycalls.com",
360 "ringcentral.biz",
361 "google.com"
362 )
363 )
364 ),
365 // the body links contain vm related phrases
366 (
367 any(body.links,
368 regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
369 and regex.icontains(.display_text,
370 '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
371 )
372 // negate FP terms in link display texts
373 and not strings.icontains(.display_text, 'voice call center')
374 )
375 ),
376 (
377 any(body.links,
378 .href_url.path == "/ctt"
379 and regex.icontains(.display_text,
380 '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
381 )
382 // negate FP terms in link display texts
383 and not strings.icontains(.display_text, 'voice call center')
384 )
385 ),
386 // new domains
387 (
388 any(body.links,
389 network.whois(.href_url.domain).days_old < 10
390 and not strings.icontains(.href_url.path, "unsubscribe")
391 )
392 ),
393 // sld use in sender/subject elements
394 (
395 any(recipients.to,
396 // recipient's SLD is in the sender's display name
397 strings.icontains(sender.display_name, .email.domain.sld)
398 // recipient's SLD is in the subject
399 or strings.icontains(subject.subject, .email.domain.sld)
400 // recipient's SLD is in the senders local_part
401 or strings.icontains(sender.email.local_part, .email.domain.sld)
402 )
403 ),
404 // often times the subject or sender display name will contain time references
405 (
406 any([sender.display_name, subject.subject, body.current_thread.text],
407 regex.icontains(.,
408 // 01min , 60secs
409 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
410 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
411 // (00:50s)
412 // 3:26 seconds
413 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
414 // 03min25secs
415 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
416 // [0:39]
417 // (0:39)
418 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
419 )
420 )
421 // reuse the same logic against OCR output of message_screenshot
422 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
423 regex.icontains(.scan.ocr.raw,
424 // 01min , 60secs
425 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
426 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
427 // (00:50s)
428 // 3:26 seconds
429 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
430 // 03min25secs
431 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
432 // [0:39]
433 // (0:39)
434 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
435 )
436 )
437 ),
438 // often times the subject or sender display name will contain dates
439 (
440 any([sender.display_name, subject.subject],
441 // days of week
442 any([
443 'monday',
444 'tuesday',
445 'wednesday',
446 'thursday',
447 'friday',
448 'saturday',
449 'sunday'
450 ],
451 strings.icontains(.., .)
452 )
453 // months
454 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
455 or any([
456 "January",
457 "February",
458 "March",
459 "April",
460 "June",
461 "July",
462 "August",
463 "September",
464 "October",
465 "November",
466 "December"
467 ],
468 strings.icontains(.., .)
469 )
470 // use a regex for May
471 or regex.icontains(., '\bmay\b')
472 // common date formats
473 or regex.contains(.,
474 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
475 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
476 // MM/DD/YYYY or MM/DD/YY (US format)
477 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
478 // DD/MM/YYYY or DD/MM/YY (European format)
479 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
480 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
481 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
482 )
483 // common time formats
484 or regex.contains(.,
485 // Example: 23:45, 08:30
486 '([01]\d|2[0-3]):([0-5]\d)',
487 // Example: 23:45:59, 08:30:12
488 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
489 // Example: 08:30 AM, 12:45 pm
490 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
491 // Example: 08:30:12 AM, 12:45:59 pm
492 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
493 )
494 )
495 // or use the OCR results from file.message_screenshot
496 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
497 // days of week
498 any([
499 'monday',
500 'tuesday',
501 'wednesday',
502 'thursday',
503 'friday',
504 'saturday',
505 'sunday'
506 ],
507 strings.icontains(..scan.ocr.raw, .)
508 )
509 // months
510 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
511 or any([
512 "January",
513 "February",
514 "March",
515 "April",
516 "June",
517 "July",
518 "August",
519 "September",
520 "October",
521 "November",
522 "December"
523 ],
524 strings.icontains(..scan.ocr.raw, .)
525 )
526 // use a regex for May
527 or regex.contains(.scan.ocr.raw, '\bMay\b')
528 // common date formats
529 or regex.contains(.scan.ocr.raw,
530 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
531 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
532 // MM/DD/YYYY or MM/DD/YY (US format)
533 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
534 // DD/MM/YYYY or DD/MM/YY (European format)
535 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
536 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
537 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
538 )
539 // common time formats
540 or regex.contains(.scan.ocr.raw,
541 // Example: 23:45, 08:30
542 '([01]\d|2[0-3]):([0-5]\d)',
543 // Example: 23:45:59, 08:30:12
544 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
545 // Example: 08:30 AM, 12:45 pm
546 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
547 // Example: 08:30:12 AM, 12:45:59 pm
548 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
549 )
550 )
551 ),
552 // there are often emoji in the sender display name
553 (
554 any([sender.display_name, subject.subject],
555 // contains an emoji
556 regex.contains(.,
557 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
558 )
559 // negate where the emoji occur in tags
560 and not regex.contains(.,
561 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
562 )
563 )
564 ),
565 // an attachment is a pdf, image, or document that contains a url
566 (
567 1 <= length(attachments) <= 2
568 and any(attachments,
569 (
570 .file_type in $file_types_images
571 or .file_type == "pdf"
572 or .file_extension in $file_extensions_macros
573 )
574 and any(file.explode(.),
575 .scan.qr.type == "url"
576 or strings.icontains(.scan.qr.data, 'http')
577 or any(recipients.to,
578 strings.icontains(..scan.qr.data, .email.local_part)
579 or strings.icontains(..scan.qr.data, .email.email)
580 )
581 )
582 )
583 )
584 )
585
586 // negating legit replies and legitimate audio file attachments and known voicemail senders
587 and not (
588 sender.email.domain.valid
589 and sender.email.domain.root_domain in (
590 "magicjack.com",
591 "magicjackforbusiness.com",
592 "unitelvoice.com",
593 "voipinterface.net",
594 "ringcentral.biz",
595 "verizonwireless.com",
596 "t-mobile.com",
597 "justcall.io",
598 "airtel.com",
599 "grasshopper.com",
600 "ooma.com",
601 "ui.com"
602 )
603 )
604 and not (
605 any(attachments,
606 strings.starts_with(.content_type, "audio")
607 // confirm the content type with .file_type
608 // we have seen attachments claim to be audio/* files, only to be exploded as something else
609 and .file_type in ("wav", "mp3")
610 )
611 )
612 and not (
613 (
614 strings.istarts_with(subject.subject, "RE:")
615 // out of office auto-reply
616 // the NLU model will handle these better natively soon
617 or strings.istarts_with(subject.subject, "Automatic reply:")
618 )
619 and (
620 length(headers.references) > 0
621 or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
622 )
623 )
624 // bounce-back negations
625 and not any(attachments,
626 any(file.parse_eml(.).attachments,
627 .content_type == "message/delivery-status"
628 )
629 )
630 // bounce-back negations
631 and not (
632 any(attachments,
633 .content_type in ("message/delivery-status", "text/calendar")
634 )
635 )
636 // negate newsletters
637 and not (
638 (
639 any(ml.nlu_classifier(body.current_thread.text).topics,
640 .name in (
641 "Newsletters and Digests",
642 "B2B Cold Outreach",
643 "Events and Webinars"
644 )
645 and .confidence == "high"
646 )
647 )
648 and not strings.icontains(body.current_thread.text,
649 "this voicemail was shared by"
650 )
651 )
652 // negate bouncebacks from proofpoint
653 and not (
654 sender.display_name == "Mail Delivery Subsystem"
655 and strings.ends_with(headers.message_id, "pphosted.com>")
656 and any(headers.hops,
657 .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
658 )
659 and any(attachments, .content_type == "message/rfc822")
660 )
661 // negate CheckPoint encrypted messages
662 and not (
663 // CheckPoint banner
664 length(attachments) == 1
665 and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
666 and strings.istarts_with(headers.message_id, "<encrypted")
667 and any(headers.domains, .root_domain == "checkpointcloudsec.com")
668 )
669 // an impersonated high trust domain
670 and (
671 (
672 sender.email.domain.root_domain in $high_trust_sender_root_domains
673 and not coalesce(headers.auth_summary.dmarc.pass, false)
674 // a missing DMARC result is coalesced to false above so it counts as a failure; next line: service abuse
675 and not sender.email.email in ("noreply-application-integration@google.com")
676 )
677 or sender.email.domain.root_domain not in $high_trust_sender_root_domains
678
679 // sender profile
680 or (
681 (
682 not sender.email.domain.root_domain in $org_domains
683 and (profile.by_sender_email().prevalence not in ("common"))
684 and not profile.by_sender_email().solicited
685 )
686 or (
687 profile.by_sender_email().any_messages_malicious_or_spam
688 and not profile.by_sender_email().any_messages_benign
689 )
690 // match if the sender is in org domains but failed auth
691 or (
692 sender.email.domain.domain in $org_domains
693 and not coalesce(headers.auth_summary.dmarc.pass, false)
694 )
695 // match if the sender address is blank or null
696 or (regex.match(sender.email.email, "") or sender.email.email is null)
697 )
698 )
699attack_types:
700 - "Credential Phishing"
701tactics_and_techniques:
702 - "Social engineering"
703detection_methods:
704 - "Content analysis"
705 - "Natural Language Understanding"
706 - "Sender analysis"
707 - "URL analysis"
708id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"