Fake voicemail notification (untrusted sender)
This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
Sublime rule (View on GitHub)
1name: "Fake voicemail notification (untrusted sender)"
2description: |
3 This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
4 The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
5type: "rule"
6severity: "medium"
7source: |
8 type.inbound
9 // contains links or attachments
10 and (
11 (
12 0 < length(filter(body.links, .href_url.scheme != "mailto")) <= 25
13 or 0 < length(distinct(attachments, .md5)) <= 3
14 )
15 and 0 <= length(distinct(attachments, .md5)) <= 8
16 )
17
18 // the subject or display_name need some keywords which are voicemail related
19 and (
20 any([subject.subject, sender.display_name],
21 regex.icontains(.,
22 // split phrases that occur within 3 words between or only punctuation between them
23 '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
24 // regex specific to v-mail, v_msg, v,mail, etc
25 // list of "secondary" words synced with regex above this one
26 'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
27 // split phrases that start with "caller" that occur within 3 words between or only punctuation
28 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
29 // strong phrases
30 '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
31 // starts in the format of `(4)` and contains some voicemail keywords
32 '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
33 'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
34
35 // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
36 // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
37 '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
38 // obfuscated phone number with at least one digit in the prefix
39 // XXX-555-5555, XXX-5XX-XXXX
40 '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
41 // obfuscated voicemail/voicemessage keywords
42 'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
43 'v[o0][il1]cem[\*X\.\-_]{2,}',
44 // "X new voice..." patterns
45 '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
46 // sent-message patterns
47 '(?:sent|new|incoming)[\s\-]+message.*(v[o0][il1]ce|<.*@.*>)',
48 )
49 )
50 // body.current_thread.text inspection should be very specific to avoid FP
51 or regex.icontains(strings.replace_confusables(body.current_thread.text),
52 // input is body.current_thread.text passed through strings.replace_confusables; the optional article in the "incoming voiceRec" pattern is written (?:an? )? so both "You have incoming ..." and "You have an incoming ..." match
53 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
54 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
55 'v[o0][il1]cema[il1][li1] (is )?attached',
56 'an? (?:new )?encrypted v[o0][il1]cemail',
57 'a (?:new )?pending message',
58 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
59 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
60 'New V[o0][il1]cema[il1][li1] Received',
61 'New m[il1]ssed ca[li1][li1] record',
62 '\bv[o0][il1]cema[il1][li1] transcript\b',
63 'Listen to V[o0][il1]ceMa[il1][li1]',
64 'New v[o0][il1]cema[il1][li1] from',
65 'v[o0][il1]ce note'
66 )
67 // two broad phrase patterns, paired with negations for first-person wording such as "I just left you a voicemail"
68 or (
69 regex.icontains(body.current_thread.text,
70 // the first negation below requires a leading "I", "I just", "just", or "just called you at <number> and"; if that prefix is optional the negation matches every " left you a ... voice" and cancels the second pattern here
71 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
72 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
73 )
74 and not regex.icontains(body.current_thread.text,
75 '(?:I(?:\sjust)?|just(?: called you at [\d[:punct:]\s]+and)?) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
76 )
77 and not regex.icontains(body.current_thread.text,
78 'you (?:have |received )my voice\s?(?:mail|audio|message)'
79 )
80 )
81 // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
82 or (
83 length(attachments) > 0
84 and (
85 all(attachments,
86 .file_type in $file_types_images
87 and beta.parse_exif(.).image_height != 1
88 )
89 // there is a mix of fake audio attachments and images
90 or (
91 length(filter(attachments,
92 strings.starts_with(.content_type, "audio")
93 // confirm the content type with .file_type
94 // we have seen attachments claim to be audio/* files, only to be exploded as something else
95 and not .file_type in ("wav", "mp3")
96 )
97 )
98 // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
99 // meaning, all attachments that are NOT fake audio attachments MUST be images
100 + length(filter(attachments,
101 .file_type in $file_types_images
102 and beta.parse_exif(.).image_height != 1
103 )
104 ) == length(attachments)
105 )
106 )
107 and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
108 regex.icontains(.scan.ocr.raw,
109 // same phrase list as the body.current_thread.text check, applied to the OCR text of the message screenshot; the optional article is written (?:an? )? so "You have an incoming ..." matches
110 'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
111 '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
112 'v[o0][il1]cema[il1][li1] (is )?attached',
113 'an? (?:new )?encrypted v[o0][il1]cemail',
114 'a (?:new )?pending message',
115 'Your? have (?:an? )?incoming v[o0][il1]ceRec',
116 "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
117 'New V[o0][il1]cema[il1][li1] Received',
118 'New m[il1]ssed ca[li1][li1] record',
119 '\bv[o0][il1]cema[il1][li1] transcript\b',
120 'Listen to V[o0][il1]ceMa[il1][li1]',
121 'New v[o0][il1]cema[il1][li1] from',
122 'v[o0][il1]ce note'
123 )
124 or (
125 regex.icontains(.scan.ocr.raw,
126 // positive patterns kept in sync with the body.current_thread.text copies; NOTE(review): the two negations below read body.current_thread.text, not .scan.ocr.raw - confirm that is intended
127 '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
128 'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
129 )
130 and not regex.icontains(body.current_thread.text,
131 '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
132 )
133 and not regex.icontains(body.current_thread.text,
134 'you (?:have |received )my voice\s?(?:mail|audio|message)'
135 )
136 )
137 )
138 )
139 or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
140 or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
141 )
142 and 2 of (
143 (
144 // the sender is a freemail
145 sender.email.domain.root_domain in $free_email_providers
146 ),
147 (
148 any(ml.nlu_classifier(body.current_thread.text).intents,
149 .name in ("cred_theft") and .confidence in ("medium", "high")
150 )
151 or
152 // use the OCR from the message screenshot
153 any(filter(file.explode(file.message_screenshot()), .depth == 0),
154 any(ml.nlu_classifier(.scan.ocr.raw).intents,
155 .name in ("cred_theft") and .confidence in ("medium", "high")
156 )
157 )
158 ),
159 (
160 any(attachments,
161 .content_type in ("html", "text", "text/html")
162 and any(ml.logo_detect(file.html_screenshot(.)).brands,
163 .name in ("Microsoft") and .confidence in ("medium", "high")
164 )
165 )
166 ),
167 (
168 regex.icontains(sender.display_name,
169 '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
170 )
171 ),
172 // attachment names are often HTML and voice mail related
173 (
174 any(attachments,
175 // this logic is reused below for eml attachments
176 // ensure updates occur both places
177 (
178 .content_type in ("html", "text", "text/html")
179 or .file_type in ("html", "unknown", "svg")
180 or .file_type == "pdf"
181 )
182 and (
183 regex.icontains(.file_name,
184 '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
185 // contains a time
186 // 01min , 60secs
187 '0?[1-9]\s*min(?:(?:ute)?s)?',
188 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
189 // (00:50s)
190 // 3:26 seconds
191 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
192 // 03min25secs
193 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
194 // [0:39]
195 // (0:39)
196 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
197 // contains an emoji
198 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
199 )
200 // sometimes there is no name, it's just the extension which is also strange
201 or .file_name in~ (".htm", ".html")
202 // or sometimes it has no name....
203 or .file_name is null
204 )
205 )
206 ),
207 // attachment contains javascript
208 (
209 any(attachments,
210 (
211 .content_type in ("html", "text", "text/html")
212 or .file_type in ("html", "unknown", "svg")
213 )
214 and (
215 (
216 .size < 1500
217 and any(file.explode(.), length(.scan.html.scripts) > 0)
218 )
219 // bypass the size requirement under these conditions
220 or (
221 // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
222 strings.ilike(file.parse_text(.,
223 encodings=[
224 "ascii",
225 "utf8",
226 "utf16-le"
227 ]
228 ).text,
229 "*onload*",
230 "*window.location.href*",
231 "*onerror*",
232 "*CDATA*",
233 "*<script*",
234 "*</script*",
235 "*atob*",
236 "*location.assign*",
237 "*decodeURIComponent*"
238 )
239 )
240 )
241 )
242 ),
243 (
244 any(attachments,
245 (
246 .content_type in ("html", "text", "text/html")
247 or .file_type in ("html", "unknown", "svg")
248 )
249 and any(recipients.to,
250 // the html attachment contains a recipient email address
251 strings.contains(file.parse_html(..).raw, .email.email)
252 // the sld of the domain is in the attachment name
253 or strings.contains(..file_name, .email.domain.sld)
254 )
255 )
256 ),
257 // eml attachments
258 (
259 any(filter(attachments,
260 .content_type == "message/rfc822" or .file_extension in ('eml')
261 ),
262 // which contain attachments
263 // this is the same logic as above
264 any(file.parse_eml(.).attachments,
265 (
266 .content_type in ("html", "text", "text/html")
267 or .file_type in ("html", "unknown", "svg")
268 or .file_type == "pdf"
269 )
270 and (
271 regex.icontains(.file_name,
272 '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
273 // contains a time
274 // 01min , 60secs
275 '0?[1-9]\s*min(?:(?:ute)?s)?',
276 '\d{1,2}\s*s(?:ec(?:ond)?s)?',
277 // (00:50s)
278 // 3:26 seconds
279 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
280 // 03min25secs
281 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
282 // [0:39]
283 // (0:39)
284 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
285 // contains an emoji
286 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
287 )
288 // sometimes there is no name, it's just the extension which is also strange
289 or .file_name in~ (".htm", ".html")
290 // or sometimes it has no name....
291 or .file_name is null
292 )
293 )
294 )
295 ),
296 // attached eml sender/recipient/subject are all the same as the outer
297 // and have an attachment or body links
298 (
299 any(filter(attachments,
300 .content_type == "message/rfc822" or .file_extension in ('eml')
301 ),
302 // the attached eml's subject and sender address equal the outer message's
303 // and its To list has the same count, with every outer To address present in it
304 file.parse_eml(.).subject.subject == subject.subject
305 and file.parse_eml(.).sender.email.email == sender.email.email
306 and (
307 length(file.parse_eml(.).recipients.to) == length(recipients.to)
308 and all(recipients.to,
309 .email.email in map(file.parse_eml(..).recipients.to,
310 .email.email
311 )
312 )
313 )
314 and (
315 // the attached eml itself has attachments
316 length(file.parse_eml(.).attachments) > 0
317 // or body links whose domain and root domain are both outside $org_domains
318 or length(filter(file.parse_eml(.).body.links,
319 .href_url.domain.domain not in $org_domains
320 and .href_url.domain.root_domain not in $org_domains
321 )
322 ) > 0
323 )
324 )
325 ),
326 // a body link URL contains a recipient's email address or local part; only recipients passing the same filter as the guard are compared, so a blank recipient entry is never tested against the URL (presumably an empty string would otherwise match any URL - confirm)
327 (
328 length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
329 and any(body.links,
330 any(filter(recipients.to, .email.email != "" or .email.domain.valid),
331 strings.icontains(..href_url.url, .email.email)
332 or strings.icontains(..href_url.url, .email.local_part)
333 )
334 )
335 ),
336 (
337 length(body.current_thread.text) < 700
338 and regex.icontains(body.current_thread.text,
339 'Méssãge|Méssage|Recéived|Addréss'
340 )
341 ),
342 (
343 // sender domain matches no body domains
344 // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
345 length(filter(body.links,
346 .display_text is not null
347 and .display_url.url is null
348 and .href_url.domain.valid
349 )
350 ) > 0
351 and all(filter(body.links,
352 .display_text is not null
353 and .display_url.url is null
354 and .href_url.domain.valid
355 ),
356 .href_url.domain.root_domain != sender.email.domain.root_domain
357 and .href_url.domain.root_domain not in $org_domains
358 and .href_url.domain.root_domain not in ("aka.ms")
359 and .href_url.domain.root_domain not in (
360 "unitelvoice.com",
361 "googleapis.com",
362 "dialmycalls.com",
363 "ringcentral.biz",
364 "google.com"
365 )
366 )
367 ),
368 // the body links contain vm related phrases
369 (
370 any(body.links,
371 regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
372 and regex.icontains(.display_text,
373 '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
374 )
375 // negate FP terms in link display texts
376 and not strings.icontains(.display_text, 'voice call center')
377 )
378 ),
379 (
380 any(body.links,
381 .href_url.path == "/ctt"
382 and regex.icontains(.display_text,
383 '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
384 )
385 // negate FP terms in link display texts
386 and not strings.icontains(.display_text, 'voice call center')
387 )
388 ),
389 // new domains
390 (
391 any(body.links,
392 network.whois(.href_url.domain).days_old < 10
393 and not strings.icontains(.href_url.path, "unsubscribe")
394 )
395 ),
396 // recipient SLD use in sender/subject elements
397 (
398 any(recipients.to,
399 // recipient's SLD is in the sender's display name
400 strings.icontains(sender.display_name, .email.domain.sld)
401 // recipient's SLD is in the subject
402 or strings.icontains(subject.subject, .email.domain.sld)
403 // recipient's SLD is in the sender's local_part
404 or strings.icontains(sender.email.local_part, .email.domain.sld)
405 )
406 ),
407 // often times the subject or sender display name will contain time references
408 (
409 any([sender.display_name, subject.subject, body.current_thread.text],
410 regex.icontains(.,
411 // 01min , 60secs
412 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
413 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
414 // (00:50s)
415 // 3:26 seconds
416 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
417 // 03min25secs
418 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
419 // [0:39]
420 // (0:39)
421 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
422 )
423 )
424 // reuse the same logic against OCR output of message_screenshot
425 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
426 regex.icontains(.scan.ocr.raw,
427 // 01min , 60secs
428 '0?[1-9]\s*min(?:(?:ute)?s)?\b',
429 '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
430 // (00:50s)
431 // 3:26 seconds
432 '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
433 // 03min25secs
434 '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
435 // [0:39]
436 // (0:39)
437 '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
438 )
439 )
440 ),
441 // often times the subject or sender display name will contain dates
442 (
443 any([sender.display_name, subject.subject],
444 // days of week
445 any([
446 'monday',
447 'tuesday',
448 'wednesday',
449 'thursday',
450 'friday',
451 'saturday',
452 'sunday'
453 ],
454 strings.icontains(.., .)
455 )
456 // months
457 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
458 or any([
459 "January",
460 "February",
461 "March",
462 "April",
463 "June",
464 "July",
465 "August",
466 "September",
467 "October",
468 "November",
469 "December"
470 ],
471 strings.icontains(.., .)
472 )
473 // use a regex for May
474 or regex.icontains(., '\bmay\b')
475 // common date formats
476 or regex.contains(.,
477 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
478 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
479 // MM/DD/YYYY or MM/DD/YY (US format)
480 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
481 // DD/MM/YYYY or DD/MM/YY (European format)
482 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
483 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
484 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
485 )
486 // common time formats (case-sensitive regex.contains against the display name / subject element)
487 or regex.contains(.,
488 // Example: 23:45, 08:30
489 '([01]\d|2[0-3]):([0-5]\d)',
490 // Example: 23:45:59, 08:30:12
491 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
492 // Example: 08:30 AM, 12:45 pm
493 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
494 // Example: 08:30:12 AM, 12:45:59 pm
495 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
496 )
497 )
498 // or use the OCR results from file.message_screenshot
499 or any(filter(file.explode(file.message_screenshot()), .depth == 0),
500 // days of week
501 any([
502 'monday',
503 'tuesday',
504 'wednesday',
505 'thursday',
506 'friday',
507 'saturday',
508 'sunday'
509 ],
510 strings.icontains(..scan.ocr.raw, .)
511 )
512 // months
513 // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
514 or any([
515 "January",
516 "February",
517 "March",
518 "April",
519 "June",
520 "July",
521 "August",
522 "September",
523 "October",
524 "November",
525 "December"
526 ],
527 strings.icontains(..scan.ocr.raw, .)
528 )
529 // use a regex for May
530 or regex.contains(.scan.ocr.raw, '\bMay\b')
531 // common date formats
532 or regex.contains(.scan.ocr.raw,
533 // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
534 '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
535 // MM/DD/YYYY or MM/DD/YY (US format)
536 '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
537 // DD/MM/YYYY or DD/MM/YY (European format)
538 '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
539 // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
540 '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
541 )
542 // common time formats (case-sensitive regex.contains against the screenshot OCR text)
543 or regex.contains(.scan.ocr.raw,
544 // Example: 23:45, 08:30
545 '([01]\d|2[0-3]):([0-5]\d)',
546 // Example: 23:45:59, 08:30:12
547 '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
548 // Example: 08:30 AM, 12:45 pm
549 '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
550 // Example: 08:30:12 AM, 12:45:59 pm
551 '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
552 )
553 )
554 ),
555 // there are often emoji in the sender display name
556 (
557 any([sender.display_name, subject.subject],
558 // contains an emoji
559 regex.contains(.,
560 '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
561 )
562 // negate where the emoji occur in tags
563 and not regex.contains(.,
564 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
565 )
566 )
567 ),
568 // one or two attachments, and one of them (image, pdf, or extension in $file_extensions_macros) contains a QR code whose data is a url or includes a recipient's address
569 (
570 1 <= length(attachments) <= 2
571 and any(attachments,
572 (
573 .file_type in $file_types_images
574 or .file_type == "pdf"
575 or .file_extension in $file_extensions_macros
576 )
577 and any(file.explode(.),
578 .scan.qr.type == "url"
579 or strings.icontains(.scan.qr.data, 'http')
580 or any(recipients.to,
581 strings.icontains(..scan.qr.data, .email.local_part)
582 or strings.icontains(..scan.qr.data, .email.email)
583 )
584 )
585 )
586 )
587 )
588
589 // negating legit replies and legitimate audio file attachments and known voicemail senders
590 and not (
591 sender.email.domain.valid
592 and sender.email.domain.root_domain in (
593 "magicjack.com",
594 "magicjackforbusiness.com",
595 "unitelvoice.com",
596 "voipinterface.net",
597 "ringcentral.biz",
598 "verizonwireless.com",
599 "t-mobile.com",
600 "justcall.io",
601 "airtel.com",
602 "grasshopper.com",
603 "ooma.com",
604 "ui.com"
605 )
606 )
607 and not (
608 any(attachments,
609 strings.starts_with(.content_type, "audio")
610 // confirm the content type with .file_type
611 // we have seen attachments claim to be audio/* files, only to be exploded as something else
612 and .file_type in ("wav", "mp3")
613 )
614 )
615 and not (
616 (
617 strings.istarts_with(subject.subject, "RE:")
618 // out of office auto-reply
619 // the NLU model will handle these better natively soon
620 or strings.istarts_with(subject.subject, "Automatic reply:")
621 )
622 and (length(headers.references) > 0 or headers.in_reply_to is not null)
623 )
624 // bounce-back negations
625 and not any(attachments,
626 any(file.parse_eml(.).attachments,
627 .content_type == "message/delivery-status"
628 )
629 )
630 // negate messages that directly attach a delivery-status report (bounce-back) or a text/calendar part
631 and not (
632 any(attachments,
633 .content_type in ("message/delivery-status", "text/calendar")
634 )
635 )
636 // negate newsletters
637 and not (
638 (
639 any(ml.nlu_classifier(body.current_thread.text).topics,
640 .name in (
641 "Newsletters and Digests",
642 "B2B Cold Outreach",
643 "Events and Webinars"
644 )
645 and .confidence == "high"
646 )
647 )
648 and not strings.icontains(body.current_thread.text,
649 "this voicemail was shared by"
650 )
651 )
652 // negate bouncebacks from proofpoint
653 and not (
654 sender.display_name == "Mail Delivery Subsystem"
655 and strings.ends_with(headers.message_id, "pphosted.com>")
656 and any(headers.hops,
657 .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
658 )
659 and any(attachments, .content_type == "message/rfc822")
660 )
661 // negate CheckPoint encrypted messages
662 and not (
663 // CheckPoint banner
664 length(attachments) == 1
665 and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
666 and strings.istarts_with(headers.message_id, "<encrypted")
667 and any(headers.domains, .root_domain == "checkpointcloudsec.com")
668 )
669 // an impersonated high trust domain
670 and (
671 (
672 sender.email.domain.root_domain in $high_trust_sender_root_domains
673 and not coalesce(headers.auth_summary.dmarc.pass, false)
674 // service abuse: this sender address is excluded from this branch; the dmarc.pass check above is coalesced to false so a missing DMARC result counts as not passing, mirroring the org-domain check in this rule
675 and not sender.email.email in ("noreply-application-integration@google.com")
676 )
677 or sender.email.domain.root_domain not in $high_trust_sender_root_domains
678
679 // sender profile
680 or (
681 (
682 not sender.email.domain.root_domain in $org_domains
683 and (profile.by_sender_email().prevalence not in ("common"))
684 and not profile.by_sender_email().solicited
685 )
686 or (
687 profile.by_sender_email().any_messages_malicious_or_spam
688 and not profile.by_sender_email().any_messages_benign
689 )
690 // match if the sender is in org domains but failed auth
691 or (
692 sender.email.domain.domain in $org_domains
693 and not coalesce(headers.auth_summary.dmarc.pass, false)
694 )
695 // match if the sender address is blank or null
696 or (regex.match(sender.email.email, "") or sender.email.email is null)
697 )
698 )
699attack_types:
700 - "Credential Phishing"
701tactics_and_techniques:
702 - "Social engineering"
703detection_methods:
704 - "Content analysis"
705 - "Natural Language Understanding"
706 - "Sender analysis"
707 - "URL analysis"
708id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"