Link: Multistage landing - Scribd document

Jan 12, 2026 ·
Detects when a Scribd document contains embedded links that are suspicious, particularly those targeting Microsoft services through various evasion techniques. The rule analyzes both the document content and linked destinations for suspicious patterns and redirects.
Sublime rule (View on GitHub)

 # Sublime Security detection rule.
 # Flags inbound messages that contain exactly one distinct link to a Scribd
 # document when either (a) a link embedded in that document looks suspicious
 # or (b) the rendered Scribd page itself reads as credential theft.
 name: "Link: Multistage landing - Scribd document"
 description: "Detects when a Scribd document contains embedded links that are suspicious, particularly those targeting Microsoft services through various evasion techniques. The rule analyzes both the document content and linked destinations for suspicious patterns and redirects."
 type: "rule"
 severity: "medium"
 source: |
   type.inbound
   // exactly one distinct Scribd "/document" URL among the body links
   and length(distinct(filter(body.links,
                              .href_url.domain.root_domain == "scribd.com"
                              and strings.istarts_with(.href_url.path, "/document")
                       ),
                       .href_url.url
              )
   ) == 1
   and any(body.links,
           .href_url.domain.root_domain == "scribd.com"
           and strings.istarts_with(.href_url.path, "/document")
           and (
             // target the embedded links via XPath: href attributes of
             // anchors whose class is "ll" in the analyzed page's final DOM
             any(html.xpath(ml.link_analysis(.).final_dom,
                            '//a[@class="ll"]/@href'
                 ).nodes,
                 // embedded link is hosted on a suspicious TLD or a free subdomain host
                 strings.parse_url(.raw).domain.tld in $suspicious_tlds
                 or strings.parse_url(.raw).domain.domain in $free_subdomain_hosts
                 or strings.parse_url(.raw).domain.root_domain in $free_subdomain_hosts
                 // observed pattern in credential theft URLs: Microsoft keywords in the path
                 or strings.ilike(strings.parse_url(.raw).path,
                                  "*o365*",
                                  "*office365*",
                                  "*microsoft*"
                 )
                 // observed pattern in credential theft URLs: Microsoft keywords in the query parameters
                 or strings.ilike(strings.parse_url(.raw).query_params,
                                  "*o365*",
                                  "*office365*",
                                  "*microsoft*"
                 )
                 // observed pattern in credential theft URLs: Microsoft keywords in
                 // any result of scanning the query parameters for base64
                 or any(beta.scan_base64(strings.parse_url(.raw).query_params),
                        strings.ilike(., "*o365*", "*office365*", "*microsoft*")
                 )
                 // link analysis of the embedded link: phishing verdict, captcha,
                 // or an "I'm Human" challenge in the landing page text
                 or ml.link_analysis(strings.parse_url(.raw), mode="aggressive").credphish.disposition == "phishing"
                 or ml.link_analysis(strings.parse_url(.raw), mode="aggressive").credphish.contains_captcha
                 or strings.icontains(ml.link_analysis(strings.parse_url(.raw),
                                                       mode="aggressive"
                                      ).final_dom.display_text,
                                      "I'm Human"
                 )
                 // bails out to a well-known domain, seen in evasion attempts:
                 // at least one redirect and the effective URL is in the Tranco 10k list
                 or (
                   length(ml.link_analysis(strings.parse_url(.raw),
                                           mode="aggressive"
                          ).redirect_history
                   ) > 0
                   and ml.link_analysis(strings.parse_url(.raw), mode="aggressive").effective_url.domain.root_domain in $tranco_10k
                 )
             )
             // credential theft language on the main Scribd page: OCR the
             // screenshot and require a cred_theft intent above low confidence
             or any(ml.nlu_classifier(beta.ocr(ml.link_analysis(.,
                                                                mode="aggressive"
                                               ).screenshot
                                      ).text
                    ).intents,
                    .name == "cred_theft" and .confidence != "low"
             )
           )
   )
   // negate highly trusted sender domains unless they fail DMARC authentication
   and (
     (
       sender.email.domain.root_domain in $high_trust_sender_root_domains
       and not headers.auth_summary.dmarc.pass
     )
     or sender.email.domain.root_domain not in $high_trust_sender_root_domains
   )
 attack_types:
   - "Credential Phishing"
 tactics_and_techniques:
   - "Evasion"
   - "Social engineering"
   - "Impersonation: Brand"
   - "Free file host"
 detection_methods:
   - "URL analysis"
   - "HTML analysis"
   - "Natural Language Understanding"
   - "Computer Vision"
   - "Optical Character Recognition"
   - "URL screenshot"
 id: "afa9807d-c70f-5af6-91ef-284c72d01cab"