Attachment: Office file with credential phishing URLs

Sep 2, 2025
Detects Office documents containing embedded URLs that lead to credential phishing pages. The rule filters out standard XML namespace and schema URLs commonly found in legitimate Office documents, considers only documents left with one to three remaining URLs, and then analyzes those URLs with machine learning link analysis.
Sublime rule (View on GitHub)

  # Sublime Security detection rule (YAML metadata + MQL in `source`).
  # Matches inbound messages with an Office attachment that carries one to
  # three non-schema URLs, at least one of which link analysis classifies as
  # credential phishing.
  name: "Attachment: Office file with credential phishing URLs"
  description: "Detects Office documents containing embedded URLs that redirect to credential phishing pages. The rule filters out standard XML namespace and schema URLs commonly found in legitimate Office documents, then analyzes remaining URLs for malicious content using machine learning link analysis."
  type: "rule"
  severity: "medium"
  source: |
    type.inbound
    // Stage 1: keep attachments whose extension is in $file_extensions_macros
    // and that contain 1-3 URLs once XML namespace/schema URLs are discarded.
    and any(filter(attachments,
                   .file_extension in $file_extensions_macros

                   // Sum the non-schema URL count over every exploded component.
                   // NOTE: this URL filter is repeated verbatim in stage 2 below;
                   // keep the two copies in sync when editing the exclusions.
                   and 0 < sum(map(map(file.explode(.),
                                       filter(.scan.url.urls,
                                              .domain.domain not in (
                                                // OpenXML / W3C namespace hosts present in Office documents
                                                'schemas.openxmlformats.org',
                                                'schemas.microsoft.com',
                                                'www.w3.org',
                                                // Additional Microsoft domain exclusion
                                                'microsoft.com',
                                                // WPS is a China-based alternative to MS Office; its domain
                                                // appears in namespaces of documents created by that product
                                                'wps.cn'
                                              )
                                              // Dublin Core persistent URLs (metadata schemas)
                                              and not (
                                                .domain.domain == 'purl.org'
                                                and strings.starts_with(.path, '/dc/')
                                              )
                                              // Dublin Core XML schemas
                                              and not (
                                                .domain.domain == 'dublincore.org'
                                                and strings.starts_with(.path, '/schemas/xmls/')
                                              )
                                       )
                                   ),
                                   // Per-component count of surviving URLs
                                   length(.)
                               )
                   ) <= 3 // more than 3 non-schema URLs: attachment is not analyzed
            ),
            // Stage 2: for each attachment that passed stage 1, look for a
            // surviving URL that link analysis flags as credential phishing.
            any(file.explode(.),
                any(
                    // Same exclusions as stage 1 (see NOTE above)
                    filter(.scan.url.urls,
                           .domain.domain not in (
                             'schemas.openxmlformats.org',
                             'schemas.microsoft.com',
                             'www.w3.org',
                             'microsoft.com',
                             'wps.cn'
                           )
                           and not (
                             .domain.domain == 'purl.org'
                             and strings.starts_with(.path, '/dc/')
                           )
                           and not (
                             .domain.domain == 'dublincore.org'
                             and strings.starts_with(.path, '/schemas/xmls/')
                           )
                    ),
                    ml.link_analysis(.).credphish.disposition == "phishing"
                    // Confidence is only returned when a brand is identified.
                    // coalesce(..., true) therefore requires "high" confidence when a
                    // brand is present and still allows a match when no confidence
                    // value is returned at all.
                    and coalesce(ml.link_analysis(.).credphish.confidence == "high", true)
                    // Ignore GoDaddy parked-domain pages: brand is GoDaddy and the
                    // final page text contains both parked-page phrases.
                    // NOTE(review): the explicit `is not null` check presumably keeps a
                    // missing brand from turning this negation into null; confirm
                    // against MQL null semantics before removing it.
                    and not (
                      ml.link_analysis(.).credphish.brand.name is not null
                      and ml.link_analysis(.).credphish.brand.name == "GoDaddy"
                      and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                            'is parked free, courtesy of GoDaddy.com.'
                      )
                      and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                            'Get This Domain'
                      )
                    )
                )
            )
    )
  attack_types:
    - "Credential Phishing"
  tactics_and_techniques:
    - "Evasion"
    - "Social engineering"
  detection_methods:
    - "File analysis"
    - "URL analysis"
    - "Archive analysis"
    - "Content analysis"
  id: "b2cae98d-3a93-5f49-bc81-1b20296cc332"