Skip to content
har.fyi 🧪

GET_ORIGIN function

The httparchive.fn.GET_ORIGIN function returns the origin for a given URL.

Input

url

The URL of a web page.

Type: STRING

Output

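The corresponding origin: the lowercased scheme and host, plus the port when the URL specifies one explicitly. The result is NULL for URLs that do not use the http or https scheme.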

Type: STRING

Example usage

Basic usage

-- Sample inputs: a deep link, a bare origin, and a URL with an explicit port.
WITH sample_urls AS (
  SELECT
    url
  FROM
    UNNEST([
      'https://www.example.com/product/123',
      'https://example.com/',
      'http://example.com:80/index.html'
    ]) AS url
)

-- Show each sample URL next to the origin GET_ORIGIN derives from it.
SELECT
  url,
  `httparchive.fn.GET_ORIGIN`(url) AS origin
FROM
  sample_urls

Counting cross-origin resources per page

-- Median number of cross-origin requests per page, for root pages in the
-- mobile crawl of 2023-11-01.
WITH cross_origin AS (
  SELECT
    -- Count with COUNTIF instead of filtering in WHERE: a WHERE filter drops
    -- every row of a page that makes no cross-origin request, so that page
    -- would vanish from the GROUP BY and the median would be biased upward.
    -- Here such pages are kept with a count of 0. Comparisons that evaluate
    -- to NULL (e.g. a non-http(s) request URL) are not counted, as before.
    COUNTIF(
      `httparchive.fn.GET_ORIGIN`(url) != `httparchive.fn.GET_ORIGIN`(page)
    ) AS resources
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2023-11-01' AND
    client = 'mobile' AND
    is_root_page
  GROUP BY
    page
)


SELECT
  -- 1000 quantile buckets; offset 500 is the 50th percentile (median).
  APPROX_QUANTILES(resources, 1000)[OFFSET(500)] AS median_xo_resources_per_page
FROM
  cross_origin

Routine

-- Builds the origin of `url`: lowercased scheme + host + explicit port (if any).
-- Evaluates to NULL when `url` is not an http(s) URL, because CONCAT
-- propagates the NULL produced by the scheme extraction.
LOWER(CONCAT(
    -- only network protocols (excludes blob, filesystem, chrome, etc)
    REGEXP_EXTRACT(url, r'(?i)^(https?://)'),
    NET.HOST(url),
    -- Explicit port, or '' when the URL has none.
    -- be lazy and include @ and : for username/password without enforcing order.
    -- The port must be followed by the end of the authority (/, ?, # or end of
    -- string); without that anchor a numeric password with no port, e.g.
    -- https://user:123@example.com/, would have ":123" mistaken for the port.
    IFNULL(REGEXP_EXTRACT(url, r'(?i)^https?://[\w-.@:]+(:\d+)(?:[/?#]|$)'), '')
  ))