GSC Storage

Query, filter, and persist Google Search Console analytics data in SQLite.

source

filter_exclude_pages


def filter_exclude_pages(
    query, # SQLModel query
    exclude_pages:list, # Page path substrings to exclude
):

Exclude rows where page contains any of the given substrings.


source

filter_dimension


def filter_dimension(
    query, # SQLModel query
    dimension:str, # GSCAnalytics field name
    value:str, # Value to match
):

Filter query by a specific dimension value.


source

filter_dates


def filter_dates(
    query, # SQLModel query
    start:str, # Start date (YYYY-MM-DD)
    end:str, # End date (YYYY-MM-DD)
):

Filter query by date range.


source

filter_site


def filter_site(
    query, # SQLModel query
    site_url:str, # GSC property URL
):

Filter query by site URL.


source

AnalyticsSummary


def AnalyticsSummary(
    __pydantic_self__, data:Any
)->None:

Usage documentation: see the “Models” page of the Pydantic documentation.

A base class for creating Pydantic models.

Attributes:

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.

__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
    This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
    __args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.

__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.

__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
    is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

source

normalize_url


def normalize_url(
    url:str
)->str:

Normalize URL by decoding percent-encoding and standardizing separators.


source

parse_gsc_row


def parse_gsc_row(
    row:dict, # Raw GSC API row
    site_url:str, # GSC property URL
    date:str, # Date of the row (YYYY-MM-DD)
)->GSCAnalytics:

Parse a raw GSC API row into a GSCAnalytics instance.


source

store_gsc_data


def store_gsc_data(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    date:str, # Date of the data (YYYY-MM-DD)
    rows:list, # Raw GSC API rows
)->None:

Store GSC rows with upsert (update on conflict).


source

get_top_queries


def get_top_queries(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
    country:str | None=None, # Filter by country code
    page_path:str | None=None, # Filter by page path substring
    limit:int=10, # Max rows to return
    sort_by:str='clicks', # Sort by 'clicks' or 'impressions'
)->list:

Get top performing queries, optionally filtered by country and page.


source

get_top_pages


def get_top_pages(
    session:Session, site_url:str, start_date:str, end_date:str, country:str | None=None, limit:int=20,
    sort_by:str='clicks'
)->list:

Get top performing pages by clicks or impressions.


source

get_wins


def get_wins(
    session:Session, site_url:str, start_date:str, end_date:str, min_impressions:int=100, min_position:float=10.0,
    max_position:float=50.0, country:str | None=None, page_url:str | None=None, limit:int=20
)->list:

Get high-impression, low-ranking keyword opportunities.


source

get_top_queries_excluding_pages


def get_top_queries_excluding_pages(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
    exclude_pages:list, # Page substrings to exclude
    country:str | None=None, # Filter by country code
    limit:int=10, # Max rows to return
)->list:

Get top queries excluding specific pages.


source

get_page_analytics


def get_page_analytics(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    page_path:str, # Partial page path to match
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
)->dict:

Get aggregated analytics and top queries for a specific page.


source

get_analytics_by_date_range


def get_analytics_by_date_range(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
)->list:

Get all raw GSC analytics rows for a date range.


source

get_analytics_by


def get_analytics_by(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
    dimension:str, # GSCAnalytics field to filter by
    value:str, # Value to match
)->list:

Get query-level analytics filtered by a specific dimension value.


source

store_single_date


def store_single_date(
    session:Session, # Active database session
    auth:GSCAuth, # Authenticated GSCAuth instance
    site_url:str, # GSC property URL
    date:str, # Date to fetch and store (YYYY-MM-DD)
)->int:

Fetch and store GSC data for a single date. Returns number of records stored.

# Check what's actually stored: fetch one GSCAnalytics row and print a few of its fields.
# NOTE(review): the select below has no site filter and no ordering, so `.first()`
# returns whichever row the database yields first, not necessarily one for the site
# that was just synced. In the recorded run that row belongs to "sc-domain:awazly.com",
# which is why the final `test_eq` fails.
# NOTE(review): `.first()` presumably returns None on an empty table, in which case the
# attribute accesses below would raise — confirm the table is populated at this point.
stored = session.exec(select(GSCAnalytics).limit(1)).first()
print(f"Query: {stored.query}")
print(f"Clicks: {stored.clicks}")
print(f"Date: {stored.date}")
# Expects the fetched row to belong to the kareemai.com property (see note above).
test_eq(stored.site_url, "sc-domain:kareemai.com")
Query: أنواع السباكة
Clicks: 0
Date: 2024-11-11
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[31], line 8
      6 print(f"Clicks: {stored.clicks}")
      7 print(f"Date: {stored.date}")
----> 8 test_eq(stored.site_url, "sc-domain:kareemai.com")

File ~/Desktop/seo_rat/.venv/lib/python3.13/site-packages/fastcore/test.py:40, in test_eq(a, b)
     38 def test_eq(a,b):
     39     "`test` that `a==b`"
---> 40     test(a,b,equals, cname='==')

File ~/Desktop/seo_rat/.venv/lib/python3.13/site-packages/fastcore/test.py:30, in test(a, b, cmp, cname)
     28 "`assert` that `cmp(a,b)`; display inputs and `cname or cmp.__name__` if it fails"
     29 if cname is None: cname=cmp.__name__
---> 30 assert cmp(a,b),f"{cname}:\n{a}\n{b}"

AssertionError: ==:
sc-domain:awazly.com
sc-domain:kareemai.com

source

store_date_range


def store_date_range(
    session:Session, auth:GSCAuth, site_url:str, start_date:str, end_date:str
)->dict:

Fetch and store GSC data for all dates in range.


source

get_missing_dates


def get_missing_dates(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
)->list:

Return dates in range that have no stored GSC data.


source

iter_dates


def iter_dates(
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
)->list:

Generate all dates between start and end inclusive.


source

sync_missing_dates


def sync_missing_dates(
    session:Session, auth:GSCAuth, site_url:str, start_date:str, end_date:str
)->dict:

Fetch and store GSC data for missing dates only.


source

daily_sync


def daily_sync(
    session:Session, # Active database session
    auth:GSCAuth, # Authenticated GSCAuth instance
    sites:list, # List of GSC property URLs to sync
)->dict:

Sync missing GSC data for all sites up to today.


source

compare_date_ranges


def compare_date_ranges(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start1:str, # First period start date (YYYY-MM-DD)
    end1:str, # First period end date (YYYY-MM-DD)
    start2:str, # Second period start date (YYYY-MM-DD)
    end2:str, # Second period end date (YYYY-MM-DD)
    page_url:str | None=None, # Optional specific page to compare
)->dict:

Compare GSC metrics between two date ranges, optionally for a specific page.


source

get_country_breakdown


def get_country_breakdown(
    session:Session, # Active database session
    site_url:str, # GSC property URL
    start_date:str, # Start date (YYYY-MM-DD)
    end_date:str, # End date (YYYY-MM-DD)
    page_url:str | None=None, # Optional specific page to filter
    limit:int=20, # Max number of countries to return
)->list:

Get traffic metrics grouped by country, optionally for a specific page.