SEO Content Analysis

Analyze content for keyword placement, heading structure, and on-page SEO factors.
from seo_rat.article import Article
# | export
from seo_rat.content_parser import extract_headers, remove_metadata, get_page_content
from seo_rat.gsc_client import get_date_range

source

calculate_keyword_density


def calculate_keyword_density(
    content:str, # Page content
    keyword:str, # Keyword to search for
)->dict:

Calculate keyword density using word-level matching.


source

find_cannibalized


def find_cannibalized(
    session, # Active database session
    website_id:int, # Website ID
    site_url:str, # GSC property URL
    start:str, # Start date (YYYY-MM-DD)
    end:str, # End date (YYYY-MM-DD)
)->dict:

Find keyword cannibalization via exact focus keyword matches and GSC ranking data.

Future Update

  • Add DSPy to filter the noise from the cannibalization results
    • Input: query string + website description/niche
    • Output: is_relevant: bool + reason: str

source

check_h1_count


def check_h1_count(
    headers:list, # Headers from [`extract_headers`](https://abdelkareemkobo.github.io/seo_rat/content_parser.html#extract_headers)
    title:str | None=None, # Frontmatter title (for Quarto)
    title_is_h1:bool=False, # Whether title counts as H1
)->dict:

Check H1 count — for Quarto, the title frontmatter field acts as H1.


source

keyword_in_first_section


def keyword_in_first_section(
    content:str, # Page content
    keyword:str, # Keyword to search for
    percent:int=10, # Percentage of content to check
)->bool:

Check if keyword appears in the first `percent`% of content (default: 10%).


source

check_paragraph_length


def check_paragraph_length(
    content:str, # Page content
)->dict:

Check average number of sentences per paragraph.


source

keyword_in_metadata


def keyword_in_metadata(
    metadata:dict, # Parsed frontmatter dict
    keyword:str, # Keyword to search for
    desc_field:str='description', # Frontmatter description field
)->dict:

Check if keyword appears in title and description metadata fields.


source

keyword_in_alt_texts


def keyword_in_alt_texts(
    images:list, # Images from [`extract_images`](https://abdelkareemkobo.github.io/seo_rat/content_parser.html#extract_images)
    keyword:str, # Keyword to search for
)->bool:

Check if keyword appears in any image alt text.


source

analyze_header_distribution


def analyze_header_distribution(
    headers:list, # Headers from [`extract_headers`](https://abdelkareemkobo.github.io/seo_rat/content_parser.html#extract_headers)
)->dict:

Analyze header type distribution as counts and percentages.


source

check_keyword_placement


def check_keyword_placement(
    keyword:str | None, # Focus keyword
    metadata:dict, # Parsed frontmatter dict
    headers:list, # Headers from [`extract_headers`](https://abdelkareemkobo.github.io/seo_rat/content_parser.html#extract_headers)
    content:str, # Page content
    url:str, # Page URL
    desc_field:str='description', # Frontmatter description field
    title_is_h1:bool=False, # Whether title counts as H1
)->dict:

Check where the focus keyword appears across page elements.


source

content_freshness


def content_freshness(
    last_updated:str, # Last updated date (YYYY-MM-DD)
    days:int=180, # Max days to consider content fresh
)->dict:

Check content freshness based on last updated date.