gary.info

here be dragons

Quick SEO Check

seo.sh
#!/usr/bin/env bash

# Require the target URL as the first argument; the report file is optional.
if [[ -z "$1" ]]; then
  # Diagnostics go to stderr so they never mix with report text on stdout.
  echo "Usage: $0 <URL> [output_file]" >&2
  exit 1
fi

url="$1"

# Report file: the second argument, or output.txt when none is given.
output_file="${2:-output.txt}"

# Basic URL validation: the value must begin with http:// or https://.
# A case pattern is anchored at the start of the whole argument, whereas a
# line-based grep would accept a multi-line value as soon as any single line
# looked like a URL.
case "$url" in
  http://* | https://*) ;;
  *)
    echo "Error: Invalid URL format.  Must start with http:// or https://" >&2
    exit 1
    ;;
esac

# No need for artisan on macOS (unless specifically in a Laravel project)
# if [ -f "artisan" ]; then
#   php artisan view:clear
# fi

# Fetch the HTML content, following redirects and giving up after 10 seconds.
# --fail makes curl exit non-zero on HTTP 4xx/5xx responses; without it an
# error page would be analysed as if it were the requested page.
# --show-error keeps curl's own diagnostic visible despite --silent.
if ! html_content=$(curl --silent --show-error --fail --location --max-time 10 \
  --user-agent "WebsiteInfoScript/1.0" "$url"); then
  echo "Error: Failed to fetch HTML from $url" >&2
  exit 1
fi

# --- Extraction using pup ---

# Pipe the fetched page into pup and apply the selector given as $1.
query_page() {
  echo "$html_content" | pup "$1"
}

# Page title.
title=$(query_page 'title text{}')

# Meta description; stderr is discarded and an empty result gets a placeholder.
description=$(query_page 'meta[name="description"] attr{content}' 2>/dev/null)
: "${description:=N/A (Could not extract)}"

# Canonical URL, with the same placeholder when nothing was extracted.
canonical_url=$(query_page 'link[rel="canonical"] attr{href}' 2>/dev/null)
: "${canonical_url:=N/A (Could not extract)}"

# Meta tags carrying both name and content, rendered by jq as "name: content".
meta_tags=$(query_page 'meta[name][content] json{}' | jq -r '.[] | "\(.name): \(.content)"' 2>/dev/null)
meta_tags="${meta_tags:-N/A (Could not extract or no meta tags found)}"

# Heading text for levels 1-5, stored in h1_tags ... h5_tags.
for level in 1 2 3 4 5; do
  printf -v "h${level}_tags" '%s' "$(query_page "h${level} text{}")"
done

# --- ANSI Colors and Formatting ---
# Each variable holds the literal backslash sequence, not the escape byte.
csi="\033["
reset="${csi}0m"
bold="${csi}1m"
red="${csi}31m"
green="${csi}32m"
yellow="${csi}33m"
blue="${csi}34m"
magenta="${csi}35m"
cyan="${csi}36m"

# --- Output ---

# Print the arguments to stdout and append the same text to "$output_file".
# echo -e expands backslash escapes in the arguments.
output() {
  echo -e "$@" | tee -a "$output_file"
}

# Emit one "Label: value" row on stdout and append it to the report file.
#   $1 - label, left-aligned in a 15-column field
#   $2 - value
# %b expands the color escapes; the value is passed as data through %s.
print_field() {
  printf '%b%-15s%b %s\n' "${green}${bold}" "$1" "${reset}" "$2" \
    | tee -a "$output_file"
}

output "${cyan}${bold}Website Information for: ${url}${reset}"
output ""

print_field "Title:" "$title"
# The meta description is part of the report alongside title and canonical URL.
print_field "Description:" "$description"
print_field "Canonical URL:" "$canonical_url"
output ""

output "${yellow}${bold}Meta Tags:${reset}"
if [[ "$meta_tags" != "N/A (Could not extract or no meta tags found)" ]]; then
  # Read one tag per line. IFS is changed only for the read itself, so the
  # rest of the script keeps the default, and tag text containing * or ? is
  # never glob-expanded the way an unquoted "for tag in $meta_tags" would be.
  while IFS= read -r tag; do
    [[ -n "$tag" ]] || continue
    printf '  %b• %b%s\n' "${blue}${bold}" "${reset}" "$tag" | tee -a "$output_file"
  done <<< "$meta_tags"
else
  output "  ${red}$meta_tags${reset}" # Output the N/A message
fi
output ""

output "${yellow}${bold}Heading Tags:${reset}"
output ""
# Print up to three headings of one level, followed by a count of the rest.
# Arguments:
#   $1 - label for the section header (e.g. "H1")
#   $2 - heading texts, one per line
# Outputs: through output(); nothing at all when no line contains text.
print_headings() {
  local tag_name="$1"
  local tags="$2"
  local max_headings=3 # Maximum number of headings to display
  # Default word-splitting characters, scoped to this function: used to split
  # each line into words and to re-join them with single spaces.
  local IFS=$' \t\n'
  local line heading
  local -a words=()
  local -a headings=()

  # Keep only lines that contain text, trimmed and with inner whitespace runs
  # collapsed. This is done in the shell rather than with xargs, which fails
  # on an unbalanced quote (e.g. "What's new") and strips backslashes.
  while read -r line || [[ -n "$line" ]]; do
    read -r -a words <<< "$line"
    if (( ${#words[@]} > 0 )); then
      headings+=("${words[*]}")
    fi
  done <<< "$tags"

  # Nothing to report for this level.
  if (( ${#headings[@]} == 0 )); then
    return 0
  fi

  output "${magenta}${bold}## $tag_name${reset}"
  for heading in "${headings[@]:0:max_headings}"; do
    output "- $heading"
  done
  if (( ${#headings[@]} > max_headings )); then
    output "... (and $(( ${#headings[@]} - max_headings )) more)"
  fi
  output ""
}

# Report each heading level in turn; h<N>_tags holds the text for level N.
for level in 1 2 3 4 5; do
  level_var="h${level}_tags"
  print_headings "H${level}" "${!level_var}"
done

output ""
exit 0