Refactor SEO automation into unified CLI application
Major refactoring to create a clean, integrated CLI application: ### New Features: - Unified CLI executable (./seo) with simple command structure - All commands accept optional CSV file arguments - Auto-detection of latest files when no arguments provided - Simplified output directory structure (output/ instead of output/reports/) - Cleaner export filename format (all_posts_YYYY-MM-DD.csv) ### Commands: - export: Export all posts from WordPress sites - analyze [csv]: Analyze posts with AI (optional CSV input) - recategorize [csv]: Recategorize posts with AI - seo_check: Check SEO quality - categories: Manage categories across sites - approve [files]: Review and approve recommendations - full_pipeline: Run complete workflow - analytics, gaps, opportunities, report, status ### Changes: - Moved all scripts to scripts/ directory - Created config.yaml for configuration - Updated all scripts to use output/ directory - Deprecated old seo-cli.py in favor of new ./seo - Added AGENTS.md and CHANGELOG.md documentation - Consolidated README.md with updated usage ### Technical: - Added PyYAML dependency - Removed hardcoded configuration values - All scripts now properly integrated - Better error handling and user feedback Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
778
scripts/multi_site_seo_analyzer.py
Executable file
778
scripts/multi_site_seo_analyzer.py
Executable file
@@ -0,0 +1,778 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-Site WordPress SEO Analyzer
|
||||
Fetches posts from 3 WordPress sites, analyzes titles and meta descriptions,
|
||||
and provides AI-powered optimization recommendations.
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import time
|
||||
from config import Config
|
||||
import sys
|
||||
|
||||
# Configure logging once at import time; every message shares one timestamped
# format so multi-site runs produce a readable transcript.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
||||
|
||||
class MultiSiteSEOAnalyzer:
    """Analyzes titles and meta descriptions across multiple WordPress sites.

    Posts are fetched via the WordPress REST API for every site configured in
    ``Config.WORDPRESS_SITES``, scored with rule-based heuristics, optionally
    enriched with AI recommendations via OpenRouter, and exported to a CSV
    plus a markdown summary report.
    """
|
||||
|
||||
def __init__(self, progressive_csv: bool = True):
|
||||
"""
|
||||
Initialize the analyzer.
|
||||
|
||||
Args:
|
||||
progressive_csv: If True, write CSV progressively as posts are analyzed
|
||||
"""
|
||||
self.sites_config = Config.WORDPRESS_SITES
|
||||
self.posts_data = {}
|
||||
self.analysis_results = []
|
||||
self.api_calls = 0
|
||||
self.ai_cost = 0.0
|
||||
self.openrouter_api_key = Config.OPENROUTER_API_KEY
|
||||
self.progressive_csv = progressive_csv
|
||||
self.csv_file = None
|
||||
self.csv_writer = None
|
||||
|
||||
    def fetch_posts_from_site(self, site_name: str, site_config: Dict,
                              include_drafts: bool = False) -> List[Dict]:
        """
        Fetch posts from a WordPress site using the REST API.

        Each post status is fetched separately because mixing statuses in one
        request can trigger 400 errors during pagination on some sites.

        Args:
            site_name: Name of the site (domain); used for logging only
            site_config: Configuration dict with 'url', 'username', 'password'
            include_drafts: If True, fetch both published and draft posts

        Returns:
            List of raw post dicts as returned by the WordPress API
        """
        logger.info(f"Fetching posts from {site_name}...")

        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        # WordPress "Application Passwords" authenticate via HTTP Basic auth.
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        # Determine which statuses to fetch
        statuses = ['publish', 'draft'] if include_drafts else ['publish']
        # Human-readable status list for the final log line.
        status_str = ', '.join(statuses).replace('publish', 'published').replace('draft', 'drafts')

        # Fetch each status separately to avoid 400 Bad Request on pagination
        for status in statuses:
            page = 1
            status_count = 0
            use_fields = True  # Try with _fields first, fallback without if 400

            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': status,  # Single status per request
                }

                # _fields trims the payload; some sites reject it with a 400.
                if use_fields:
                    params['_fields'] = 'id,title,slug,link,meta,status'

                try:
                    response = requests.get(api_url, params=params, auth=auth, timeout=10)
                    response.raise_for_status()

                    page_posts = response.json()
                    if not page_posts:
                        # An empty page means we've paged past the last post.
                        break

                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Fetched {len(page_posts)} {status} posts (page {page})")

                    page += 1
                    time.sleep(Config.API_DELAY_SECONDS)

                except requests.exceptions.HTTPError as e:
                    # Handle 400 errors gracefully. `response` is always bound
                    # here: HTTPError only comes from raise_for_status().
                    if response.status_code == 400 and use_fields and page == 1:
                        # Retry page 1 without _fields parameter
                        logger.info(f" ⓘ Retrying without _fields parameter...")
                        use_fields = False
                        continue
                    elif response.status_code == 400:
                        # WordPress returns 400 when requesting a page past the
                        # last one - treat it as normal end of pagination.
                        logger.info(f" ⓘ API limit reached (fetched {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error fetching page {page} from {site_name}: {e}")
                        break

                except requests.exceptions.RequestException as e:
                    # Network-level failure (timeout, DNS, connection reset).
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

            if status_count > 0:
                logger.info(f" ✓ Total {status} posts: {status_count}")

        logger.info(f"✓ Total posts from {site_name} ({status_str}): {len(posts)}")
        return posts
|
||||
|
||||
def extract_seo_data(self, post: Dict, site_name: str) -> Dict:
|
||||
"""
|
||||
Extract SEO-relevant data from a post.
|
||||
|
||||
Args:
|
||||
post: Post data from WordPress API
|
||||
site_name: Name of the site
|
||||
|
||||
Returns:
|
||||
Dict with extracted SEO data
|
||||
"""
|
||||
title = post.get('title', {})
|
||||
if isinstance(title, dict):
|
||||
title = title.get('rendered', '')
|
||||
|
||||
# Get meta description from various SEO plugins
|
||||
# Check multiple possible locations where different plugins store meta descriptions
|
||||
meta_desc = ''
|
||||
if isinstance(post.get('meta'), dict):
|
||||
meta_dict = post['meta']
|
||||
|
||||
# Try various SEO plugin fields (order matters - most specific first)
|
||||
meta_desc = (
|
||||
meta_dict.get('_yoast_wpseo_metadesc', '') or # Yoast SEO
|
||||
meta_dict.get('_rank_math_description', '') or # Rank Math
|
||||
meta_dict.get('_aioseo_description', '') or # All in One SEO
|
||||
meta_dict.get('description', '') or # Standard field
|
||||
meta_dict.get('_meta_description', '') or # Alternative
|
||||
meta_dict.get('metadesc', '') # Alternative
|
||||
)
|
||||
|
||||
# Get post status
|
||||
status = post.get('status', 'publish')
|
||||
|
||||
return {
|
||||
'site': site_name,
|
||||
'post_id': post['id'],
|
||||
'title': title.strip(),
|
||||
'slug': post.get('slug', ''),
|
||||
'url': post.get('link', ''),
|
||||
'meta_description': meta_desc.strip(),
|
||||
'status': status,
|
||||
}
|
||||
|
||||
def analyze_title(self, title: str) -> Dict:
|
||||
"""
|
||||
Analyze title for SEO best practices.
|
||||
|
||||
Args:
|
||||
title: Post title
|
||||
|
||||
Returns:
|
||||
Dict with analysis results
|
||||
"""
|
||||
length = len(title)
|
||||
|
||||
# SEO best practices
|
||||
issues = []
|
||||
recommendations = []
|
||||
score = 100
|
||||
|
||||
if length < 30:
|
||||
issues.append(f"Too short ({length})")
|
||||
recommendations.append("Expand title to 50-60 characters")
|
||||
score -= 20
|
||||
elif length < 50:
|
||||
recommendations.append("Could be slightly longer (target 50-60)")
|
||||
score -= 5
|
||||
elif length > 70:
|
||||
issues.append(f"Too long ({length})")
|
||||
recommendations.append("Consider shortening to 50-70 characters")
|
||||
score -= 15
|
||||
|
||||
# Check for power words
|
||||
power_words = ['best', 'ultimate', 'complete', 'essential', 'proven',
|
||||
'effective', 'powerful', 'expert', 'guide', 'tutorial',
|
||||
'how to', 'step by step', 'top 10', 'ultimate guide']
|
||||
|
||||
has_power_word = any(word.lower() in title.lower() for word in power_words)
|
||||
if not has_power_word:
|
||||
recommendations.append("Consider adding a power word (best, complete, guide, etc.)")
|
||||
score -= 10
|
||||
|
||||
# Check for numbers
|
||||
if not any(c.isdigit() for c in title):
|
||||
recommendations.append("Consider adding a number (e.g., 'Top 5', '2025')")
|
||||
score -= 5
|
||||
|
||||
# Check for emojis or special chars that might break rendering
|
||||
special_chars = set(title) - set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -:')
|
||||
if special_chars:
|
||||
recommendations.append(f"Check special characters: {special_chars}")
|
||||
score -= 5
|
||||
|
||||
return {
|
||||
'length': length,
|
||||
'issues': issues,
|
||||
'recommendations': recommendations,
|
||||
'score': max(0, score),
|
||||
'has_power_word': has_power_word,
|
||||
'has_number': any(c.isdigit() for c in title)
|
||||
}
|
||||
|
||||
def analyze_meta_description(self, meta_desc: str) -> Dict:
|
||||
"""
|
||||
Analyze meta description for SEO best practices.
|
||||
|
||||
Args:
|
||||
meta_desc: Meta description text
|
||||
|
||||
Returns:
|
||||
Dict with analysis results
|
||||
"""
|
||||
length = len(meta_desc)
|
||||
|
||||
issues = []
|
||||
recommendations = []
|
||||
score = 100
|
||||
|
||||
if not meta_desc or length == 0:
|
||||
issues.append("Missing meta description")
|
||||
recommendations.append("Write a 120-160 character meta description")
|
||||
score = 0
|
||||
else:
|
||||
if length < 100:
|
||||
issues.append(f"Too short ({length})")
|
||||
recommendations.append("Expand to 120-160 characters")
|
||||
score -= 20
|
||||
elif length < 120:
|
||||
recommendations.append("Could be slightly longer (target 120-160)")
|
||||
score -= 5
|
||||
elif length > 160:
|
||||
issues.append(f"Too long ({length})")
|
||||
recommendations.append("Shorten to 120-160 characters")
|
||||
score -= 15
|
||||
|
||||
# Check for CTA
|
||||
cta_words = ['learn', 'discover', 'read', 'explore', 'find', 'get',
|
||||
'download', 'check', 'see', 'watch', 'try', 'start']
|
||||
has_cta = any(word.lower() in meta_desc.lower() for word in cta_words)
|
||||
if not has_cta:
|
||||
recommendations.append("Consider adding a call-to-action")
|
||||
score -= 5
|
||||
|
||||
return {
|
||||
'length': length,
|
||||
'is_missing': not meta_desc,
|
||||
'issues': issues,
|
||||
'recommendations': recommendations,
|
||||
'score': max(0, score),
|
||||
}
|
||||
|
||||
def calculate_overall_score(self, title_analysis: Dict, meta_analysis: Dict) -> float:
|
||||
"""Calculate overall SEO score (0-100)."""
|
||||
title_weight = 0.4
|
||||
meta_weight = 0.6
|
||||
return (title_analysis['score'] * title_weight) + (meta_analysis['score'] * meta_weight)
|
||||
|
||||
    def generate_ai_recommendations(self, post_data: Dict, title_analysis: Dict,
                                    meta_analysis: Dict) -> Optional[str]:
        """
        Use Claude (via OpenRouter) to generate specific optimization
        recommendations for a single post.

        Side effects: increments ``self.api_calls`` and accumulates the
        estimated dollar cost in ``self.ai_cost``.

        Args:
            post_data: Post data (needs 'title', 'meta_description', 'url')
            title_analysis: Title analysis results ('length', 'issues')
            meta_analysis: Meta description analysis ('length', 'issues')

        Returns:
            AI-generated recommendations, or None if no API key is set or
            the request fails
        """
        # AI is optional - without a key this is a silent no-op.
        if not self.openrouter_api_key:
            return None

        prompt = f"""Analyze this blog post and provide specific SEO optimization recommendations:

Post Title: "{post_data['title']}"
Current Meta Description: "{post_data['meta_description'] or 'MISSING'}"
URL: {post_data['url']}

Title Analysis:
- Length: {title_analysis['length']} characters (target: 50-70)
- Issues: {', '.join(title_analysis['issues']) or 'None'}

Meta Description Analysis:
- Length: {meta_analysis['length']} characters (target: 120-160)
- Issues: {', '.join(meta_analysis['issues']) or 'None'}

Provide 2-3 specific, actionable recommendations to improve SEO. Focus on:
1. If title needs improvement: suggest a better title
2. If meta description is missing: write one
3. If both are weak: provide both improved versions

Format as:
- Recommendation 1: [specific action]
- Recommendation 2: [specific action]
etc.

Be concise and specific."""

        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.7,
                },
                timeout=30
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost (Claude 3.5 Sonnet pricing: $3 per 1M input tokens,
            # $15 per 1M output tokens).
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations = result['choices'][0]['message']['content'].strip()
            return recommendations

        except Exception as e:
            # Best-effort: an AI failure must never abort the whole analysis.
            logger.warning(f"AI recommendation failed: {e}")
            return None
|
||||
|
||||
    def _setup_progressive_csv(self) -> Optional[Path]:
        """
        Create and open the CSV file used for progressive (row-by-row) writes.

        Stores the open file handle and DictWriter on ``self.csv_file`` /
        ``self.csv_writer``; the file is deliberately left open so rows can
        be appended during analysis (closed by analyze_all_sites).

        Returns:
            Path to the created CSV file, or None if progressive_csv is False
        """
        if not self.progressive_csv:
            return None

        # output/ lives next to the scripts/ directory, at the project root.
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_path = output_dir / f'seo_analysis_{timestamp}.csv'

        # Column order must match the rows built in analyze_all_sites.
        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url',
            'meta_description', 'title_score', 'title_issues',
            'title_recommendations', 'meta_score', 'meta_issues',
            'meta_recommendations', 'overall_score', 'ai_recommendations',
        ]

        csv_file = open(csv_path, 'w', newline='', encoding='utf-8')
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        # Flush so the header is on disk even if the run is interrupted.
        csv_file.flush()

        logger.info(f"✓ CSV file created: {csv_path}")
        self.csv_file = csv_file
        self.csv_writer = writer

        return csv_path
|
||||
|
||||
def _write_result_to_csv(self, result: Dict) -> None:
|
||||
"""Write a single result row to CSV file."""
|
||||
if self.progressive_csv and self.csv_writer:
|
||||
self.csv_writer.writerow(result)
|
||||
self.csv_file.flush()
|
||||
|
||||
    def analyze_all_sites(self, use_ai: bool = True, top_n: int = 10,
                          include_drafts: bool = False):
        """
        Fetch and analyze posts from every configured site.

        Populates ``self.posts_data`` and ``self.analysis_results`` (sorted
        worst-score-first at the end). When progressive CSV is enabled, rows
        are written as they are analyzed and the file is closed afterwards;
        export_results then rewrites the final file including AI output.

        Args:
            use_ai: Whether to use AI for recommendations
            top_n: Number of lowest-scoring posts to get AI recommendations for
            include_drafts: If True, include draft posts in analysis
        """
        logger.info(f"Starting analysis of {len(self.sites_config)} sites...")
        if include_drafts:
            logger.info("(Including draft posts)")
        logger.info("")

        all_posts = []

        # Fetch posts from all sites
        for site_name, config in self.sites_config.items():
            posts = self.fetch_posts_from_site(site_name, config, include_drafts=include_drafts)
            if posts:
                self.posts_data[site_name] = posts
                all_posts.extend(posts)

        if not all_posts:
            logger.error("No posts found on any site")
            return

        logger.info(f"\nAnalyzing {len(all_posts)} posts...\n")

        # Setup progressive CSV if enabled
        csv_path = self._setup_progressive_csv()

        # Run the rule-based scorers over every post.
        for site_name, posts in self.posts_data.items():
            logger.info(f"Analyzing {len(posts)} posts from {site_name}...")

            for idx, post in enumerate(posts, 1):
                seo_data = self.extract_seo_data(post, site_name)
                title_analysis = self.analyze_title(seo_data['title'])
                meta_analysis = self.analyze_meta_description(seo_data['meta_description'])
                overall_score = self.calculate_overall_score(title_analysis, meta_analysis)

                # Flatten the analysis into one CSV-friendly row; list fields
                # are joined with '|' so each fits a single cell.
                result = {
                    **seo_data,
                    'title_score': title_analysis['score'],
                    'title_issues': '|'.join(title_analysis['issues']) or 'None',
                    'title_recommendations': '|'.join(title_analysis['recommendations']),
                    'meta_score': meta_analysis['score'],
                    'meta_issues': '|'.join(meta_analysis['issues']) or 'None',
                    'meta_recommendations': '|'.join(meta_analysis['recommendations']),
                    'overall_score': overall_score,
                    'ai_recommendations': '',
                }

                self.analysis_results.append(result)

                # Write to CSV progressively (before AI recommendations)
                if self.progressive_csv:
                    self._write_result_to_csv(result)
                    logger.debug(f" [{idx}/{len(posts)}] Written: {seo_data['title'][:40]}")

        # Sort by priority (lowest scores first) and get AI recommendations for top posts
        if use_ai:
            self.analysis_results.sort(key=lambda x: x['overall_score'])
            logger.info(f"\nGenerating AI recommendations for top {top_n} posts...\n")

            for idx, result in enumerate(self.analysis_results[:top_n], 1):
                logger.info(f" [{idx}/{top_n}] {result['title'][:50]}...")

                # Rebuild the minimal analysis dicts the AI helper expects
                # from the flattened row.
                ai_recs = self.generate_ai_recommendations(
                    result,
                    {
                        'score': result['title_score'],
                        'issues': result['title_issues'].split('|'),
                        'length': len(result['title'])
                    },
                    {
                        'score': result['meta_score'],
                        'issues': result['meta_issues'].split('|'),
                        'length': len(result['meta_description'])
                    }
                )

                result['ai_recommendations'] = ai_recs or ''

                # Update CSV with AI recommendations if using progressive CSV
                if self.progressive_csv and self.csv_writer:
                    # CSV rows cannot be updated in place; export_results
                    # rewrites the whole file with AI recommendations later.
                    pass

                time.sleep(0.5)  # Rate limiting

        # Sort by overall score for final export
        self.analysis_results.sort(key=lambda x: x['overall_score'])

        # Close progressive CSV if open (export_results re-writes it with the
        # final data, including AI recommendations).
        if self.progressive_csv and self.csv_file:
            self.csv_file.close()
            self.csv_file = None
            self.csv_writer = None
|
||||
|
||||
    def export_results(self, output_file: Optional[str] = None):
        """
        Export analysis results to CSV (and a markdown summary report).

        When no explicit path is given and progressive mode is on, the most
        recent output/seo_analysis_*.csv is overwritten so the final file
        (now including AI recommendations) replaces the progressive one.

        Args:
            output_file: Output file path (optional)
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)

            if self.progressive_csv:
                # Use same timestamp as progressive file
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                # Find the most recent seo_analysis file.
                # NOTE(review): lexicographic glob-sort assumes only this run
                # writes seo_analysis_*.csv here; a concurrent run could make
                # this pick another run's file - confirm single-run usage.
                files = sorted(output_dir.glob('seo_analysis_*.csv'))
                if files:
                    output_file = files[-1]  # Use the most recent one
                else:
                    output_file = output_dir / f'seo_analysis_{timestamp}_final.csv'
            else:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                output_file = output_dir / f'seo_analysis_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.analysis_results:
            logger.error("No results to export")
            return

        # Column order for the exported CSV (matches _setup_progressive_csv).
        fieldnames = [
            'site',
            'post_id',
            'status',
            'title',
            'slug',
            'url',
            'meta_description',
            'title_score',
            'title_issues',
            'title_recommendations',
            'meta_score',
            'meta_issues',
            'meta_recommendations',
            'overall_score',
            'ai_recommendations',
        ]

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

            # Only export known columns; missing keys become empty cells.
            for result in self.analysis_results:
                writer.writerow({field: result.get(field, '') for field in fieldnames})

        if self.progressive_csv:
            logger.info(f"\n✓ Final results saved to: {output_file}")
        else:
            logger.info(f"\n✓ Results exported to: {output_file}")

        # Also export as a summary report
        self.export_summary_report(output_file)
|
||||
|
||||
    def export_summary_report(self, csv_file: Path):
        """
        Write a markdown summary report next to the exported CSV.

        The report contains global stats, priority-issue counts, and a
        per-site section listing the five lowest-scoring posts.

        Args:
            csv_file: Path of the CSV export; the report reuses its stem
                with a ``_summary.md`` suffix
        """
        report_file = csv_file.parent / f"{csv_file.stem}_summary.md"

        # Group results by site for the per-site sections.
        by_site = {}
        for result in self.analysis_results:
            site = result['site']
            if site not in by_site:
                by_site[site] = []
            by_site[site].append(result)

        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Multi-Site SEO Analysis Report\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Summary stats
            total_posts = len(self.analysis_results)
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            # Guard against division by zero when there are no results.
            avg_score = sum(r['overall_score'] for r in self.analysis_results) / total_posts if total_posts > 0 else 0

            f.write("## Summary\n\n")
            f.write(f"- **Total Posts:** {total_posts}\n")
            if published > 0:
                f.write(f" - Published: {published}\n")
            if drafts > 0:
                f.write(f" - Drafts: {drafts}\n")
            f.write(f"- **Average SEO Score:** {avg_score:.1f}/100\n")
            f.write(f"- **API Calls Made:** {self.api_calls}\n")
            f.write(f"- **AI Cost:** ${self.ai_cost:.4f}\n")
            f.write(f"- **Sites Analyzed:** {len(by_site)}\n\n")

            # Priority issues (meta_score == 0 means the description is missing)
            missing_meta = sum(1 for r in self.analysis_results if r['meta_score'] == 0)
            weak_titles = sum(1 for r in self.analysis_results if r['title_score'] < 50)
            weak_meta = sum(1 for r in self.analysis_results if r['meta_score'] < 50 and r['meta_score'] > 0)

            f.write("## Priority Issues\n\n")
            f.write(f"- **Missing Meta Descriptions:** {missing_meta} posts\n")
            f.write(f"- **Weak Titles (Score < 50):** {weak_titles} posts\n")
            f.write(f"- **Weak Meta (Score < 50):** {weak_meta} posts\n\n")

            # Per-site sections; results are sorted worst-first by
            # analyze_all_sites, so the head of each list is the top
            # optimization candidates.
            for site_name, posts in by_site.items():
                avg = sum(p['overall_score'] for p in posts) / len(posts)
                f.write(f"## {site_name}\n\n")
                f.write(f"- **Posts:** {len(posts)}\n")
                f.write(f"- **Avg Score:** {avg:.1f}/100\n")
                f.write(f"- **Missing Meta:** {sum(1 for p in posts if p['meta_score'] == 0)}\n\n")

                # Top 5 to optimize
                f.write("### Top 5 Posts to Optimize\n\n")
                for idx, post in enumerate(posts[:5], 1):
                    f.write(f"{idx}. **{post['title']}** (Score: {post['overall_score']:.0f})\n")
                    f.write(f" - URL: {post['url']}\n")
                    if post['meta_issues'] != 'None':
                        f.write(f" - Meta Issues: {post['meta_issues']}\n")
                    if post['ai_recommendations']:
                        # chr(10) is '\n' - keep only the first line of AI text.
                        f.write(f" - Recommendations: {post['ai_recommendations'].split(chr(10))[0]}\n")
                    f.write("\n")

            f.write("\n## Legend\n\n")
            f.write("- **Title Score:** Evaluates length, power words, numbers, readability\n")
            f.write("- **Meta Score:** Evaluates presence, length, call-to-action\n")
            f.write("- **Overall Score:** 40% title + 60% meta description\n")
            f.write("- **Optimal Ranges:**\n")
            f.write(" - Title: 50-70 characters\n")
            f.write(" - Meta: 120-160 characters\n")

        logger.info(f"✓ Summary report: {report_file}")
|
||||
|
||||
def run(self, use_ai: bool = True, top_n: int = 10, include_drafts: bool = False):
|
||||
"""Run complete analysis."""
|
||||
try:
|
||||
self.analyze_all_sites(use_ai=use_ai, top_n=top_n, include_drafts=include_drafts)
|
||||
self.export_results()
|
||||
|
||||
logger.info("\n" + "="*60)
|
||||
logger.info("ANALYSIS COMPLETE")
|
||||
logger.info("="*60)
|
||||
logger.info(f"Total posts analyzed: {len(self.analysis_results)}")
|
||||
published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
|
||||
drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
|
||||
if published > 0:
|
||||
logger.info(f" - Published: {published}")
|
||||
if drafts > 0:
|
||||
logger.info(f" - Drafts: {drafts}")
|
||||
logger.info(f"AI recommendations: {sum(1 for r in self.analysis_results if r['ai_recommendations'])}")
|
||||
logger.info(f"AI cost: ${self.ai_cost:.4f}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Analysis failed: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def check_meta_fields(site_url: str, username: str, password: str) -> None:
    """
    Diagnostic: log the meta fields exposed by a site's first published post.

    Useful for discovering which SEO plugin field holds the meta description
    (Yoast, Rank Math, AIOSEO, ...).

    Args:
        site_url: WordPress site URL
        username: WordPress username
        password: WordPress app password
    """
    logger.info(f"\n{'='*60}")
    logger.info("META FIELD DIAGNOSTIC")
    logger.info(f"{'='*60}\n")
    logger.info(f"Site: {site_url}")
    logger.info("Checking available meta fields in first post...\n")

    base_url = site_url.rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/posts"
    auth = HTTPBasicAuth(username, password)

    try:
        params = {
            'per_page': 1,
            'status': 'publish'
        }

        response = requests.get(api_url, params=params, auth=auth, timeout=10)
        response.raise_for_status()

        posts = response.json()
        if not posts:
            logger.error("No posts found")
            return

        post = posts[0]
        # Title may be a rendered object ({'rendered': ...}) or, depending on
        # request parameters, a plain string - handle both, consistent with
        # MultiSiteSEOAnalyzer.extract_seo_data (previously a plain-string
        # title crashed on .get()).
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', 'N/A')
        logger.info(f"Post: {title}")
        logger.info(f"\nAvailable meta fields:")

        if isinstance(post.get('meta'), dict):
            meta_dict = post['meta']
            if meta_dict:
                for key, value in sorted(meta_dict.items()):
                    # Truncate values so the log stays readable.
                    preview = str(value)[:60]
                    logger.info(f" • {key}: {preview}")
            else:
                logger.info(" (No meta fields found)")
        else:
            logger.info(" (Meta is not a dictionary)")

        # Raw (truncated) dump in case a plugin stores data non-standardly.
        logger.info(f"\nFull meta object:")
        logger.info(json.dumps(post.get('meta', {}), indent=2)[:500])

    except Exception as e:
        # Diagnostic tool: report and return rather than crash.
        logger.error(f"Error: {e}")
|
||||
|
||||
|
||||
def main():
    """Parse CLI arguments and run the analyzer (or the meta diagnostic)."""
    import argparse

    arg_parser = argparse.ArgumentParser(
        description='Analyze SEO across multiple WordPress sites'
    )
    arg_parser.add_argument(
        '--no-ai',
        action='store_true',
        help='Skip AI recommendations to save cost'
    )
    arg_parser.add_argument(
        '--top-n',
        type=int,
        default=10,
        help='Number of top posts to get AI recommendations for'
    )
    arg_parser.add_argument(
        '--output',
        help='Output CSV file path'
    )
    arg_parser.add_argument(
        '--include-drafts',
        action='store_true',
        help='Include draft posts in analysis (published + drafts)'
    )
    arg_parser.add_argument(
        '--no-progressive',
        action='store_true',
        help='Disable real-time CSV writing (write only at end)'
    )
    arg_parser.add_argument(
        '--diagnose',
        help='Diagnose meta fields for a site (URL). Example: --diagnose https://www.mistergeek.net'
    )

    options = arg_parser.parse_args()

    # Diagnostic mode short-circuits the normal analysis run.
    if options.diagnose:
        from getpass import getpass  # NOTE(review): imported but never used
        username = Config.WORDPRESS_USERNAME
        password = Config.WORDPRESS_APP_PASSWORD

        if not username or not password:
            logger.error("WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must be set in .env")
            sys.exit(1)

        check_meta_fields(options.diagnose, username, password)
        sys.exit(0)

    analyzer = MultiSiteSEOAnalyzer(progressive_csv=not options.no_progressive)
    analyzer.run(use_ai=not options.no_ai,
                 top_n=options.top_n,
                 include_drafts=options.include_drafts)
|
||||
|
||||
|
||||
# Entry point when the module is executed directly as a script.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user