Major refactoring to create a clean, integrated CLI application: ### New Features: - Unified CLI executable (./seo) with simple command structure - All commands accept optional CSV file arguments - Auto-detection of latest files when no arguments provided - Simplified output directory structure (output/ instead of output/reports/) - Cleaner export filename format (all_posts_YYYY-MM-DD.csv) ### Commands: - export: Export all posts from WordPress sites - analyze [csv]: Analyze posts with AI (optional CSV input) - recategorize [csv]: Recategorize posts with AI - seo_check: Check SEO quality - categories: Manage categories across sites - approve [files]: Review and approve recommendations - full_pipeline: Run complete workflow - analytics, gaps, opportunities, report, status ### Changes: - Moved all scripts to scripts/ directory - Created config.yaml for configuration - Updated all scripts to use output/ directory - Deprecated old seo-cli.py in favor of new ./seo - Added AGENTS.md and CHANGELOG.md documentation - Consolidated README.md with updated usage ### Technical: - Added PyYAML dependency - Removed hardcoded configuration values - All scripts now properly integrated - Better error handling and user feedback Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
779 lines
29 KiB
Python
Executable File
779 lines
29 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Multi-Site WordPress SEO Analyzer
|
|
Fetches posts from 3 WordPress sites, analyzes titles and meta descriptions,
|
|
and provides AI-powered optimization recommendations.
|
|
"""
|
|
|
|
import os
|
|
import csv
|
|
import json
|
|
import logging
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
import requests
|
|
from requests.auth import HTTPBasicAuth
|
|
import time
|
|
from config import Config
|
|
import sys
|
|
|
|
# Setup logging: one root configuration for the whole script, inherited by
# the module-level logger used throughout (timestamp + level on every line).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
|
|
class MultiSiteSEOAnalyzer:
|
|
"""Analyzes titles and meta descriptions across multiple WordPress sites."""
|
|
|
|
def __init__(self, progressive_csv: bool = True):
|
|
"""
|
|
Initialize the analyzer.
|
|
|
|
Args:
|
|
progressive_csv: If True, write CSV progressively as posts are analyzed
|
|
"""
|
|
self.sites_config = Config.WORDPRESS_SITES
|
|
self.posts_data = {}
|
|
self.analysis_results = []
|
|
self.api_calls = 0
|
|
self.ai_cost = 0.0
|
|
self.openrouter_api_key = Config.OPENROUTER_API_KEY
|
|
self.progressive_csv = progressive_csv
|
|
self.csv_file = None
|
|
self.csv_writer = None
|
|
|
|
    def fetch_posts_from_site(self, site_name: str, site_config: Dict,
                              include_drafts: bool = False) -> List[Dict]:
        """
        Fetch posts from a WordPress site using REST API.

        Pages through /wp-json/wp/v2/posts 100 posts at a time, one
        status at a time. On the first HTTP 400 it retries page 1
        without the '_fields' filter (some sites reject it); a later
        400 is treated as the end of the paginated collection rather
        than a fatal error.

        Args:
            site_name: Name of the site (domain)
            site_config: Configuration dict with url, username, password
            include_drafts: If True, fetch both published and draft posts

        Returns:
            List of posts with metadata (possibly partial: request
            errors are logged and stop that status's pagination, they
            are never raised)
        """
        logger.info(f"Fetching posts from {site_name}...")

        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        # Determine which statuses to fetch
        statuses = ['publish', 'draft'] if include_drafts else ['publish']
        # Human-readable form used only in the final log line.
        status_str = ', '.join(statuses).replace('publish', 'published').replace('draft', 'drafts')

        # Fetch each status separately to avoid 400 Bad Request on pagination
        for status in statuses:
            page = 1
            status_count = 0
            use_fields = True  # Try with _fields first, fallback without if 400

            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': status,  # Single status per request
                }

                # Add _fields only if not getting 400 errors
                if use_fields:
                    params['_fields'] = 'id,title,slug,link,meta,status'

                try:
                    response = requests.get(api_url, params=params, auth=auth, timeout=10)
                    response.raise_for_status()

                    page_posts = response.json()
                    if not page_posts:
                        # Empty page: no more posts for this status.
                        break

                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Fetched {len(page_posts)} {status} posts (page {page})")

                    page += 1
                    # Throttle between pages to be polite to the remote API.
                    time.sleep(Config.API_DELAY_SECONDS)

                except requests.exceptions.HTTPError as e:
                    # Handle 400 errors gracefully
                    # (response is always bound here: HTTPError only comes
                    # from raise_for_status() after a successful get()).
                    if response.status_code == 400 and use_fields and page == 1:
                        # Retry page 1 without _fields parameter
                        logger.info(f" ⓘ Retrying without _fields parameter...")
                        use_fields = False
                        continue
                    elif response.status_code == 400:
                        # Pagination or API limit reached
                        logger.info(f" ⓘ API limit reached (fetched {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error fetching page {page} from {site_name}: {e}")
                        break

                except requests.exceptions.RequestException as e:
                    # Network-level failure (timeout, DNS, connection reset...).
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

            if status_count > 0:
                logger.info(f" ✓ Total {status} posts: {status_count}")

        logger.info(f"✓ Total posts from {site_name} ({status_str}): {len(posts)}")
        return posts
|
|
|
|
def extract_seo_data(self, post: Dict, site_name: str) -> Dict:
|
|
"""
|
|
Extract SEO-relevant data from a post.
|
|
|
|
Args:
|
|
post: Post data from WordPress API
|
|
site_name: Name of the site
|
|
|
|
Returns:
|
|
Dict with extracted SEO data
|
|
"""
|
|
title = post.get('title', {})
|
|
if isinstance(title, dict):
|
|
title = title.get('rendered', '')
|
|
|
|
# Get meta description from various SEO plugins
|
|
# Check multiple possible locations where different plugins store meta descriptions
|
|
meta_desc = ''
|
|
if isinstance(post.get('meta'), dict):
|
|
meta_dict = post['meta']
|
|
|
|
# Try various SEO plugin fields (order matters - most specific first)
|
|
meta_desc = (
|
|
meta_dict.get('_yoast_wpseo_metadesc', '') or # Yoast SEO
|
|
meta_dict.get('_rank_math_description', '') or # Rank Math
|
|
meta_dict.get('_aioseo_description', '') or # All in One SEO
|
|
meta_dict.get('description', '') or # Standard field
|
|
meta_dict.get('_meta_description', '') or # Alternative
|
|
meta_dict.get('metadesc', '') # Alternative
|
|
)
|
|
|
|
# Get post status
|
|
status = post.get('status', 'publish')
|
|
|
|
return {
|
|
'site': site_name,
|
|
'post_id': post['id'],
|
|
'title': title.strip(),
|
|
'slug': post.get('slug', ''),
|
|
'url': post.get('link', ''),
|
|
'meta_description': meta_desc.strip(),
|
|
'status': status,
|
|
}
|
|
|
|
def analyze_title(self, title: str) -> Dict:
|
|
"""
|
|
Analyze title for SEO best practices.
|
|
|
|
Args:
|
|
title: Post title
|
|
|
|
Returns:
|
|
Dict with analysis results
|
|
"""
|
|
length = len(title)
|
|
|
|
# SEO best practices
|
|
issues = []
|
|
recommendations = []
|
|
score = 100
|
|
|
|
if length < 30:
|
|
issues.append(f"Too short ({length})")
|
|
recommendations.append("Expand title to 50-60 characters")
|
|
score -= 20
|
|
elif length < 50:
|
|
recommendations.append("Could be slightly longer (target 50-60)")
|
|
score -= 5
|
|
elif length > 70:
|
|
issues.append(f"Too long ({length})")
|
|
recommendations.append("Consider shortening to 50-70 characters")
|
|
score -= 15
|
|
|
|
# Check for power words
|
|
power_words = ['best', 'ultimate', 'complete', 'essential', 'proven',
|
|
'effective', 'powerful', 'expert', 'guide', 'tutorial',
|
|
'how to', 'step by step', 'top 10', 'ultimate guide']
|
|
|
|
has_power_word = any(word.lower() in title.lower() for word in power_words)
|
|
if not has_power_word:
|
|
recommendations.append("Consider adding a power word (best, complete, guide, etc.)")
|
|
score -= 10
|
|
|
|
# Check for numbers
|
|
if not any(c.isdigit() for c in title):
|
|
recommendations.append("Consider adding a number (e.g., 'Top 5', '2025')")
|
|
score -= 5
|
|
|
|
# Check for emojis or special chars that might break rendering
|
|
special_chars = set(title) - set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -:')
|
|
if special_chars:
|
|
recommendations.append(f"Check special characters: {special_chars}")
|
|
score -= 5
|
|
|
|
return {
|
|
'length': length,
|
|
'issues': issues,
|
|
'recommendations': recommendations,
|
|
'score': max(0, score),
|
|
'has_power_word': has_power_word,
|
|
'has_number': any(c.isdigit() for c in title)
|
|
}
|
|
|
|
def analyze_meta_description(self, meta_desc: str) -> Dict:
|
|
"""
|
|
Analyze meta description for SEO best practices.
|
|
|
|
Args:
|
|
meta_desc: Meta description text
|
|
|
|
Returns:
|
|
Dict with analysis results
|
|
"""
|
|
length = len(meta_desc)
|
|
|
|
issues = []
|
|
recommendations = []
|
|
score = 100
|
|
|
|
if not meta_desc or length == 0:
|
|
issues.append("Missing meta description")
|
|
recommendations.append("Write a 120-160 character meta description")
|
|
score = 0
|
|
else:
|
|
if length < 100:
|
|
issues.append(f"Too short ({length})")
|
|
recommendations.append("Expand to 120-160 characters")
|
|
score -= 20
|
|
elif length < 120:
|
|
recommendations.append("Could be slightly longer (target 120-160)")
|
|
score -= 5
|
|
elif length > 160:
|
|
issues.append(f"Too long ({length})")
|
|
recommendations.append("Shorten to 120-160 characters")
|
|
score -= 15
|
|
|
|
# Check for CTA
|
|
cta_words = ['learn', 'discover', 'read', 'explore', 'find', 'get',
|
|
'download', 'check', 'see', 'watch', 'try', 'start']
|
|
has_cta = any(word.lower() in meta_desc.lower() for word in cta_words)
|
|
if not has_cta:
|
|
recommendations.append("Consider adding a call-to-action")
|
|
score -= 5
|
|
|
|
return {
|
|
'length': length,
|
|
'is_missing': not meta_desc,
|
|
'issues': issues,
|
|
'recommendations': recommendations,
|
|
'score': max(0, score),
|
|
}
|
|
|
|
def calculate_overall_score(self, title_analysis: Dict, meta_analysis: Dict) -> float:
|
|
"""Calculate overall SEO score (0-100)."""
|
|
title_weight = 0.4
|
|
meta_weight = 0.6
|
|
return (title_analysis['score'] * title_weight) + (meta_analysis['score'] * meta_weight)
|
|
|
|
    def generate_ai_recommendations(self, post_data: Dict, title_analysis: Dict,
                                    meta_analysis: Dict) -> Optional[str]:
        """
        Use Claude AI (via the OpenRouter chat-completions API) to generate
        specific optimization recommendations for one post.

        Side effects: increments self.api_calls and adds the estimated
        request cost to self.ai_cost.

        Args:
            post_data: Post data (title, meta_description, url)
            title_analysis: Title analysis results (score, issues, length)
            meta_analysis: Meta description analysis (score, issues, length)

        Returns:
            AI-generated recommendations, or None if AI is disabled (no
            API key) or the request failed — failures are logged as
            warnings, never raised.
        """
        # AI is optional: silently skip when no key is configured.
        if not self.openrouter_api_key:
            return None

        prompt = f"""Analyze this blog post and provide specific SEO optimization recommendations:

Post Title: "{post_data['title']}"
Current Meta Description: "{post_data['meta_description'] or 'MISSING'}"
URL: {post_data['url']}

Title Analysis:
- Length: {title_analysis['length']} characters (target: 50-70)
- Issues: {', '.join(title_analysis['issues']) or 'None'}

Meta Description Analysis:
- Length: {meta_analysis['length']} characters (target: 120-160)
- Issues: {', '.join(meta_analysis['issues']) or 'None'}

Provide 2-3 specific, actionable recommendations to improve SEO. Focus on:
1. If title needs improvement: suggest a better title
2. If meta description is missing: write one
3. If both are weak: provide both improved versions

Format as:
- Recommendation 1: [specific action]
- Recommendation 2: [specific action]
etc.

Be concise and specific."""

        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.7,
                },
                timeout=30
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost (Claude 3.5 Sonnet: $3/$15 per 1M tokens)
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations = result['choices'][0]['message']['content'].strip()
            return recommendations

        except Exception as e:
            # Best-effort: an AI failure must not abort the whole analysis run.
            logger.warning(f"AI recommendation failed: {e}")
            return None
|
|
|
|
    def _setup_progressive_csv(self) -> Optional[Path]:
        """
        Create the timestamped output CSV and write its header row.

        Side effects: stores the open file handle and DictWriter on
        self.csv_file / self.csv_writer. The file is deliberately left
        open so rows can be appended as posts are analyzed; it is
        closed at the end of analyze_all_sites().

        Returns:
            Path of the created CSV file, or None if progressive_csv
            is disabled. (Note: despite the old docs, this returns the
            path, not a (file, writer) tuple — those go on self.)
        """
        if not self.progressive_csv:
            return None

        # output/ lives one level above the directory containing this script.
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_path = output_dir / f'seo_analysis_{timestamp}.csv'

        # Column order must match the row dicts built in analyze_all_sites()
        # and the fieldnames list in export_results().
        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url',
            'meta_description', 'title_score', 'title_issues',
            'title_recommendations', 'meta_score', 'meta_issues',
            'meta_recommendations', 'overall_score', 'ai_recommendations',
        ]

        csv_file = open(csv_path, 'w', newline='', encoding='utf-8')
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        csv_file.flush()  # make the header visible on disk immediately

        logger.info(f"✓ CSV file created: {csv_path}")
        self.csv_file = csv_file
        self.csv_writer = writer

        return csv_path
|
|
|
|
def _write_result_to_csv(self, result: Dict) -> None:
|
|
"""Write a single result row to CSV file."""
|
|
if self.progressive_csv and self.csv_writer:
|
|
self.csv_writer.writerow(result)
|
|
self.csv_file.flush()
|
|
|
|
    def analyze_all_sites(self, use_ai: bool = True, top_n: int = 10,
                          include_drafts: bool = False):
        """
        Fetch and analyze posts from every configured site.

        Populates self.posts_data and self.analysis_results; the results
        list ends up sorted by ascending overall_score (worst posts
        first). When progressive CSV is on, rows are written as they are
        scored and the file is closed at the end.

        Args:
            use_ai: Whether to use AI for recommendations
            top_n: Number of top priority posts to get AI recommendations for
            include_drafts: If True, include draft posts in analysis
        """
        logger.info(f"Starting analysis of {len(self.sites_config)} sites...")
        if include_drafts:
            logger.info("(Including draft posts)")
        logger.info("")

        all_posts = []

        # Fetch posts from all sites
        for site_name, config in self.sites_config.items():
            posts = self.fetch_posts_from_site(site_name, config, include_drafts=include_drafts)
            if posts:
                self.posts_data[site_name] = posts
                all_posts.extend(posts)

        if not all_posts:
            logger.error("No posts found on any site")
            return

        logger.info(f"\nAnalyzing {len(all_posts)} posts...\n")

        # Setup progressive CSV if enabled. NOTE(review): the returned path
        # is not used here — export_results() re-discovers the newest file
        # by glob instead.
        csv_path = self._setup_progressive_csv()

        # Analyze each post
        for site_name, posts in self.posts_data.items():
            logger.info(f"Analyzing {len(posts)} posts from {site_name}...")

            for idx, post in enumerate(posts, 1):
                seo_data = self.extract_seo_data(post, site_name)
                title_analysis = self.analyze_title(seo_data['title'])
                meta_analysis = self.analyze_meta_description(seo_data['meta_description'])
                overall_score = self.calculate_overall_score(title_analysis, meta_analysis)

                # Flatten the analyses into one CSV-ready row; list fields
                # are '|'-joined so they survive the CSV round-trip.
                result = {
                    **seo_data,
                    'title_score': title_analysis['score'],
                    'title_issues': '|'.join(title_analysis['issues']) or 'None',
                    'title_recommendations': '|'.join(title_analysis['recommendations']),
                    'meta_score': meta_analysis['score'],
                    'meta_issues': '|'.join(meta_analysis['issues']) or 'None',
                    'meta_recommendations': '|'.join(meta_analysis['recommendations']),
                    'overall_score': overall_score,
                    'ai_recommendations': '',
                }

                self.analysis_results.append(result)

                # Write to CSV progressively (before AI recommendations)
                if self.progressive_csv:
                    self._write_result_to_csv(result)
                    logger.debug(f" [{idx}/{len(posts)}] Written: {seo_data['title'][:40]}")

        # Sort by priority (lowest scores first) and get AI recommendations for top posts
        if use_ai:
            self.analysis_results.sort(key=lambda x: x['overall_score'])
            logger.info(f"\nGenerating AI recommendations for top {top_n} posts...\n")

            for idx, result in enumerate(self.analysis_results[:top_n], 1):
                logger.info(f" [{idx}/{top_n}] {result['title'][:50]}...")

                # Rebuild the minimal analysis dicts the AI prompt needs
                # from the flattened row.
                ai_recs = self.generate_ai_recommendations(
                    result,
                    {
                        'score': result['title_score'],
                        'issues': result['title_issues'].split('|'),
                        'length': len(result['title'])
                    },
                    {
                        'score': result['meta_score'],
                        'issues': result['meta_issues'].split('|'),
                        'length': len(result['meta_description'])
                    }
                )

                result['ai_recommendations'] = ai_recs or ''

                # Update CSV with AI recommendations if using progressive CSV
                if self.progressive_csv and self.csv_writer:
                    # Find and update the row in the CSV by re-writing it
                    # This is a limitation of CSV - we'll update in final export instead
                    pass

                time.sleep(0.5)  # Rate limiting

        # Sort by overall score for final export
        self.analysis_results.sort(key=lambda x: x['overall_score'])

        # Close progressive CSV if open (will be re-written with final data including AI recs)
        if self.progressive_csv and self.csv_file:
            self.csv_file.close()
            self.csv_file = None
            self.csv_writer = None
|
|
|
|
    def export_results(self, output_file: Optional[str] = None):
        """
        Export analysis results to CSV, then emit the markdown summary.

        Args:
            output_file: Output file path. When omitted, a file in the
                top-level output/ directory is used; in progressive mode
                the most recent seo_analysis_*.csv is overwritten with
                the final data (including AI recommendations).
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)

            if self.progressive_csv:
                # Use same timestamp as progressive file
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                # Find the most recent seo_analysis file — lexicographic
                # sort works because the timestamp format is fixed-width.
                files = sorted(output_dir.glob('seo_analysis_*.csv'))
                if files:
                    output_file = files[-1]  # Use the most recent one
                else:
                    output_file = output_dir / f'seo_analysis_{timestamp}_final.csv'
            else:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                output_file = output_dir / f'seo_analysis_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.analysis_results:
            logger.error("No results to export")
            return

        # Must stay in sync with _setup_progressive_csv() and the row
        # dicts built in analyze_all_sites().
        fieldnames = [
            'site',
            'post_id',
            'status',
            'title',
            'slug',
            'url',
            'meta_description',
            'title_score',
            'title_issues',
            'title_recommendations',
            'meta_score',
            'meta_issues',
            'meta_recommendations',
            'overall_score',
            'ai_recommendations',
        ]

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

            for result in self.analysis_results:
                # Project each row onto the known columns; missing keys
                # become empty cells.
                writer.writerow({field: result.get(field, '') for field in fieldnames})

        if self.progressive_csv:
            logger.info(f"\n✓ Final results saved to: {output_file}")
        else:
            logger.info(f"\n✓ Results exported to: {output_file}")

        # Also export as a summary report
        self.export_summary_report(output_file)
|
|
|
|
    def export_summary_report(self, csv_file: Path):
        """Export a markdown summary report next to the CSV export.

        Aggregates overall stats, priority-issue counts, and a per-site
        breakdown listing each site's five lowest-scoring posts (the
        results list is sorted worst-first by analyze_all_sites()).

        Args:
            csv_file: Path of the CSV the report accompanies; the report
                reuses its stem with a '_summary.md' suffix.
        """
        report_file = csv_file.parent / f"{csv_file.stem}_summary.md"

        # Group by site
        by_site = {}
        for result in self.analysis_results:
            site = result['site']
            if site not in by_site:
                by_site[site] = []
            by_site[site].append(result)

        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Multi-Site SEO Analysis Report\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Summary stats
            total_posts = len(self.analysis_results)
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            avg_score = sum(r['overall_score'] for r in self.analysis_results) / total_posts if total_posts > 0 else 0

            f.write("## Summary\n\n")
            f.write(f"- **Total Posts:** {total_posts}\n")
            if published > 0:
                f.write(f" - Published: {published}\n")
            if drafts > 0:
                f.write(f" - Drafts: {drafts}\n")
            f.write(f"- **Average SEO Score:** {avg_score:.1f}/100\n")
            f.write(f"- **API Calls Made:** {self.api_calls}\n")
            f.write(f"- **AI Cost:** ${self.ai_cost:.4f}\n")
            f.write(f"- **Sites Analyzed:** {len(by_site)}\n\n")

            # Priority issues
            # meta_score == 0 means the description is missing entirely.
            missing_meta = sum(1 for r in self.analysis_results if r['meta_score'] == 0)
            weak_titles = sum(1 for r in self.analysis_results if r['title_score'] < 50)
            weak_meta = sum(1 for r in self.analysis_results if r['meta_score'] < 50 and r['meta_score'] > 0)

            f.write("## Priority Issues\n\n")
            f.write(f"- **Missing Meta Descriptions:** {missing_meta} posts\n")
            f.write(f"- **Weak Titles (Score < 50):** {weak_titles} posts\n")
            f.write(f"- **Weak Meta (Score < 50):** {weak_meta} posts\n\n")

            # By site
            for site_name, posts in by_site.items():
                avg = sum(p['overall_score'] for p in posts) / len(posts)
                f.write(f"## {site_name}\n\n")
                f.write(f"- **Posts:** {len(posts)}\n")
                f.write(f"- **Avg Score:** {avg:.1f}/100\n")
                f.write(f"- **Missing Meta:** {sum(1 for p in posts if p['meta_score'] == 0)}\n\n")

                # Top 5 to optimize (posts are already worst-first)
                f.write("### Top 5 Posts to Optimize\n\n")
                for idx, post in enumerate(posts[:5], 1):
                    f.write(f"{idx}. **{post['title']}** (Score: {post['overall_score']:.0f})\n")
                    f.write(f" - URL: {post['url']}\n")
                    if post['meta_issues'] != 'None':
                        f.write(f" - Meta Issues: {post['meta_issues']}\n")
                    if post['ai_recommendations']:
                        # chr(10) is '\n' — keep only the first line of the AI text.
                        f.write(f" - Recommendations: {post['ai_recommendations'].split(chr(10))[0]}\n")
                    f.write("\n")

            f.write("\n## Legend\n\n")
            f.write("- **Title Score:** Evaluates length, power words, numbers, readability\n")
            f.write("- **Meta Score:** Evaluates presence, length, call-to-action\n")
            f.write("- **Overall Score:** 40% title + 60% meta description\n")
            f.write("- **Optimal Ranges:**\n")
            f.write(" - Title: 50-70 characters\n")
            f.write(" - Meta: 120-160 characters\n")

        logger.info(f"✓ Summary report: {report_file}")
|
|
|
|
def run(self, use_ai: bool = True, top_n: int = 10, include_drafts: bool = False):
|
|
"""Run complete analysis."""
|
|
try:
|
|
self.analyze_all_sites(use_ai=use_ai, top_n=top_n, include_drafts=include_drafts)
|
|
self.export_results()
|
|
|
|
logger.info("\n" + "="*60)
|
|
logger.info("ANALYSIS COMPLETE")
|
|
logger.info("="*60)
|
|
logger.info(f"Total posts analyzed: {len(self.analysis_results)}")
|
|
published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
|
|
drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
|
|
if published > 0:
|
|
logger.info(f" - Published: {published}")
|
|
if drafts > 0:
|
|
logger.info(f" - Drafts: {drafts}")
|
|
logger.info(f"AI recommendations: {sum(1 for r in self.analysis_results if r['ai_recommendations'])}")
|
|
logger.info(f"AI cost: ${self.ai_cost:.4f}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Analysis failed: {e}", exc_info=True)
|
|
sys.exit(1)
|
|
|
|
|
|
def check_meta_fields(site_url: str, username: str, password: str) -> None:
    """
    Diagnostic function to check what meta fields are available on a site.

    Fetches the single most recent published post and logs every key in
    its 'meta' object, so the operator can see which SEO plugin fields
    (Yoast, Rank Math, AIOSEO, ...) the REST API actually exposes.

    Args:
        site_url: WordPress site URL
        username: WordPress username
        password: WordPress app password
    """
    logger.info(f"\n{'='*60}")
    logger.info("META FIELD DIAGNOSTIC")
    logger.info(f"{'='*60}\n")
    logger.info(f"Site: {site_url}")
    logger.info("Checking available meta fields in first post...\n")

    base_url = site_url.rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/posts"
    auth = HTTPBasicAuth(username, password)

    try:
        # A single post suffices to inspect the exposed meta schema.
        params = {
            'per_page': 1,
            'status': 'publish'
        }

        response = requests.get(api_url, params=params, auth=auth, timeout=10)
        response.raise_for_status()

        posts = response.json()
        if not posts:
            logger.error("No posts found")
            return

        post = posts[0]
        logger.info(f"Post: {post.get('title', {}).get('rendered', 'N/A')}")
        logger.info(f"\nAvailable meta fields:")

        if isinstance(post.get('meta'), dict):
            meta_dict = post['meta']
            if meta_dict:
                for key, value in sorted(meta_dict.items()):
                    # Truncate values so long descriptions stay readable.
                    preview = str(value)[:60]
                    logger.info(f" • {key}: {preview}")
            else:
                logger.info(" (No meta fields found)")
        else:
            logger.info(" (Meta is not a dictionary)")

        # Raw (truncated) dump for anything the summary above missed.
        logger.info(f"\nFull meta object:")
        logger.info(json.dumps(post.get('meta', {}), indent=2)[:500])

    except Exception as e:
        # Diagnostic only: log and return rather than crash the CLI.
        logger.error(f"Error: {e}")
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses arguments, then either runs the meta-field diagnostic
    (--diagnose URL) or performs the full multi-site analysis.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Analyze SEO across multiple WordPress sites'
    )
    parser.add_argument(
        '--no-ai',
        action='store_true',
        help='Skip AI recommendations to save cost'
    )
    parser.add_argument(
        '--top-n',
        type=int,
        default=10,
        help='Number of top posts to get AI recommendations for'
    )
    parser.add_argument(
        '--output',
        help='Output CSV file path'
    )
    parser.add_argument(
        '--include-drafts',
        action='store_true',
        help='Include draft posts in analysis (published + drafts)'
    )
    parser.add_argument(
        '--no-progressive',
        action='store_true',
        help='Disable real-time CSV writing (write only at end)'
    )
    parser.add_argument(
        '--diagnose',
        help='Diagnose meta fields for a site (URL). Example: --diagnose https://www.mistergeek.net'
    )

    args = parser.parse_args()

    # Diagnostic mode: inspect one site's meta fields and exit.
    if args.diagnose:
        # Credentials come from the environment-backed Config (.env);
        # there is no interactive prompt fallback.
        username = Config.WORDPRESS_USERNAME
        password = Config.WORDPRESS_APP_PASSWORD

        if not username or not password:
            logger.error("WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must be set in .env")
            sys.exit(1)

        check_meta_fields(args.diagnose, username, password)
        sys.exit(0)

    # NOTE(review): --output is accepted but not currently wired through to
    # export_results(); the default output/ path is always used.
    analyzer = MultiSiteSEOAnalyzer(progressive_csv=not args.no_progressive)
    analyzer.run(use_ai=not args.no_ai, top_n=args.top_n, include_drafts=args.include_drafts)
|
|
|
|
|
|
# Script entry point: only run when executed directly, not on import.
if __name__ == '__main__':
    main()
|