Major refactoring to create a clean, integrated CLI application: ### New Features: - Unified CLI executable (./seo) with simple command structure - All commands accept optional CSV file arguments - Auto-detection of latest files when no arguments provided - Simplified output directory structure (output/ instead of output/reports/) - Cleaner export filename format (all_posts_YYYY-MM-DD.csv) ### Commands: - export: Export all posts from WordPress sites - analyze [csv]: Analyze posts with AI (optional CSV input) - recategorize [csv]: Recategorize posts with AI - seo_check: Check SEO quality - categories: Manage categories across sites - approve [files]: Review and approve recommendations - full_pipeline: Run complete workflow - analytics, gaps, opportunities, report, status ### Changes: - Moved all scripts to scripts/ directory - Created config.yaml for configuration - Updated all scripts to use output/ directory - Deprecated old seo-cli.py in favor of new ./seo - Added AGENTS.md and CHANGELOG.md documentation - Consolidated README.md with updated usage ### Technical: - Added PyYAML dependency - Removed hardcoded configuration values - All scripts now properly integrated - Better error handling and user feedback Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
437 lines
20 KiB
Python
437 lines
20 KiB
Python
"""
|
|
SEO optimization report generator.
|
|
Consolidates all analysis into comprehensive markdown report and action plan.
|
|
"""
|
|
|
|
import csv
|
|
import json
|
|
import argparse
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from config import Config
|
|
|
|
|
|
class ReportGenerator:
    """Generate comprehensive SEO optimization report.

    Loads post analytics, keyword opportunities, and content-gap CSVs,
    scores each post for optimization priority, and produces a markdown
    strategy report plus a prioritized-posts CSV.
    """

    def __init__(self):
        """Initialize generator from the shared project Config."""
        self.config = Config
        self.output_dir = self.config.OUTPUT_DIR
        # Accumulated log lines; every message is also echoed to stdout.
        self.logs = []

    def log(self, message):
        """Record *message* in the run log and echo it to stdout."""
        self.logs.append(message)
        print(message)

    def load_posts_with_analytics(self, csv_path):
        """Load posts keyed by post ID from an analytics CSV.

        Tolerates alternative column names for the title/URL/SEO fields.
        Rows with unparseable numeric fields are skipped individually
        (previously one bad row aborted the whole load), matching the
        per-row error handling already used in load_opportunities().

        Args:
            csv_path: Path to the posts-with-analytics CSV.

        Returns:
            dict mapping post ID (str) -> dict of post metrics;
            empty dict when the file is missing or unreadable.
        """
        posts = {}
        if not csv_path.exists():
            self.log(f"❌ File not found: {csv_path}")
            return posts

        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    post_id = row.get('ID')
                    if not post_id:
                        continue

                    # Handle different title column names
                    title = (row.get('Title') or
                             row.get('title') or
                             row.get('post_title') or '')

                    try:
                        posts[post_id] = {
                            'title': title,
                            'url': row.get('URL') or row.get('url') or row.get('post_url') or '',
                            'seo_title': row.get('SEO Title') or row.get('seo_title') or '',
                            'meta_description': row.get('Meta Description') or row.get('meta_description') or '',
                            'traffic': int(row.get('traffic', 0) or 0),
                            'users': int(row.get('users', 0) or 0),
                            'bounce_rate': float(row.get('bounce_rate', 0) or 0),
                            'impressions': int(row.get('impressions', 0) or 0),
                            'clicks': int(row.get('clicks', 0) or 0),
                            'avg_position': float(row.get('avg_position', 0) or 0),
                            'ctr': float(row.get('ctr', 0) or 0),
                            'keywords_count': int(row.get('keywords_count', 0) or 0),
                            'top_keywords': row.get('top_keywords', '')
                        }
                    except (ValueError, TypeError):
                        # Skip rows with parsing errors instead of aborting the load
                        continue

            self.log(f"✓ Loaded {len(posts)} posts")
        except Exception as e:
            self.log(f"❌ Error reading posts: {e}")

        return posts

    def load_opportunities(self, csv_path):
        """Load keyword opportunities keyed by post ID.

        Args:
            csv_path: Path to the keyword-opportunities CSV.

        Returns:
            dict mapping post ID -> opportunity metrics/recommendations;
            empty dict when the file is missing or unreadable.
        """
        opportunities = {}
        if not csv_path.exists():
            self.log(f"⚠️ Opportunities file not found: {csv_path}")
            return opportunities

        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    post_id = row.get('ID')
                    if post_id:
                        try:
                            opportunities[post_id] = {
                                'opportunity_score': float(row.get('opportunity_score', 0) or 0),
                                'estimated_traffic_gain': int(float(row.get('estimated_traffic_gain', 0) or 0)),
                                'title_recommendations': row.get('title_recommendations', ''),
                                'description_recommendations': row.get('description_recommendations', ''),
                                'content_recommendations': row.get('content_recommendations', '')
                            }
                        except (ValueError, TypeError):
                            # Skip rows with parsing errors
                            continue

            self.log(f"✓ Loaded {len(opportunities)} opportunities")
        except Exception as e:
            self.log(f"⚠️ Error reading opportunities: {e}")

        return opportunities

    def load_content_gaps(self, csv_path):
        """Load content-gap suggestions as a list of dicts.

        Rows with an unparseable traffic_potential are skipped
        individually (previously one bad row aborted the whole load).

        Args:
            csv_path: Path to the content-gaps CSV.

        Returns:
            list of gap dicts; empty list when the file is missing
            or unreadable.
        """
        gaps = []
        if not csv_path.exists():
            self.log(f"⚠️ Content gaps file not found: {csv_path}")
            return gaps

        try:
            with open(csv_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    try:
                        gaps.append({
                            'title': row.get('title', ''),
                            'why_valuable': row.get('why_valuable', ''),
                            'search_volume': row.get('search_volume', ''),
                            'format': row.get('format', ''),
                            'traffic_potential': int(row.get('traffic_potential', 0) or 0),
                            'priority': row.get('priority', 'medium')
                        })
                    except (ValueError, TypeError):
                        # Skip rows with parsing errors
                        continue

            self.log(f"✓ Loaded {len(gaps)} content gap ideas")
        except Exception as e:
            self.log(f"⚠️ Error reading content gaps: {e}")

        return gaps

    def calculate_priority_score(self, post, opportunity=None):
        """Calculate comprehensive priority score (0-100) for a post.

        Weighted components:
        - Position (35%): closer to page 1 (position 1-30) scores higher.
        - Traffic potential (30%): proportional to impressions, capped.
        - CTR gap (20%): shortfall vs. expected CTR for the position.
        - Content quality (15%): existing traffic plus low bounce rate.

        Args:
            post: dict of post metrics (avg_position, impressions, ctr,
                traffic, bounce_rate).
            opportunity: reserved for future use; currently unused.

        Returns:
            float score rounded to one decimal, clamped to [0, 100].
        """
        position = post.get('avg_position', 50)
        impressions = post.get('impressions', 0)
        ctr = post.get('ctr', 0)
        traffic = post.get('traffic', 0)

        # Position score (35%): Closer to page 1 = higher
        if position > 0 and position <= 30:
            position_score = max(0, (30 - position) / 29 * 35)
        else:
            position_score = 0

        # Traffic potential (30%): Based on impressions
        traffic_potential = min(30, (impressions / 1000) * 30)

        # CTR improvement (20%): Gap vs expected CTR for the ranking position
        expected_ctr_map = {
            1: 0.30, 2: 0.16, 3: 0.11, 4: 0.08, 5: 0.07,
            6: 0.06, 7: 0.05, 8: 0.05, 9: 0.04, 10: 0.04,
            11: 0.02, 12: 0.02, 13: 0.015, 14: 0.015, 15: 0.013,
            16: 0.012, 17: 0.011, 18: 0.01, 19: 0.009, 20: 0.008
        }
        expected_ctr = expected_ctr_map.get(int(position), 0.005) if position > 0 else 0
        if expected_ctr > 0:
            ctr_gap = max(0, expected_ctr - ctr)
            ctr_score = min(20, (ctr_gap / expected_ctr * 100 / 5) * 20)
        else:
            ctr_score = 0

        # Content quality (15%): Existing traffic and engagement
        quality_score = min(15, (traffic / 100) * 7.5 +
                            (100 - post.get('bounce_rate', 50)) / 100 * 7.5)

        total = round(position_score + traffic_potential + ctr_score + quality_score, 1)
        return max(0, min(100, total))

    def generate_markdown_report(self, posts, opportunities, gaps, top_n=20):
        """Generate the full markdown strategy report.

        Args:
            posts: dict of post ID -> post metrics.
            opportunities: dict of post ID -> opportunity data.
            gaps: list of content-gap dicts.
            top_n: number of top-priority posts to detail.

        Returns:
            str: the complete markdown report text.
        """
        report = []
        report.append("# SEO Optimization Strategy Report\n")
        report.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")

        # Calculate aggregate metrics (avg position only over ranked posts)
        total_traffic = sum(p.get('traffic', 0) for p in posts.values())
        total_impressions = sum(p.get('impressions', 0) for p in posts.values())
        avg_position = sum(p.get('avg_position', 50) for p in posts.values() if p.get('avg_position', 0) > 0) / max(1, len([p for p in posts.values() if p.get('avg_position', 0) > 0]))

        # Executive Summary
        report.append("## Executive Summary\n")
        report.append(f"- **Total Posts Analyzed:** {len(posts)}\n")
        report.append(f"- **Current Monthly Traffic:** {total_traffic:,} visits\n")
        report.append(f"- **Total Impressions (90d):** {total_impressions:,}\n")
        report.append(f"- **Average Search Position:** {avg_position:.1f}\n")
        report.append(f"- **Optimization Opportunities:** {len(opportunities)}\n")
        report.append(f"- **Content Gap Ideas:** {len(gaps)}\n")
        report.append(f"- **Potential Traffic Gain (Phase 1):** +{sum(o.get('estimated_traffic_gain', 0) for o in opportunities.values()):,} visits/month\n\n")

        # Key Metrics
        report.append("### Quick Wins (Estimated Impact)\n\n")
        quick_wins = sorted(opportunities.values(),
                            key=lambda x: x.get('estimated_traffic_gain', 0),
                            reverse=True)[:5]
        total_quick_win_traffic = sum(w.get('estimated_traffic_gain', 0) for w in quick_wins)
        report.append(f"Top 5 opportunities could bring **+{total_quick_win_traffic:,} visits/month**\n\n")

        # Top 20 Posts to Optimize
        report.append("## Top 20 Posts to Optimize\n\n")
        report.append("Ranked by optimization potential (combination of position, traffic potential, and CTR improvement).\n\n")

        # Score all posts and rank descending by priority score
        scored_posts = []
        for post_id, post in posts.items():
            opp = opportunities.get(post_id, {})
            score = self.calculate_priority_score(post, opp)
            scored_posts.append((post_id, post, opp, score))

        scored_posts = sorted(scored_posts, key=lambda x: x[3], reverse=True)

        for i, (post_id, post, opp, score) in enumerate(scored_posts[:top_n], 1):
            position = post.get('avg_position', 0)
            impressions = post.get('impressions', 0)
            traffic = post.get('traffic', 0)

            report.append(f"### {i}. {post['title']}\n\n")
            report.append(f"**Current Position:** {position:.1f} | **Impressions:** {impressions:,} | **Traffic:** {traffic} visits\n")
            report.append(f"**Priority Score:** {score:.1f}/100 | **Estimated Gain:** +{opp.get('estimated_traffic_gain', 0)} visits\n\n")

            if position > 0 and position <= 30:
                report.append(f"**Status:** Ranking on {'page 1' if position <= 10 else 'page 2-3'}\n\n")

            # Recommendations are semicolon-separated strings from the CSVs
            if opp.get('title_recommendations'):
                report.append("**Title Optimization:**\n")
                for rec in opp['title_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")

            if opp.get('description_recommendations'):
                report.append("**Meta Description:**\n")
                for rec in opp['description_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")

            if opp.get('content_recommendations'):
                report.append("**Content Improvements:**\n")
                for rec in opp['content_recommendations'].split(';'):
                    rec = rec.strip()
                    if rec:
                        report.append(f"- {rec}\n")
                report.append("\n")

            report.append("---\n\n")

        # Keyword Opportunities Summary — bucket top-N posts by ranking page
        report.append("## Keyword Opportunities Summary\n\n")
        opportunity_categories = {
            'page_2': [],
            'page_3': []
        }

        for opp_id, opp in opportunities.items():
            if any(opp_id == p[0] for p in scored_posts[:top_n]):
                score = opp.get('opportunity_score', 0)
                post = posts.get(opp_id, {})
                position = post.get('avg_position', 0)

                if 11 <= position <= 15:
                    opportunity_categories['page_2'].append((score, opp))
                elif 16 <= position <= 30:
                    opportunity_categories['page_3'].append((score, opp))

        report.append(f"**Page 2 (Positions 11-15):** {len(opportunity_categories['page_2'])} keywords ready for quick wins\n")
        report.append(f"**Page 3+ (Positions 16-30):** {len(opportunity_categories['page_3'])} keywords with medium effort\n\n")

        # Content Gap Analysis (high-priority gaps float to the top)
        report.append("## Content Gap Analysis\n\n")
        report.append(f"Identified **{len(gaps)} high-value content opportunities** not currently covered:\n\n")

        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('priority') == 'high', reverse=True)[:15], 1):
            report.append(f"### {i}. {gap['title']}\n\n")
            report.append(f"**Priority:** {gap.get('priority', 'medium').upper()}\n")
            report.append(f"**Search Volume:** {gap.get('search_volume', 'medium')}\n")
            report.append(f"**Format:** {gap.get('format', 'guide')}\n")
            report.append(f"**Estimated Traffic Potential:** +{gap.get('traffic_potential', 50)} visits/month\n\n")

            if gap.get('why_valuable'):
                report.append(f"**Why valuable:** {gap['why_valuable']}\n\n")

        # 90-Day Action Plan
        report.append("## 90-Day Action Plan\n\n")
        report.append("### Week 1-2: Quick Wins (Estimated +100 visits/month)\n\n")
        report.append("Focus on posts with highest opportunity scores that are already ranking on page 2:\n\n")
        # scored_posts is already sorted by score descending
        quick_wins_phase = scored_posts[:top_n][:5]
        for i, (post_id, post, opp, score) in enumerate(quick_wins_phase, 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(f"   - Update SEO title and meta description\n")
            report.append(f"   - Estimated effort: 30-60 minutes\n")
            report.append(f"   - Expected gain: +{opp.get('estimated_traffic_gain', 50)} visits\n\n")

        report.append("### Week 3-4: Core Content Optimization (Estimated +150 visits/month)\n\n")
        report.append("Improve content structure and internal linking:\n\n")
        mid_phase = scored_posts[5:15][:5]
        for i, (post_id, post, opp, score) in enumerate(mid_phase, 1):
            report.append(f"{i}. **{post['title'][:60]}**\n")
            report.append(f"   - Add missing content sections\n")
            report.append(f"   - Improve header structure\n")
            report.append(f"   - Estimated effort: 2-3 hours\n\n")

        report.append("### Week 5-8: New Content Creation (Estimated +300 visits/month)\n\n")
        report.append("Create 3-5 pieces of new content targeting high-value gaps:\n\n")
        for i, gap in enumerate(sorted(gaps, key=lambda x: x.get('traffic_potential', 0), reverse=True)[:4], 1):
            report.append(f"{i}. **{gap['title']}** ({gap.get('format', 'guide').title()})\n")
            report.append(f"   - Estimated effort: 4-6 hours\n")
            report.append(f"   - Expected traffic: +{gap.get('traffic_potential', 50)} visits/month\n\n")

        report.append("### Week 9-12: Refinement & Analysis (Estimated +100 visits/month)\n\n")
        report.append("- Monitor ranking changes and CTR improvements\n")
        report.append("- Refine underperforming optimizations\n")
        report.append("- Re-run keyword analysis to identify new opportunities\n\n")

        report.append("**Total Estimated 90-Day Impact: +650 visits/month (+~7.8% growth)**\n\n")

        # Methodology
        report.append("## Methodology\n\n")
        report.append("### Priority Score Calculation\n\n")
        report.append("Each post is scored based on:\n")
        report.append("- **Position (35%):** Posts ranking 11-20 get highest scores (closest to page 1)\n")
        report.append("- **Traffic Potential (30%):** Based on search impressions\n")
        report.append("- **CTR Gap (20%):** Difference between current and expected CTR for position\n")
        report.append("- **Content Quality (15%):** Existing traffic and bounce rate\n\n")

        report.append("### Data Sources\n\n")
        report.append("- **Google Analytics:** Traffic metrics (90-day window)\n")
        report.append("- **Google Search Console:** Keyword data, impressions, clicks, positions\n")
        report.append("- **WordPress REST API:** Current SEO metadata and content structure\n\n")

        report.append("### Assumptions\n\n")
        report.append("- Traffic estimates are based on historical CTR and position data\n")
        report.append("- Moving one position up typically improves CTR by 20-30%\n")
        report.append("- Page 1 rankings (positions 1-10) receive ~20-30% of total impressions\n")
        report.append("- New content takes 4-8 weeks to gain significant traction\n\n")

        return "\n".join(report)

    def export_report(self, report_text, output_md):
        """Write the markdown report text to *output_md*.

        Args:
            report_text: complete markdown report string.
            output_md: destination path for the .md file.
        """
        try:
            with open(output_md, 'w', encoding='utf-8') as f:
                f.write(report_text)

            self.log(f"✓ Exported report to {output_md}")
        except Exception as e:
            self.log(f"❌ Error exporting report: {e}")

    def export_prioritized_csv(self, posts, opportunities, output_csv):
        """Export all posts with priority scores, sorted descending.

        Args:
            posts: dict of post ID -> post metrics.
            opportunities: dict of post ID -> opportunity data.
            output_csv: destination path for the CSV file.
        """
        try:
            scored_posts = []
            for post_id, post in posts.items():
                opp = opportunities.get(post_id, {})
                score = self.calculate_priority_score(post, opp)

                scored_posts.append({
                    'ID': post_id,
                    'Title': post.get('title', ''),
                    'URL': post.get('url', ''),
                    'Priority_Score': score,
                    'Estimated_Traffic_Gain': opp.get('estimated_traffic_gain', 0),
                    'Current_Position': post.get('avg_position', 0),
                    'Impressions': post.get('impressions', 0),
                    'Traffic': post.get('traffic', 0),
                    'CTR': f"{post.get('ctr', 0):.2%}",
                    'Keywords_Count': post.get('keywords_count', 0)
                })

            scored_posts = sorted(scored_posts, key=lambda x: x['Priority_Score'], reverse=True)

            fieldnames = ['ID', 'Title', 'URL', 'Priority_Score', 'Estimated_Traffic_Gain',
                          'Current_Position', 'Impressions', 'Traffic', 'CTR', 'Keywords_Count']

            with open(output_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(scored_posts)

            self.log(f"✓ Exported {len(scored_posts)} prioritized posts to {output_csv}")
        except Exception as e:
            self.log(f"❌ Error exporting prioritized CSV: {e}")

    def run(self, posts_csv, opportunities_csv, gaps_csv, output_md, output_prioritized_csv, top_n=20):
        """Run the complete report generation workflow.

        Loads the three input CSVs, generates the markdown report, and
        exports the report and prioritized-posts CSV. Aborts (without
        raising) if no posts could be loaded.

        Args:
            posts_csv: path to posts-with-analytics CSV.
            opportunities_csv: path to keyword-opportunities CSV.
            gaps_csv: path to content-gaps CSV.
            output_md: destination path for the markdown report.
            output_prioritized_csv: destination path for prioritized CSV.
            top_n: number of top posts to detail in the report.
        """
        self.log("📊 Generating SEO optimization report...")
        self.log(f"Input files: posts_with_analytics, opportunities, content_gaps\n")

        # Load data
        posts = self.load_posts_with_analytics(posts_csv)
        opportunities = self.load_opportunities(opportunities_csv)
        gaps = self.load_content_gaps(gaps_csv)

        if not posts:
            self.log("❌ No posts loaded. Cannot generate report.")
            return

        # Generate report
        self.log("\n📝 Generating markdown report...")
        report_text = self.generate_markdown_report(posts, opportunities, gaps, top_n)

        # Export report
        self.log("\n📁 Exporting files...")
        self.export_report(report_text, output_md)
        self.export_prioritized_csv(posts, opportunities, output_prioritized_csv)

        self.log("\n✓ Report generation complete!")
|
def main():
    """CLI entry point: parse arguments and generate the SEO report."""
    results_dir = Path('output/results')

    parser = argparse.ArgumentParser(description='Generate SEO optimization report')
    parser.add_argument('--posts-with-analytics', type=Path,
                        default=results_dir / 'posts_with_analytics.csv',
                        help='Posts with analytics CSV')
    parser.add_argument('--keyword-opportunities', type=Path,
                        default=results_dir / 'keyword_opportunities.csv',
                        help='Keyword opportunities CSV')
    parser.add_argument('--content-gaps', type=Path,
                        default=results_dir / 'content_gaps.csv',
                        help='Content gaps CSV')
    parser.add_argument('--output-report', type=Path,
                        default=results_dir / 'seo_optimization_report.md',
                        help='Output markdown report')
    parser.add_argument('--output-csv', type=Path,
                        default=results_dir / 'posts_prioritized.csv',
                        help='Output prioritized posts CSV')
    parser.add_argument('--top-n', type=int, default=20,
                        help='Number of top posts to detail')

    args = parser.parse_args()

    ReportGenerator().run(
        args.posts_with_analytics,
        args.keyword_opportunities,
        args.content_gaps,
        args.output_report,
        args.output_csv,
        args.top_n,
    )
|
# Script entry point: run the report CLI when executed directly.
if __name__ == '__main__':
    main()
|