Refactor SEO automation into unified CLI application

Major refactoring to create a clean, integrated CLI application:

### New Features:
- Unified CLI executable (./seo) with simple command structure
- All commands accept optional CSV file arguments
- Auto-detection of latest files when no arguments provided
- Simplified output directory structure (output/ instead of output/reports/)
- Cleaner export filename format (all_posts_YYYY-MM-DD.csv)

### Commands:
- export: Export all posts from WordPress sites
- analyze [csv]: Analyze posts with AI (optional CSV input)
- recategorize [csv]: Recategorize posts with AI
- seo_check: Check SEO quality
- categories: Manage categories across sites
- approve [files]: Review and approve recommendations
- full_pipeline: Run complete workflow
- analytics, gaps, opportunities, report, status

### Changes:
- Moved all scripts to scripts/ directory
- Created config.yaml for configuration
- Updated all scripts to use output/ directory
- Deprecated old seo-cli.py in favor of new ./seo
- Added AGENTS.md and CHANGELOG.md documentation
- Consolidated README.md with updated usage

### Technical:
- Added PyYAML dependency
- Removed hardcoded configuration values
- All scripts now properly integrated
- Better error handling and user feedback

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-02-16 14:24:44 +01:00
parent 3b51952336
commit 8c7cd24685
57 changed files with 16095 additions and 560 deletions
--- a/scripts/ai_analyze_posts_for_decisions.py
+++ b/scripts/ai_analyze_posts_for_decisions.py
@@ -0,0 +1,453 @@
+#!/usr/bin/env python3
+"""
+AI-Powered Post Analysis and Recommendation Script
+Analyzes exported posts CSV using Claude via OpenRouter and provides
+clear, automation-friendly recommendations for:
+- Which site to move posts to
+- Categories to set
+- Posts to consolidate
+- Posts to delete
+- Posts to optimize
+"""
+
+import csv
+import json
+import logging
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+import requests
+from datetime import datetime
+from config import Config
+
+# Setup logging: configure the root logger once, at import time, to emit
+# INFO and above with a timestamp and the level name in front of each message.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+# Module-level logger used by everything in this file.
+logger = logging.getLogger(__name__)
+
+
+class PostAnalyzer:
+    """Analyze posts CSV using Claude AI via OpenRouter."""
+
+    def __init__(self, csv_file: str):
+        """Initialize analyzer with CSV file."""
+        self.csv_file = Path(csv_file)
+        self.openrouter_api_key = Config.OPENROUTER_API_KEY
+        self.posts = []
+        self.analyzed_posts = []
+        self.api_calls = 0
+        self.ai_cost = 0.0
+
+    def load_csv(self) -> bool:
+        """Load posts from CSV file."""
+        logger.info(f"Loading CSV: {self.csv_file}")
+
+        if not self.csv_file.exists():
+            logger.error(f"CSV file not found: {self.csv_file}")
+            return False
+
+        try:
+            with open(self.csv_file, 'r', encoding='utf-8') as f:
+                reader = csv.DictReader(f)
+                self.posts = list(reader)
+
+            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
+
+            # Group by site for stats
+            by_site = {}
+            for post in self.posts:
+                site = post.get('site', '')
+                if site not in by_site:
+                    by_site[site] = 0
+                by_site[site] += 1
+
+            for site, count in by_site.items():
+                logger.info(f"  {site}: {count} posts")
+
+            return True
+
+        except Exception as e:
+            logger.error(f"Error loading CSV: {e}")
+            return False
+
+    def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
+        """Batch posts for AI analysis to manage token usage."""
+        batches = []
+        for i in range(0, len(self.posts), batch_size):
+            batches.append(self.posts[i:i + batch_size])
+        return batches
+
+    def format_batch_for_ai(self, batch: List[Dict]) -> str:
+        """Format batch of posts for AI analysis."""
+        formatted = "POSTS TO ANALYZE:\n\n"
+
+        for i, post in enumerate(batch, 1):
+            formatted += f"{i}. POST ID: {post['post_id']}\n"
+            formatted += f"   Site: {post['site']}\n"
+            formatted += f"   Title: {post['title']}\n"
+            formatted += f"   Status: {post['status']}\n"
+            formatted += f"   Word Count: {post['word_count']}\n"
+            formatted += f"   Content: {post['content_preview']}\n"
+            formatted += f"   Current Categories: {post['categories']}\n"
+            formatted += f"   Meta Description: {post['meta_description']}\n"
+            formatted += "\n"
+
+        return formatted
+
+    def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
+        """Get AI recommendations for a batch of posts."""
+        if not self.openrouter_api_key:
+            logger.error("OPENROUTER_API_KEY not set")
+            return None
+
+        batch_text = self.format_batch_for_ai(batch)
+
+        prompt = f"""Analyze these blog posts and provide clear, actionable recommendations.
+
+Website Strategy:
+- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
+- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
+- hellogeek.net: Low-traffic, experimental, off-brand, or niche content
+
+{batch_text}
+
+For EACH post, provide a JSON object with:
+{{
+  "post_id": <id>,
+  "decision": "<ACTION>" where ACTION is ONE of:
+    - "Keep on mistergeek.net" (high-value, high-traffic)
+    - "Move to webscroll.fr" (torrenting/file-sharing content)
+    - "Move to hellogeek.net" (low-traffic or off-brand)
+    - "Delete" (spam, extremely low quality, zero traffic)
+    - "Consolidate with post_id:<id>" (similar content, duplicate)
+  "category": "<CATEGORY>" where category is ONE of:
+    - "VPN"
+    - "Software/Tools"
+    - "Gaming"
+    - "Streaming"
+    - "Torrenting"
+    - "File-Sharing"
+    - "SEO"
+    - "Content Marketing"
+    - "Other"
+  "reason": "<Brief reason for decision>",
+  "priority": "<High|Medium|Low>",
+  "notes": "<Any additional notes>"
+}}
+
+Return ONLY a JSON array. Example:
+[
+  {{"post_id": 2845, "decision": "Keep on mistergeek.net", "category": "VPN", "reason": "High traffic, core topic", "priority": "High", "notes": "Already optimized"}},
+  {{"post_id": 1234, "decision": "Move to webscroll.fr", "category": "Torrenting", "reason": "Torrent tracker content", "priority": "Medium", "notes": "Good SEO potential on target site"}}
+]
+
+Analyze all posts and provide recommendations for EVERY post in the batch."""
+
+        try:
+            logger.info(f"  Sending batch to Claude for analysis...")
+
+            response = requests.post(
+                "https://openrouter.ai/api/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {self.openrouter_api_key}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "anthropic/claude-3.5-sonnet",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.3,  # Lower temp for more consistent recommendations
+                },
+                timeout=60
+            )
+            response.raise_for_status()
+
+            result = response.json()
+            self.api_calls += 1
+
+            # Track cost
+            usage = result.get('usage', {})
+            input_tokens = usage.get('prompt_tokens', 0)
+            output_tokens = usage.get('completion_tokens', 0)
+            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000
+
+            recommendations_text = result['choices'][0]['message']['content'].strip()
+            logger.info(f"  ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")
+
+            return recommendations_text
+
+        except Exception as e:
+            logger.error(f"Error getting AI recommendations: {e}")
+            return None
+
+    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
+        """Parse JSON recommendations from AI."""
+        try:
+            # Try to extract JSON from response
+            start_idx = recommendations_json.find('[')
+            end_idx = recommendations_json.rfind(']') + 1
+
+            if start_idx == -1 or end_idx == 0:
+                logger.error("Could not find JSON array in response")
+                return []
+
+            json_str = recommendations_json[start_idx:end_idx]
+            recommendations = json.loads(json_str)
+
+            return recommendations
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Error parsing JSON recommendations: {e}")
+            logger.debug(f"Response was: {recommendations_json[:500]}")
+            return []
+
+    def analyze_all_posts(self) -> bool:
+        """Analyze all posts in batches."""
+        logger.info("\n" + "="*70)
+        logger.info("ANALYZING POSTS WITH AI")
+        logger.info("="*70 + "\n")
+
+        batches = self.batch_posts_for_analysis(batch_size=10)
+        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of 10...\n")
+
+        all_recommendations = {}
+
+        for batch_num, batch in enumerate(batches, 1):
+            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")
+
+            recommendations_json = self.get_ai_recommendations(batch)
+
+            if not recommendations_json:
+                logger.error(f"  Failed to get recommendations for batch {batch_num}")
+                continue
+
+            recommendations = self.parse_recommendations(recommendations_json)
+
+            for rec in recommendations:
+                all_recommendations[str(rec.get('post_id', ''))] = rec
+
+            logger.info(f"  ✓ Got {len(recommendations)} recommendations")
+
+        logger.info(f"\n✓ Analysis complete!")
+        logger.info(f"  Total recommendations: {len(all_recommendations)}")
+        logger.info(f"  API calls: {self.api_calls}")
+        logger.info(f"  Estimated cost: ${self.ai_cost:.4f}")
+
+        # Map recommendations to posts
+        for post in self.posts:
+            post_id = str(post['post_id'])
+            if post_id in all_recommendations:
+                rec = all_recommendations[post_id]
+                post['decision'] = rec.get('decision', 'No decision')
+                post['recommended_category'] = rec.get('category', 'Other')
+                post['reason'] = rec.get('reason', '')
+                post['priority'] = rec.get('priority', 'Medium')
+                post['ai_notes'] = rec.get('notes', '')
+            else:
+                post['decision'] = 'Pending'
+                post['recommended_category'] = 'Other'
+                post['reason'] = 'No recommendation'
+                post['priority'] = 'Medium'
+                post['ai_notes'] = ''
+
+            self.analyzed_posts.append(post)
+
+        return len(self.analyzed_posts) > 0
+
+    def export_with_recommendations(self) -> Tuple[str, str, str, str]:
+        """Export CSV with recommendations and create action-specific files."""
+        output_dir = Path(__file__).parent.parent / 'output'
+        output_dir.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+        # Main file with all recommendations
+        main_file = output_dir / f'posts_with_ai_recommendations_{timestamp}.csv'
+
+        # Action-specific files
+        moves_file = output_dir / f'posts_to_move_{timestamp}.csv'
+        consolidate_file = output_dir / f'posts_to_consolidate_{timestamp}.csv'
+        delete_file = output_dir / f'posts_to_delete_{timestamp}.csv'
+
+        # Export main file
+        fieldnames = list(self.analyzed_posts[0].keys()) + [
+            'decision',
+            'recommended_category',
+            'reason',
+            'priority',
+            'ai_notes'
+        ]
+
+        logger.info(f"\nExporting recommendations to CSV...")
+
+        with open(main_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(self.analyzed_posts)
+
+        logger.info(f"✓ Main file: {main_file}")
+
+        # Export action-specific files
+        posts_to_move = [p for p in self.analyzed_posts if 'Move to' in p.get('decision', '')]
+        posts_to_consolidate = [p for p in self.analyzed_posts if 'Consolidate' in p.get('decision', '')]
+        posts_to_delete = [p for p in self.analyzed_posts if p.get('decision') == 'Delete']
+
+        # Moves file
+        if posts_to_move:
+            with open(moves_file, 'w', newline='', encoding='utf-8') as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                writer.writerows(posts_to_move)
+            logger.info(f"✓ Moves file ({len(posts_to_move)} posts): {moves_file}")
+
+        # Consolidate file
+        if posts_to_consolidate:
+            with open(consolidate_file, 'w', newline='', encoding='utf-8') as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                writer.writerows(posts_to_consolidate)
+            logger.info(f"✓ Consolidate file ({len(posts_to_consolidate)} posts): {consolidate_file}")
+
+        # Delete file
+        if posts_to_delete:
+            with open(delete_file, 'w', newline='', encoding='utf-8') as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                writer.writerows(posts_to_delete)
+            logger.info(f"✓ Delete file ({len(posts_to_delete)} posts): {delete_file}")
+
+        return (
+            str(main_file),
+            str(moves_file) if posts_to_move else None,
+            str(consolidate_file) if posts_to_consolidate else None,
+            str(delete_file) if posts_to_delete else None
+        )
+
+    def print_summary(self):
+        """Print analysis summary."""
+        logger.info("\n" + "="*70)
+        logger.info("ANALYSIS SUMMARY")
+        logger.info("="*70 + "\n")
+
+        # Count decisions
+        decisions = {}
+        for post in self.analyzed_posts:
+            decision = post.get('decision', 'Unknown')
+            decisions[decision] = decisions.get(decision, 0) + 1
+
+        logger.info("DECISIONS:")
+        for decision, count in sorted(decisions.items(), key=lambda x: x[1], reverse=True):
+            logger.info(f"  {decision}: {count} posts")
+
+        # Count categories
+        categories = {}
+        for post in self.analyzed_posts:
+            cat = post.get('recommended_category', 'Other')
+            categories[cat] = categories.get(cat, 0) + 1
+
+        logger.info("\nRECOMMENDED CATEGORIES:")
+        for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
+            logger.info(f"  {cat}: {count} posts")
+
+        # Count priorities
+        priorities = {}
+        for post in self.analyzed_posts:
+            priority = post.get('priority', 'Unknown')
+            priorities[priority] = priorities.get(priority, 0) + 1
+
+        logger.info("\nPRIORITY BREAKDOWN:")
+        for priority in ['High', 'Medium', 'Low']:
+            count = priorities.get(priority, 0)
+            logger.info(f"  {priority}: {count} posts")
+
+        # By site
+        logger.info("\nBY SITE:")
+        by_site = {}
+        for post in self.analyzed_posts:
+            site = post.get('site', 'Unknown')
+            if site not in by_site:
+                by_site[site] = []
+            by_site[site].append(post.get('decision', 'Unknown'))
+
+        for site in sorted(by_site.keys()):
+            logger.info(f"\n  {site}:")
+            decisions_for_site = {}
+            for decision in by_site[site]:
+                decisions_for_site[decision] = decisions_for_site.get(decision, 0) + 1
+
+            for decision, count in sorted(decisions_for_site.items()):
+                logger.info(f"    {decision}: {count}")
+
+    def run(self):
+        """Run complete analysis."""
+        logger.info("="*70)
+        logger.info("AI-POWERED POST ANALYSIS AND RECOMMENDATIONS")
+        logger.info("="*70)
+
+        # Load CSV
+        if not self.load_csv():
+            sys.exit(1)
+
+        # Analyze posts
+        if not self.analyze_all_posts():
+            logger.error("Failed to analyze posts")
+            sys.exit(1)
+
+        # Print summary
+        self.print_summary()
+
+        # Export results
+        logger.info("\n" + "="*70)
+        logger.info("EXPORTING RESULTS")
+        logger.info("="*70)
+
+        main_file, moves_file, consol_file, delete_file = self.export_with_recommendations()
+
+        logger.info("\n" + "="*70)
+        logger.info("NEXT STEPS")
+        logger.info("="*70)
+        logger.info("\n1. Review main file with all recommendations:")
+        logger.info(f"   {main_file}")
+        logger.info("\n2. Execute moves (automate with script):")
+        if moves_file:
+            logger.info(f"   {moves_file}")
+        else:
+            logger.info("   No posts to move")
+
+        logger.info("\n3. Consolidate duplicates:")
+        if consol_file:
+            logger.info(f"   {consol_file}")
+        else:
+            logger.info("   No posts to consolidate")
+
+        logger.info("\n4. Delete low-quality posts:")
+        if delete_file:
+            logger.info(f"   {delete_file}")
+        else:
+            logger.info("   No posts to delete")
+
+        logger.info("\n✓ Analysis complete!")
+
+
+def main():
+    """Main entry point."""
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description='Analyze exported posts CSV using Claude AI and provide recommendations'
+    )
+    parser.add_argument(
+        'csv_file',
+        help='Path to exported posts CSV file'
+    )
+
+    args = parser.parse_args()
+
+    analyzer = PostAnalyzer(args.csv_file)
+    analyzer.run()
+
+
+if __name__ == '__main__':
+    main()