Major refactoring to create a clean, integrated CLI application: ### New Features: - Unified CLI executable (./seo) with simple command structure - All commands accept optional CSV file arguments - Auto-detection of latest files when no arguments provided - Simplified output directory structure (output/ instead of output/reports/) - Cleaner export filename format (all_posts_YYYY-MM-DD.csv) ### Commands: - export: Export all posts from WordPress sites - analyze [csv]: Analyze posts with AI (optional CSV input) - recategorize [csv]: Recategorize posts with AI - seo_check: Check SEO quality - categories: Manage categories across sites - approve [files]: Review and approve recommendations - full_pipeline: Run complete workflow - analytics, gaps, opportunities, report, status ### Changes: - Moved all scripts to scripts/ directory - Created config.yaml for configuration - Updated all scripts to use output/ directory - Deprecated old seo-cli.py in favor of new ./seo - Added AGENTS.md and CHANGELOG.md documentation - Consolidated README.md with updated usage ### Technical: - Added PyYAML dependency - Removed hardcoded configuration values - All scripts now properly integrated - Better error handling and user feedback Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
454 lines
16 KiB
Python
Executable File
454 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
AI-Powered Post Analysis and Recommendation Script
|
|
Analyzes exported posts CSV using Claude via OpenRouter and provides
|
|
clear, automation-friendly recommendations for:
|
|
- Which site to move posts to
|
|
- Categories to set
|
|
- Posts to consolidate
|
|
- Posts to delete
|
|
- Posts to optimize
|
|
"""
|
|
|
|
import csv
import json
import logging
import sys
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import requests

from config import Config
|
|
|
|
# Setup logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PostAnalyzer:
    """Analyze posts CSV using Claude AI via OpenRouter.

    Workflow:
        1. ``load_csv``            - read an exported WordPress posts CSV.
        2. ``analyze_all_posts``   - send posts to Claude in batches and
                                     attach per-post recommendations.
        3. ``export_with_recommendations`` - write the annotated CSV plus
                                     action-specific CSVs (move/consolidate/delete).

    ``run()`` drives the full pipeline and exits the process on failure.
    """

    # Recommendation fields that analyze_all_posts() adds to every post dict.
    RECOMMENDATION_FIELDS = (
        'decision',
        'recommended_category',
        'reason',
        'priority',
        'ai_notes',
    )

    def __init__(self, csv_file: str):
        """Initialize analyzer with the path to an exported posts CSV."""
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts: List[Dict] = []           # raw rows loaded from the CSV
        self.analyzed_posts: List[Dict] = []  # rows with recommendation fields attached
        self.api_calls = 0                    # number of OpenRouter requests made
        self.ai_cost = 0.0                    # running cost estimate in USD

    def load_csv(self) -> bool:
        """Load posts from the CSV file into ``self.posts``.

        Returns:
            True on success; False (after logging) if the file is missing
            or cannot be read/parsed.
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            with open(self.csv_file, 'r', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))

            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")

            # Per-site counts, logged as a quick sanity check of the export.
            by_site = Counter(post.get('site', '') for post in self.posts)
            for site, count in by_site.items():
                logger.info(f"  {site}: {count} posts")

            return True

        except (OSError, csv.Error, UnicodeDecodeError) as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
        """Split loaded posts into batches to keep per-request token usage bounded."""
        return [
            self.posts[i:i + batch_size]
            for i in range(0, len(self.posts), batch_size)
        ]

    def format_batch_for_ai(self, batch: List[Dict]) -> str:
        """Render a batch of post rows as the plain-text listing sent to the model."""
        lines = ["POSTS TO ANALYZE:", ""]
        for i, post in enumerate(batch, 1):
            lines.extend([
                f"{i}. POST ID: {post['post_id']}",
                f"   Site: {post['site']}",
                f"   Title: {post['title']}",
                f"   Status: {post['status']}",
                f"   Word Count: {post['word_count']}",
                f"   Content: {post['content_preview']}",
                f"   Current Categories: {post['categories']}",
                f"   Meta Description: {post['meta_description']}",
                "",
            ])
        # join (single pass) instead of repeated string += in a loop.
        return "\n".join(lines) + "\n"

    def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
        """Ask Claude (via OpenRouter) for recommendations on one batch.

        Returns:
            The raw model response text, or None when the API key is missing
            or the request/response fails. Updates ``self.api_calls`` and
            ``self.ai_cost`` on success.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        batch_text = self.format_batch_for_ai(batch)

        prompt = f"""Analyze these blog posts and provide clear, actionable recommendations.

Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content

{batch_text}

For EACH post, provide a JSON object with:
{{
  "post_id": <id>,
  "decision": "<ACTION>" where ACTION is ONE of:
    - "Keep on mistergeek.net" (high-value, high-traffic)
    - "Move to webscroll.fr" (torrenting/file-sharing content)
    - "Move to hellogeek.net" (low-traffic or off-brand)
    - "Delete" (spam, extremely low quality, zero traffic)
    - "Consolidate with post_id:<id>" (similar content, duplicate)
  "category": "<CATEGORY>" where category is ONE of:
    - "VPN"
    - "Software/Tools"
    - "Gaming"
    - "Streaming"
    - "Torrenting"
    - "File-Sharing"
    - "SEO"
    - "Content Marketing"
    - "Other"
  "reason": "<Brief reason for decision>",
  "priority": "<High|Medium|Low>",
  "notes": "<Any additional notes>"
}}

Return ONLY a JSON array. Example:
[
  {{"post_id": 2845, "decision": "Keep on mistergeek.net", "category": "VPN", "reason": "High traffic, core topic", "priority": "High", "notes": "Already optimized"}},
  {{"post_id": 1234, "decision": "Move to webscroll.fr", "category": "Torrenting", "reason": "Torrent tracker content", "priority": "Medium", "notes": "Good SEO potential on target site"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info("  Sending batch to Claude for analysis...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,  # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost. Rates: $3 / 1M input tokens, $15 / 1M output tokens
            # (claude-3.5-sonnet pricing at time of writing — verify if model changes).
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f"  ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except (requests.RequestException, KeyError, IndexError, ValueError) as e:
            # RequestException: network/HTTP errors; KeyError/IndexError: an
            # unexpected response shape; ValueError: non-JSON response body.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse the JSON array of recommendations from the raw model response.

        The model may wrap the array in prose or markdown fences, so the
        parser extracts the outermost ``[...]`` span before decoding.
        Returns an empty list on any parse failure.
        """
        try:
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1

            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []

            return json.loads(recommendations_json[start_idx:end_idx])

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []

    def analyze_all_posts(self) -> bool:
        """Analyze all loaded posts in batches, filling ``self.analyzed_posts``.

        Posts for which no recommendation came back are marked 'Pending' so
        every input row appears in the output. Returns True if at least one
        post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("ANALYZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        batch_size = 10
        batches = self.batch_posts_for_analysis(batch_size=batch_size)
        # Log the actual batch size instead of a hardcoded "10" so the
        # message cannot drift from the configuration above.
        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of {batch_size}...\n")

        all_recommendations: Dict[str, Dict] = {}

        for batch_num, batch in enumerate(batches, 1):
            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")

            recommendations_json = self.get_ai_recommendations(batch)

            if not recommendations_json:
                # Best-effort: skip the failed batch; its posts stay 'Pending'.
                logger.error(f"  Failed to get recommendations for batch {batch_num}")
                continue

            recommendations = self.parse_recommendations(recommendations_json)

            for rec in recommendations:
                # Keyed by string post_id to match the CSV's string values.
                all_recommendations[str(rec.get('post_id', ''))] = rec

            logger.info(f"  ✓ Got {len(recommendations)} recommendations")

        logger.info(f"\n✓ Analysis complete!")
        logger.info(f"  Total recommendations: {len(all_recommendations)}")
        logger.info(f"  API calls: {self.api_calls}")
        logger.info(f"  Estimated cost: ${self.ai_cost:.4f}")

        # Map recommendations back onto the original post rows.
        for post in self.posts:
            post_id = str(post['post_id'])
            if post_id in all_recommendations:
                rec = all_recommendations[post_id]
                post['decision'] = rec.get('decision', 'No decision')
                post['recommended_category'] = rec.get('category', 'Other')
                post['reason'] = rec.get('reason', '')
                post['priority'] = rec.get('priority', 'Medium')
                post['ai_notes'] = rec.get('notes', '')
            else:
                post['decision'] = 'Pending'
                post['recommended_category'] = 'Other'
                post['reason'] = 'No recommendation'
                post['priority'] = 'Medium'
                post['ai_notes'] = ''

            self.analyzed_posts.append(post)

        return len(self.analyzed_posts) > 0

    def _write_csv(self, path: Path, rows: List[Dict], fieldnames: List[str]) -> None:
        """Write rows to path as a headed CSV (shared by all export files)."""
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def export_with_recommendations(self) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
        """Export the annotated CSV and action-specific CSVs.

        Returns:
            (main_file, moves_file, consolidate_file, delete_file) as path
            strings; the action-specific entries are None when no posts
            require that action.

        Raises:
            ValueError: if called before analyze_all_posts() produced rows.
        """
        if not self.analyzed_posts:
            raise ValueError("No analyzed posts to export; run analyze_all_posts() first")

        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Main file with all recommendations, plus action-specific files.
        main_file = output_dir / f'posts_with_ai_recommendations_{timestamp}.csv'
        moves_file = output_dir / f'posts_to_move_{timestamp}.csv'
        consolidate_file = output_dir / f'posts_to_consolidate_{timestamp}.csv'
        delete_file = output_dir / f'posts_to_delete_{timestamp}.csv'

        # BUG FIX: analyze_all_posts() already added the recommendation
        # fields to every post dict, so appending them unconditionally
        # produced duplicate CSV columns. De-duplicate preserving order.
        fieldnames = list(dict.fromkeys(
            list(self.analyzed_posts[0].keys()) + list(self.RECOMMENDATION_FIELDS)
        ))

        logger.info(f"\nExporting recommendations to CSV...")

        self._write_csv(main_file, self.analyzed_posts, fieldnames)
        logger.info(f"✓ Main file: {main_file}")

        # Partition posts by the action their decision implies.
        posts_to_move = [p for p in self.analyzed_posts if 'Move to' in p.get('decision', '')]
        posts_to_consolidate = [p for p in self.analyzed_posts if 'Consolidate' in p.get('decision', '')]
        posts_to_delete = [p for p in self.analyzed_posts if p.get('decision') == 'Delete']

        if posts_to_move:
            self._write_csv(moves_file, posts_to_move, fieldnames)
            logger.info(f"✓ Moves file ({len(posts_to_move)} posts): {moves_file}")

        if posts_to_consolidate:
            self._write_csv(consolidate_file, posts_to_consolidate, fieldnames)
            logger.info(f"✓ Consolidate file ({len(posts_to_consolidate)} posts): {consolidate_file}")

        if posts_to_delete:
            self._write_csv(delete_file, posts_to_delete, fieldnames)
            logger.info(f"✓ Delete file ({len(posts_to_delete)} posts): {delete_file}")

        return (
            str(main_file),
            str(moves_file) if posts_to_move else None,
            str(consolidate_file) if posts_to_consolidate else None,
            str(delete_file) if posts_to_delete else None
        )

    def print_summary(self):
        """Log a breakdown of decisions, categories, priorities, and per-site stats."""
        logger.info("\n" + "="*70)
        logger.info("ANALYSIS SUMMARY")
        logger.info("="*70 + "\n")

        # Decisions, most common first.
        decisions = Counter(p.get('decision', 'Unknown') for p in self.analyzed_posts)
        logger.info("DECISIONS:")
        for decision, count in decisions.most_common():
            logger.info(f"  {decision}: {count} posts")

        # Recommended categories, most common first.
        categories = Counter(p.get('recommended_category', 'Other') for p in self.analyzed_posts)
        logger.info("\nRECOMMENDED CATEGORIES:")
        for cat, count in categories.most_common():
            logger.info(f"  {cat}: {count} posts")

        # Priorities in fixed High/Medium/Low order.
        priorities = Counter(p.get('priority', 'Unknown') for p in self.analyzed_posts)
        logger.info("\nPRIORITY BREAKDOWN:")
        for priority in ['High', 'Medium', 'Low']:
            logger.info(f"  {priority}: {priorities.get(priority, 0)} posts")

        # Per-site decision counts.
        logger.info("\nBY SITE:")
        by_site: Dict[str, List[str]] = {}
        for post in self.analyzed_posts:
            site = post.get('site', 'Unknown')
            by_site.setdefault(site, []).append(post.get('decision', 'Unknown'))

        for site in sorted(by_site):
            logger.info(f"\n  {site}:")
            for decision, count in sorted(Counter(by_site[site]).items()):
                logger.info(f"    {decision}: {count}")

    def run(self):
        """Run the complete analysis pipeline; exits the process on failure."""
        logger.info("="*70)
        logger.info("AI-POWERED POST ANALYSIS AND RECOMMENDATIONS")
        logger.info("="*70)

        # Load CSV; abort the process if the input is unusable.
        if not self.load_csv():
            sys.exit(1)

        # Analyze posts with the AI; abort if nothing was processed.
        if not self.analyze_all_posts():
            logger.error("Failed to analyze posts")
            sys.exit(1)

        self.print_summary()

        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        main_file, moves_file, consol_file, delete_file = self.export_with_recommendations()

        logger.info("\n" + "="*70)
        logger.info("NEXT STEPS")
        logger.info("="*70)
        logger.info("\n1. Review main file with all recommendations:")
        logger.info(f"   {main_file}")

        logger.info("\n2. Execute moves (automate with script):")
        if moves_file:
            logger.info(f"   {moves_file}")
        else:
            logger.info("   No posts to move")

        logger.info("\n3. Consolidate duplicates:")
        if consol_file:
            logger.info(f"   {consol_file}")
        else:
            logger.info("   No posts to consolidate")

        logger.info("\n4. Delete low-quality posts:")
        if delete_file:
            logger.info(f"   {delete_file}")
        else:
            logger.info("   No posts to delete")

        logger.info("\n✓ Analysis complete!")
|
|
|
|
|
|
def main():
    """Main entry point: parse the CSV path argument and run the analyzer."""
    import argparse

    arg_parser = argparse.ArgumentParser(
        description='Analyze exported posts CSV using Claude AI and provide recommendations'
    )
    arg_parser.add_argument('csv_file', help='Path to exported posts CSV file')
    parsed_args = arg_parser.parse_args()

    PostAnalyzer(parsed_args.csv_file).run()


if __name__ == '__main__':
    main()
|