Major refactoring to create a clean, integrated CLI application: ### New Features: - Unified CLI executable (./seo) with simple command structure - All commands accept optional CSV file arguments - Auto-detection of latest files when no arguments provided - Simplified output directory structure (output/ instead of output/reports/) - Cleaner export filename format (all_posts_YYYY-MM-DD.csv) ### Commands: - export: Export all posts from WordPress sites - analyze [csv]: Analyze posts with AI (optional CSV input) - recategorize [csv]: Recategorize posts with AI - seo_check: Check SEO quality - categories: Manage categories across sites - approve [files]: Review and approve recommendations - full_pipeline: Run complete workflow - analytics, gaps, opportunities, report, status ### Changes: - Moved all scripts to scripts/ directory - Created config.yaml for configuration - Updated all scripts to use output/ directory - Deprecated old seo-cli.py in favor of new ./seo - Added AGENTS.md and CHANGELOG.md documentation - Consolidated README.md with updated usage ### Technical: - Added PyYAML dependency - Removed hardcoded configuration values - All scripts now properly integrated - Better error handling and user feedback Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
454 lines
16 KiB
Python
Executable File
454 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
AI-Powered Post Analysis and Recommendation Script
|
|
Analyzes exported posts CSV using Claude via OpenRouter and provides
|
|
clear, automation-friendly recommendations for:
|
|
- Which site to move posts to
|
|
- Categories to set
|
|
- Posts to consolidate
|
|
- Posts to delete
|
|
- Posts to optimize
|
|
"""
|
|
|
|
import csv
import json
import logging
import sys
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import requests

from config import Config
|
|
|
|
# Setup logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PostAnalyzer:
    """Analyze posts CSV using Claude AI via OpenRouter.

    Workflow:
        1. ``load_csv``            - read an exported WordPress posts CSV.
        2. ``analyze_all_posts``   - send posts to Claude in batches and
                                     attach per-post recommendations.
        3. ``export_with_recommendations`` - write the annotated CSV plus
                                     action-specific CSVs (move/consolidate/delete).

    ``run()`` drives the full pipeline and exits the process on failure.
    """

    # Recommendation fields that analyze_all_posts() adds to every post dict.
    RECOMMENDATION_FIELDS = (
        'decision',
        'recommended_category',
        'reason',
        'priority',
        'ai_notes',
    )

    def __init__(self, csv_file: str):
        """Initialize analyzer with the path to an exported posts CSV."""
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.posts: List[Dict] = []           # raw rows loaded from the CSV
        self.analyzed_posts: List[Dict] = []  # rows with recommendation fields attached
        self.api_calls = 0                    # number of OpenRouter requests made
        self.ai_cost = 0.0                    # running cost estimate in USD

    def load_csv(self) -> bool:
        """Load posts from the CSV file into ``self.posts``.

        Returns:
            True on success; False (after logging) if the file is missing
            or cannot be read/parsed.
        """
        logger.info(f"Loading CSV: {self.csv_file}")

        if not self.csv_file.exists():
            logger.error(f"CSV file not found: {self.csv_file}")
            return False

        try:
            with open(self.csv_file, 'r', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))

            logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")

            # Per-site counts, logged as a quick sanity check of the export.
            by_site = Counter(post.get('site', '') for post in self.posts)
            for site, count in by_site.items():
                logger.info(f"  {site}: {count} posts")

            return True

        except (OSError, csv.Error, UnicodeDecodeError) as e:
            logger.error(f"Error loading CSV: {e}")
            return False

    def batch_posts_for_analysis(self, batch_size: int = 10) -> List[List[Dict]]:
        """Split loaded posts into batches to keep per-request token usage bounded."""
        return [
            self.posts[i:i + batch_size]
            for i in range(0, len(self.posts), batch_size)
        ]

    def format_batch_for_ai(self, batch: List[Dict]) -> str:
        """Render a batch of post rows as the plain-text listing sent to the model."""
        lines = ["POSTS TO ANALYZE:", ""]
        for i, post in enumerate(batch, 1):
            lines.extend([
                f"{i}. POST ID: {post['post_id']}",
                f"   Site: {post['site']}",
                f"   Title: {post['title']}",
                f"   Status: {post['status']}",
                f"   Word Count: {post['word_count']}",
                f"   Content: {post['content_preview']}",
                f"   Current Categories: {post['categories']}",
                f"   Meta Description: {post['meta_description']}",
                "",
            ])
        # join (single pass) instead of repeated string += in a loop.
        return "\n".join(lines) + "\n"

    def get_ai_recommendations(self, batch: List[Dict]) -> Optional[str]:
        """Ask Claude (via OpenRouter) for recommendations on one batch.

        Returns:
            The raw model response text, or None when the API key is missing
            or the request/response fails. Updates ``self.api_calls`` and
            ``self.ai_cost`` on success.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        batch_text = self.format_batch_for_ai(batch)

        prompt = f"""Analyze these blog posts and provide clear, actionable recommendations.

Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content

{batch_text}

For EACH post, provide a JSON object with:
{{
  "post_id": <id>,
  "decision": "<ACTION>" where ACTION is ONE of:
    - "Keep on mistergeek.net" (high-value, high-traffic)
    - "Move to webscroll.fr" (torrenting/file-sharing content)
    - "Move to hellogeek.net" (low-traffic or off-brand)
    - "Delete" (spam, extremely low quality, zero traffic)
    - "Consolidate with post_id:<id>" (similar content, duplicate)
  "category": "<CATEGORY>" where category is ONE of:
    - "VPN"
    - "Software/Tools"
    - "Gaming"
    - "Streaming"
    - "Torrenting"
    - "File-Sharing"
    - "SEO"
    - "Content Marketing"
    - "Other"
  "reason": "<Brief reason for decision>",
  "priority": "<High|Medium|Low>",
  "notes": "<Any additional notes>"
}}

Return ONLY a JSON array. Example:
[
  {{"post_id": 2845, "decision": "Keep on mistergeek.net", "category": "VPN", "reason": "High traffic, core topic", "priority": "High", "notes": "Already optimized"}},
  {{"post_id": 1234, "decision": "Move to webscroll.fr", "category": "Torrenting", "reason": "Torrent tracker content", "priority": "Medium", "notes": "Good SEO potential on target site"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info("  Sending batch to Claude for analysis...")

            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,  # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost. Rates: $3 / 1M input tokens, $15 / 1M output tokens
            # (claude-3.5-sonnet pricing at time of writing — verify if model changes).
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f"  ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            return recommendations_text

        except (requests.RequestException, KeyError, IndexError, ValueError) as e:
            # RequestException: network/HTTP errors; KeyError/IndexError: an
            # unexpected response shape; ValueError: non-JSON response body.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """Parse the JSON array of recommendations from the raw model response.

        The model may wrap the array in prose or markdown fences, so the
        parser extracts the outermost ``[...]`` span before decoding.
        Returns an empty list on any parse failure.
        """
        try:
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1

            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []

            return json.loads(recommendations_json[start_idx:end_idx])

        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []

    def analyze_all_posts(self) -> bool:
        """Analyze all loaded posts in batches, filling ``self.analyzed_posts``.

        Posts for which no recommendation came back are marked 'Pending' so
        every input row appears in the output. Returns True if at least one
        post was processed.
        """
        logger.info("\n" + "="*70)
        logger.info("ANALYZING POSTS WITH AI")
        logger.info("="*70 + "\n")

        batch_size = 10
        batches = self.batch_posts_for_analysis(batch_size=batch_size)
        # Log the actual batch size instead of a hardcoded "10" so the
        # message cannot drift from the configuration above.
        logger.info(f"Processing {len(self.posts)} posts in {len(batches)} batches of {batch_size}...\n")

        all_recommendations: Dict[str, Dict] = {}

        for batch_num, batch in enumerate(batches, 1):
            logger.info(f"Batch {batch_num}/{len(batches)}: Analyzing {len(batch)} posts...")

            recommendations_json = self.get_ai_recommendations(batch)

            if not recommendations_json:
                # Best-effort: skip the failed batch; its posts stay 'Pending'.
                logger.error(f"  Failed to get recommendations for batch {batch_num}")
                continue

            recommendations = self.parse_recommendations(recommendations_json)

            for rec in recommendations:
                # Keyed by string post_id to match the CSV's string values.
                all_recommendations[str(rec.get('post_id', ''))] = rec

            logger.info(f"  ✓ Got {len(recommendations)} recommendations")

        logger.info(f"\n✓ Analysis complete!")
        logger.info(f"  Total recommendations: {len(all_recommendations)}")
        logger.info(f"  API calls: {self.api_calls}")
        logger.info(f"  Estimated cost: ${self.ai_cost:.4f}")

        # Map recommendations back onto the original post rows.
        for post in self.posts:
            post_id = str(post['post_id'])
            if post_id in all_recommendations:
                rec = all_recommendations[post_id]
                post['decision'] = rec.get('decision', 'No decision')
                post['recommended_category'] = rec.get('category', 'Other')
                post['reason'] = rec.get('reason', '')
                post['priority'] = rec.get('priority', 'Medium')
                post['ai_notes'] = rec.get('notes', '')
            else:
                post['decision'] = 'Pending'
                post['recommended_category'] = 'Other'
                post['reason'] = 'No recommendation'
                post['priority'] = 'Medium'
                post['ai_notes'] = ''

            self.analyzed_posts.append(post)

        return len(self.analyzed_posts) > 0

    def _write_csv(self, path: Path, rows: List[Dict], fieldnames: List[str]) -> None:
        """Write rows to path as a headed CSV (shared by all export files)."""
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def export_with_recommendations(self) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
        """Export the annotated CSV and action-specific CSVs.

        Returns:
            (main_file, moves_file, consolidate_file, delete_file) as path
            strings; the action-specific entries are None when no posts
            require that action.

        Raises:
            ValueError: if called before analyze_all_posts() produced rows.
        """
        if not self.analyzed_posts:
            raise ValueError("No analyzed posts to export; run analyze_all_posts() first")

        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Main file with all recommendations, plus action-specific files.
        main_file = output_dir / f'posts_with_ai_recommendations_{timestamp}.csv'
        moves_file = output_dir / f'posts_to_move_{timestamp}.csv'
        consolidate_file = output_dir / f'posts_to_consolidate_{timestamp}.csv'
        delete_file = output_dir / f'posts_to_delete_{timestamp}.csv'

        # BUG FIX: analyze_all_posts() already added the recommendation
        # fields to every post dict, so appending them unconditionally
        # produced duplicate CSV columns. De-duplicate preserving order.
        fieldnames = list(dict.fromkeys(
            list(self.analyzed_posts[0].keys()) + list(self.RECOMMENDATION_FIELDS)
        ))

        logger.info(f"\nExporting recommendations to CSV...")

        self._write_csv(main_file, self.analyzed_posts, fieldnames)
        logger.info(f"✓ Main file: {main_file}")

        # Partition posts by the action their decision implies.
        posts_to_move = [p for p in self.analyzed_posts if 'Move to' in p.get('decision', '')]
        posts_to_consolidate = [p for p in self.analyzed_posts if 'Consolidate' in p.get('decision', '')]
        posts_to_delete = [p for p in self.analyzed_posts if p.get('decision') == 'Delete']

        if posts_to_move:
            self._write_csv(moves_file, posts_to_move, fieldnames)
            logger.info(f"✓ Moves file ({len(posts_to_move)} posts): {moves_file}")

        if posts_to_consolidate:
            self._write_csv(consolidate_file, posts_to_consolidate, fieldnames)
            logger.info(f"✓ Consolidate file ({len(posts_to_consolidate)} posts): {consolidate_file}")

        if posts_to_delete:
            self._write_csv(delete_file, posts_to_delete, fieldnames)
            logger.info(f"✓ Delete file ({len(posts_to_delete)} posts): {delete_file}")

        return (
            str(main_file),
            str(moves_file) if posts_to_move else None,
            str(consolidate_file) if posts_to_consolidate else None,
            str(delete_file) if posts_to_delete else None
        )

    def print_summary(self):
        """Log a breakdown of decisions, categories, priorities, and per-site stats."""
        logger.info("\n" + "="*70)
        logger.info("ANALYSIS SUMMARY")
        logger.info("="*70 + "\n")

        # Decisions, most common first.
        decisions = Counter(p.get('decision', 'Unknown') for p in self.analyzed_posts)
        logger.info("DECISIONS:")
        for decision, count in decisions.most_common():
            logger.info(f"  {decision}: {count} posts")

        # Recommended categories, most common first.
        categories = Counter(p.get('recommended_category', 'Other') for p in self.analyzed_posts)
        logger.info("\nRECOMMENDED CATEGORIES:")
        for cat, count in categories.most_common():
            logger.info(f"  {cat}: {count} posts")

        # Priorities in fixed High/Medium/Low order.
        priorities = Counter(p.get('priority', 'Unknown') for p in self.analyzed_posts)
        logger.info("\nPRIORITY BREAKDOWN:")
        for priority in ['High', 'Medium', 'Low']:
            logger.info(f"  {priority}: {priorities.get(priority, 0)} posts")

        # Per-site decision counts.
        logger.info("\nBY SITE:")
        by_site: Dict[str, List[str]] = {}
        for post in self.analyzed_posts:
            site = post.get('site', 'Unknown')
            by_site.setdefault(site, []).append(post.get('decision', 'Unknown'))

        for site in sorted(by_site):
            logger.info(f"\n  {site}:")
            for decision, count in sorted(Counter(by_site[site]).items()):
                logger.info(f"    {decision}: {count}")

    def run(self):
        """Run the complete analysis pipeline; exits the process on failure."""
        logger.info("="*70)
        logger.info("AI-POWERED POST ANALYSIS AND RECOMMENDATIONS")
        logger.info("="*70)

        # Load CSV; abort the process if the input is unusable.
        if not self.load_csv():
            sys.exit(1)

        # Analyze posts with the AI; abort if nothing was processed.
        if not self.analyze_all_posts():
            logger.error("Failed to analyze posts")
            sys.exit(1)

        self.print_summary()

        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        main_file, moves_file, consol_file, delete_file = self.export_with_recommendations()

        logger.info("\n" + "="*70)
        logger.info("NEXT STEPS")
        logger.info("="*70)
        logger.info("\n1. Review main file with all recommendations:")
        logger.info(f"   {main_file}")

        logger.info("\n2. Execute moves (automate with script):")
        if moves_file:
            logger.info(f"   {moves_file}")
        else:
            logger.info("   No posts to move")

        logger.info("\n3. Consolidate duplicates:")
        if consol_file:
            logger.info(f"   {consol_file}")
        else:
            logger.info("   No posts to consolidate")

        logger.info("\n4. Delete low-quality posts:")
        if delete_file:
            logger.info(f"   {delete_file}")
        else:
            logger.info("   No posts to delete")

        logger.info("\n✓ Analysis complete!")
|
|
|
|
|
|
def main():
    """Main entry point: parse the CSV path argument and run the analyzer."""
    import argparse

    arg_parser = argparse.ArgumentParser(
        description='Analyze exported posts CSV using Claude AI and provide recommendations'
    )
    arg_parser.add_argument('csv_file', help='Path to exported posts CSV file')
    parsed_args = arg_parser.parse_args()

    PostAnalyzer(parsed_args.csv_file).run()


if __name__ == '__main__':
    main()
|